diff --git a/apps/android_rpc/tests/android_rpc_test.py b/apps/android_rpc/tests/android_rpc_test.py index 122d07faf9e5e..32af005d7d4d7 100644 --- a/apps/android_rpc/tests/android_rpc_test.py +++ b/apps/android_rpc/tests/android_rpc_test.py @@ -22,6 +22,7 @@ """ import tvm +from tvm import te import os from tvm import rpc from tvm.contrib import util, ndk @@ -44,9 +45,9 @@ def test_rpc_module(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') a_np = np.random.uniform(size=1024).astype(A.dtype) temp = util.tempdir() @@ -56,7 +57,7 @@ def test_rpc_module(): session_timeout=60) # Compile the Graph for CPU target - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].parallel(xi) s[B].pragma(xo, "parallel_launch_point") @@ -79,10 +80,10 @@ def test_rpc_module(): # Compile the Graph for OpenCL target if test_opencl: - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) # Build the dynamic lib. # If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "opencl", target_host=target, name="myadd") @@ -102,10 +103,10 @@ def test_rpc_module(): # Compile the Graph for Vulkan target if test_vulkan: - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) # Build the dynamic lib. # If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "vulkan", target_host=target, name="myadd") diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py index 5403e9610c32d..53b616868bdd0 100644 --- a/apps/benchmark/arm_cpu_imagenet_bench.py +++ b/apps/benchmark/arm_cpu_imagenet_bench.py @@ -22,6 +22,7 @@ import numpy as np import tvm +from tvm import te from tvm.contrib.util import tempdir import tvm.contrib.graph_runtime as runtime from tvm import relay diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index fd96be6ad66c9..dfb0445bf214d 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -23,6 +23,7 @@ import numpy as np import tvm +from tvm import te import tvm.contrib.graph_runtime as runtime from tvm import relay diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py index d5d60a245be31..4f93a0d5e383c 100644 --- a/apps/benchmark/mobile_gpu_imagenet_bench.py +++ b/apps/benchmark/mobile_gpu_imagenet_bench.py @@ -22,6 +22,7 @@ import numpy as np import tvm +from tvm import te from tvm.contrib.util import tempdir import tvm.contrib.graph_runtime as runtime from tvm import relay diff --git a/apps/bundle_deploy/build_model.py b/apps/bundle_deploy/build_model.py index de9e73522ca2c..37e302449016e 100644 --- a/apps/bundle_deploy/build_model.py +++ b/apps/bundle_deploy/build_model.py @@ -20,6 +20,7 @@ import os from tvm import relay import tvm +from tvm import te import logging diff --git a/apps/dso_plugin_module/test_plugin_module.py b/apps/dso_plugin_module/test_plugin_module.py index 6304ef9573fbc..0704dd0f5f2d2 100644 --- a/apps/dso_plugin_module/test_plugin_module.py +++ b/apps/dso_plugin_module/test_plugin_module.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import os def test_plugin_module(): diff --git a/apps/extension/python/tvm_ext/__init__.py b/apps/extension/python/tvm_ext/__init__.py index 31b149eb4913f..377db7c1c6ea3 100644 --- a/apps/extension/python/tvm_ext/__init__.py +++ b/apps/extension/python/tvm_ext/__init__.py @@ -21,6 +21,7 @@ import ctypes # Import TVM first to get library symbols import tvm +from tvm import te def load_lib(): """Load library, the functions will be registered into TVM""" diff --git a/apps/extension/tests/test_ext.py b/apps/extension/tests/test_ext.py index 257ecd6841758..f7e17d2fdc623 100644 --- a/apps/extension/tests/test_ext.py +++ b/apps/extension/tests/test_ext.py @@ -16,6 +16,8 @@ # under the License. import tvm_ext import tvm +import tvm._ffi.registry +from tvm import te import numpy as np def test_bind_add(): @@ -26,9 +28,9 @@ def add(a, b): def test_ext_dev(): n = 10 - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): return @@ -43,8 +45,8 @@ def check_llvm(): def test_sym_add(): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') c = tvm_ext.sym_add(a, b) assert c.a == a and c.b == b @@ -59,19 +61,20 @@ def ivec_cb(v2): assert(isinstance(v2, tvm_ext.IntVec)) assert v2[2] == 3 - tvm.convert(ivec_cb)(ivec) + tvm.runtime.convert(ivec_cb)(ivec) def test_extract_ext(): - fdict = tvm.extract_ext_funcs(tvm_ext._LIB.TVMExtDeclare) + fdict = tvm._ffi.registry.extract_ext_funcs( + tvm_ext._LIB.TVMExtDeclare) assert fdict["mul"](3, 4) == 12 def test_extern_call(): n = 10 - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: tvm.call_extern("float32", "TVMTestAddOne", A(*i)), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: tvm.tir.call_extern("float32", "TVMTestAddOne", A(*i)), name='B') + s = te.create_schedule(B.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): diff --git a/apps/howto_deploy/prepare_test_libs.py b/apps/howto_deploy/prepare_test_libs.py index b620bc7a1d5f9..88d9f8ed59022 100644 --- a/apps/howto_deploy/prepare_test_libs.py +++ b/apps/howto_deploy/prepare_test_libs.py @@ -16,13 +16,14 @@ # under the License. """Script to prepare test_addone.so""" import tvm +from tvm import te import os def prepare_test_libs(base_path): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) # Compile library as dynamic library fadd_dylib = tvm.build(s, [A, B], "llvm", name="addone") dylib_path = os.path.join(base_path, "test_addone_dll.so") diff --git a/apps/howto_deploy/python_deploy.py b/apps/howto_deploy/python_deploy.py index 07a27fe9426c9..2a443253a6add 100644 --- a/apps/howto_deploy/python_deploy.py +++ b/apps/howto_deploy/python_deploy.py @@ -19,6 +19,7 @@ # file python_deploy.py import tvm +from tvm import te import numpy as np def verify(mod, fname): diff --git a/apps/ios_rpc/tests/ios_rpc_test.py b/apps/ios_rpc/tests/ios_rpc_test.py index ac3718f7ba8e1..973c252be1759 100644 --- a/apps/ios_rpc/tests/ios_rpc_test.py +++ b/apps/ios_rpc/tests/ios_rpc_test.py @@ -21,6 +21,7 @@ """ import tvm +from tvm import te import os import re import sys @@ -54,14 +55,14 @@ def compile_metal(src): def test_rpc_module(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') temp = util.tempdir() - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) # Build the dynamic lib. # If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd") @@ -70,7 +71,7 @@ def test_rpc_module(): arch=arch, sdk=sdk) xcode.codesign(path_dso1) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].parallel(xi) s[B].pragma(xo, "parallel_launch_point") diff --git a/apps/sgx/enclave/src/build_model.py b/apps/sgx/enclave/src/build_model.py index dff5716684220..f8906d1a0e02a 100644 --- a/apps/sgx/enclave/src/build_model.py +++ b/apps/sgx/enclave/src/build_model.py @@ -23,6 +23,7 @@ from tvm import relay from tvm.relay import testing import tvm +from tvm import te def main(): diff --git a/apps/sgx/run_model.py b/apps/sgx/run_model.py index fb39e34dc6015..c7af96328ec69 100644 --- a/apps/sgx/run_model.py +++ b/apps/sgx/run_model.py @@ -17,6 +17,7 @@ import os.path as osp import numpy as np import tvm +from tvm import te CWD = osp.abspath(osp.dirname(__file__)) diff --git a/docs/api/python/te.rst b/docs/api/python/te.rst index dc3d3dacd2cae..1f70c4d384bb3 100644 --- a/docs/api/python/te.rst +++ b/docs/api/python/te.rst @@ -23,6 +23,7 @@ tvm.te :members: :imported-members: :exclude-members: + any, all, min_value, max_value, trace, exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil, trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else, div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod, diff --git a/docs/api/python/tir.rst b/docs/api/python/tir.rst index d1017cdb46efb..ea1ac669b2732 100644 --- a/docs/api/python/tir.rst +++ b/docs/api/python/tir.rst @@ -20,5 +20,5 @@ tvm.tir .. automodule:: tvm.tir :members: :imported-members: - :exclude-members: PrimExpr + :exclude-members: PrimExpr, const :autosummary: diff --git a/docs/conf.py b/docs/conf.py index 05f4cfc970d1b..d882f75d83a7e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,6 +61,7 @@ os.environ['TVM_BUILD_DOC'] = '1' # Version information. import tvm +from tvm import te version = tvm.__version__ release = tvm.__version__ diff --git a/golang/sample/deploy.py b/golang/sample/deploy.py index 3b221369dbc4d..d523b9c85ffeb 100644 --- a/golang/sample/deploy.py +++ b/golang/sample/deploy.py @@ -21,6 +21,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np # Global declarations of environment. @@ -31,15 +32,15 @@ ###################################################################### # Describe the Computation # ------------------------ -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.placeholder((n,), name='B') -C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.placeholder((n,), name='B') +C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") ###################################################################### # Schedule the Computation # ------------------------ -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) ###################################################################### # Compilation diff --git a/jvm/core/src/test/scripts/test_add_cpu.py b/jvm/core/src/test/scripts/test_add_cpu.py index dd7e4a8de73a8..bda66f8c5c738 100644 --- a/jvm/core/src/test/scripts/test_add_cpu.py +++ b/jvm/core/src/test/scripts/test_add_cpu.py @@ -17,14 +17,15 @@ import os import tvm +from tvm import te from tvm.contrib import cc, util def test_add(target_dir): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = tvm.create_schedule(C.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = te.create_schedule(C.op) fadd = tvm.build(s, [A, B, C], "llvm", target_host="llvm", name="myadd") fadd.save(os.path.join(target_dir, "add_cpu.o")) diff --git a/jvm/core/src/test/scripts/test_add_gpu.py b/jvm/core/src/test/scripts/test_add_gpu.py index e3f4fbfedaf36..d520054a4c759 100644 --- a/jvm/core/src/test/scripts/test_add_gpu.py +++ b/jvm/core/src/test/scripts/test_add_gpu.py @@ -17,22 +17,23 @@ import os import tvm +from tvm import te from tvm.contrib import cc, util def test_add(target_dir): if not tvm.runtime.enabled("cuda"): print("skip %s because cuda is not enabled..." % __file__) return - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) bx, tx = s[C].split(C.op.axis[0], factor=64) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) fadd_cuda = tvm.build(s, [A, B, C], "cuda", target_host="llvm", name="myadd") fadd_cuda.save(os.path.join(target_dir, "add_gpu.o")) diff --git a/jvm/core/src/test/scripts/test_graph_runtime.py b/jvm/core/src/test/scripts/test_graph_runtime.py index 4d82973ae0313..63a76d1169234 100644 --- a/jvm/core/src/test/scripts/test_graph_runtime.py +++ b/jvm/core/src/test/scripts/test_graph_runtime.py @@ -17,14 +17,15 @@ import os import tvm +from tvm import te import json from tvm.contrib import graph_runtime def dump_graph_lib(target_dir): dim = 4 - A = tvm.placeholder((dim,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - sched = tvm.create_schedule(B.op) + A = te.placeholder((dim,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + sched = te.create_schedule(B.op) node0 = {"op": "null", "name": "x", "inputs": []} node1 = {"op": "tvm_op", "name": "add", diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 65cb67266de69..62265eab111f0 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -24,7 +24,7 @@ # tvm._ffi from ._ffi.base import TVMError, __version__ from ._ffi.runtime_ctypes import TypeCode, DataType -from ._ffi.registry import register_object, register_func, register_extension +from ._ffi import register_object, register_func, register_extension, get_global_func # top-level alias # tvm.runtime @@ -65,12 +65,8 @@ from . import arith # backward compact for topi, to be removed later -from .api import * from .tir import expr, stmt, ir_builder, ir_pass, generic from .te import tensor, schedule -from .tir.op import * -from . import intrin -from . import make # Contrib initializers from .contrib import rocm as _rocm, nvcc as _nvcc, sdaccel as _sdaccel diff --git a/python/tvm/api.py b/python/tvm/api.py deleted file mode 100644 index 9afaf03ee2559..0000000000000 --- a/python/tvm/api.py +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Functions defined in TVM.""" -# pylint: disable=invalid-name,unused-import,redefined-builtin -import tvm._ffi -import tvm.ir -import tvm.tir - -from tvm.runtime import convert, const, DataType -from tvm.ir import container as _container, Range -from tvm.tir import decl_buffer, layout, bijective_layout -from tvm.tir import min_value, max_value, indexdiv, indexmod, all, any -from tvm.te import placeholder, compute, scan, extern, var, size_var, thread_axis, reduce_axis - - -from ._ffi.base import string_types, TVMError -from ._ffi.registry import register_func, get_global_func, extract_ext_funcs - -from . import make as _make - -int8 = "int8" -int32 = "int32" -float32 = "float32" -handle = "handle" diff --git a/python/tvm/arith/analyzer.py b/python/tvm/arith/analyzer.py index 382a7e033e753..5a420ad817550 100644 --- a/python/tvm/arith/analyzer.py +++ b/python/tvm/arith/analyzer.py @@ -212,7 +212,7 @@ def constraint_scope(self, constraint): -------- .. code-block:: python - x = tvm.var("x") + x = te.var("x") analyzer = tvm.arith.Analyzer() with analzyer.constraint_scope(x % 3 == 0): # constraint in effect diff --git a/python/tvm/autotvm/feature.py b/python/tvm/autotvm/feature.py index 4ff1139d85f18..c576ffd76e565 100644 --- a/python/tvm/autotvm/feature.py +++ b/python/tvm/autotvm/feature.py @@ -28,8 +28,11 @@ import struct import numpy as np +import tvm._ffi -from tvm import schedule, ir_pass, get_global_func, target as _target +from tvm import target as _target +from tvm.tir import ir_pass +from tvm.te import schedule from tvm.driver import build_module def ana_lower(sch, args, @@ -49,10 +52,12 @@ def ana_lower(sch, args, return stmt try: - _get_buffer_curve_sample_flatten = get_global_func( + _get_buffer_curve_sample_flatten = tvm._ffi.get_global_func( "autotvm.feature.GetCurveSampleFeatureFlatten") - _get_itervar_feature = get_global_func("autotvm.feature.GetItervarFeature") - _get_itervar_feature_flatten = get_global_func("autotvm.feature.GetItervarFeatureFlatten") + _get_itervar_feature = tvm._ffi.get_global_func( + "autotvm.feature.GetItervarFeature") + _get_itervar_feature_flatten = tvm._ffi.get_global_func( + "autotvm.feature.GetItervarFeatureFlatten") except ValueError as e: def raise_error(*args, **kwargs): # pylint: disable=unused-argument raise RuntimeError("Cannot load autotvm c++ API") @@ -64,8 +69,8 @@ def get_itervar_feature(sch, args, take_log=False): Parameters ---------- - sch: tvm.schedule.Schedule - args: Array of tvm.tensor.Tensor + sch: tvm.te.schedule.Schedule + args: Array of te.tensor.Tensor the buffer args for lower take_log: bool whether take log of numerical statics @@ -112,8 +117,8 @@ def get_itervar_feature_flatten(sch, args, take_log=True): Parameters ---------- - sch: tvm.schedule.Schedule - args: Array of tvm.tensor.Tensor + sch: tvm.te.schedule.Schedule + args: Array of te.tensor.Tensor the buffer args for lower take_log: bool whether take log of numerical statics @@ -185,8 +190,8 @@ def get_buffer_curve_sample_flatten(sch, args, sample_n=30): Parameters ---------- - sch: tvm.schedule.Schedule - args: Array of tvm.tensor.Tensor + sch: tvm.te.schedule.Schedule + args: Array of te.tensor.Tensor the buffer args for lower sample_n: int number of sample points along one dimension diff --git a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py index 3e85e938fa823..c6b79fabdaf56 100644 --- a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py +++ b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py @@ -23,6 +23,7 @@ import topi import tvm +from tvm import te from tvm import autotvm, relay from tvm.autotvm.task import get_config from tvm.autotvm.record import encode, load_from_file @@ -301,8 +302,8 @@ def _iterate_layout_transform(self, callback): _, out_layout = o_input_info[0] else: _, out_layout = o_output_info[0] - data_placeholder = tvm.placeholder(in_shape, name="data", - dtype=self._dtype) + data_placeholder = te.placeholder(in_shape, name="data", + dtype=self._dtype) args = [data_placeholder, in_layout, out_layout] callback(i_idx, o_idx, m, n, args) diff --git a/python/tvm/autotvm/task/code_hash.py b/python/tvm/autotvm/task/code_hash.py index 9410f526c45fe..d5358ec437e4d 100644 --- a/python/tvm/autotvm/task/code_hash.py +++ b/python/tvm/autotvm/task/code_hash.py @@ -30,7 +30,7 @@ def attach_code_hash(s): Parameters ---------- s: Schedule - tvm.schedule.Schedule to attach the hash to + tvm.te.schedule.Schedule to attach the hash to """ def decorator(func): def wrapper(*args, **kwargs): diff --git a/python/tvm/autotvm/task/space.py b/python/tvm/autotvm/task/space.py index 47c227073677c..fbf474fc4df71 100644 --- a/python/tvm/autotvm/task/space.py +++ b/python/tvm/autotvm/task/space.py @@ -32,7 +32,7 @@ from collections import namedtuple, OrderedDict import numpy as np -from tvm import schedule, thread_axis +from tvm.te import schedule, thread_axis from tvm.autotvm.util import get_const_int Axis = namedtuple('Axis', ['space', 'index']) @@ -57,7 +57,7 @@ class TransformSpace(object): .. note:: We can regard our schedule code as a transformation graph of axes. - Starting from raw axes in the definition of tvm.compute, we can transform these axes + Starting from raw axes in the definition of te.compute, we can transform these axes by some operators. The operator includes 'split', 'reorder' and 'annotate'. Each operator has some tunable parameters (e.g. the split factor). Then the tuning process is just to find good parameters of these op. @@ -106,7 +106,7 @@ class VirtualAxis(TransformSpace): Parameters ---------- - var: int or tvm.schedule.IterVar + var: int or tvm.te.schedule.IterVar If is int, return a virtual axis whose length is the provided argument. If is IterVar, return a virtual axis whose length is extracted from the IterVar's extent domain. @@ -266,11 +266,11 @@ def apply(self, sch, op, axis): Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - op: tvm.tensor.Operation + op: tvm.te.Operation The stage to be applied - axis: tvm.schedule.IterVar + axis: tvm.te.schedule.IterVar axis to split Returns @@ -390,11 +390,11 @@ def apply(self, sch, op, axes): Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - op: tvm.tensor.Operation + op: tvm.te.Operation The stage to be applied - axis: tvm.schedule.IterVar + axis: tvm.te.schedule.IterVar axis to split Returns @@ -513,11 +513,11 @@ def apply(self, sch, op, axes, axis_lens=None, Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - op: tvm.tensor.Operation + op: tvm.te.Operation The stage to be applied - axes: Array of tvm.schedule.IterVar + axes: Array of tvm.te.schedule.IterVar axis to split axis_lens: Array of int, optional the length of axes @@ -532,7 +532,7 @@ def apply(self, sch, op, axes, axis_lens=None, Returns ------- - axes : list of tvm.schedule.IterVar + axes : list of tvm.te.schedule.IterVar The transformed axes """ if source is not None: # special case : attach cache_read/cache_write @@ -624,7 +624,7 @@ def axis(var): Parameters ---------- - var: int or tvm.schedule.IterVar + var: int or tvm.te.schedule.IterVar If is int, return an axis whose length is the provided argument. If is IterVar, return an axis whose length is extracted from the IterVar's extent domain. @@ -640,7 +640,7 @@ def define_split(self, name, axis, policy='factors', **kwargs): ---------- name: str name to index the entity of this space - axis: tvm.schedule.IterVar + axis: tvm.te.schedule.IterVar axis to split policy: str name of policy. @@ -681,7 +681,7 @@ def define_reorder(self, name, axes, policy, **kwargs): ---------- name: str name to index the entity of this space - axes: Array of tvm.schedule.IterVar + axes: Array of tvm.te.schedule.IterVar axes to reorder policy: str name of policy @@ -702,7 +702,7 @@ def define_annotate(self, name, axes, policy, **kwargs): ---------- name: str name to index the entity of this space - axes: Array of tvm.schedule.IterVar + axes: Array of tvm.te.schedule.IterVar axes to annotate policy: str name of policy diff --git a/python/tvm/autotvm/task/task.py b/python/tvm/autotvm/task/task.py index ca1ae0eefefdd..c75105b413b72 100644 --- a/python/tvm/autotvm/task/task.py +++ b/python/tvm/autotvm/task/task.py @@ -21,10 +21,13 @@ func is a state-less function, or a string that registers the standard task. """ - import numpy as np -from ... import tensor, expr, container, placeholder, target as _target +from tvm import target as _target +from tvm.ir import container +from tvm.tir import expr +from tvm.te import tensor, placeholder + from ..util import get_const_int, get_const_tuple from .dispatcher import DispatchContext, ApplyConfig @@ -81,7 +84,7 @@ def deserialize_args(args): def args_to_workload(args, task_name=None): """Convert argument list to hashable workload tuple. This function will convert list to tuple, tvm node to python value and - flatten tvm.tensor.Tensor to a tuple + flatten te.tensor.Tensor to a tuple Parameters ---------- @@ -138,9 +141,9 @@ def instantiate(self, config): Returns ------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - arg_bufs: Array of tvm.tensor.Tensor + arg_bufs: Array of te.tensor.Tensor The input/output buffers """ config.flop = 0 @@ -303,12 +306,12 @@ def register_customized_task(name, func=None): @autotvm.register_customized_task("matmul") def matmul(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -400,7 +403,7 @@ def compute_flop(sch): Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule schedule Returns @@ -475,8 +478,8 @@ def traverse(ops): elif isinstance(op, tensor.PlaceholderOp): pass else: - raise FlopCalculationError("Only support tvm.compute currently. " - "Other ops like tvm.scan/tvm.extern is not supported") + raise FlopCalculationError("Only support te.compute currently. " + "Other ops like tvm.te.scan/te.extern is not supported") return ret try: diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py index 45385fbe8f7e0..1a381069112e8 100644 --- a/python/tvm/autotvm/task/topi_integration.py +++ b/python/tvm/autotvm/task/topi_integration.py @@ -21,8 +21,8 @@ These decorators can make your simple implementation be able to use different configurations for different workloads. Here we directly use all arguments to the TOPI call as "workload", so make sure all the arguments -(except tvm.Tensor) in you calls are hashable. For tvm.Tensor, we will serialize it to a hashable -tuple. +(except tvm.te.Tensor) in you calls are hashable. For tvm.te.Tensor, +we will serialize it to a hashable tuple. See tvm/topi/python/topi/arm_cpu/depthwise_conv2d.py for example usage. """ diff --git a/python/tvm/contrib/binutil.py b/python/tvm/contrib/binutil.py index 1f322acdf8b92..521e0885548ce 100644 --- a/python/tvm/contrib/binutil.py +++ b/python/tvm/contrib/binutil.py @@ -18,8 +18,9 @@ """Utilities for binary file manipulation""" import os import subprocess +import tvm._ffi from . import util -from ..api import register_func + RELOCATION_LD_SCRIPT_TEMPLATE = """ /* linker symbol for use in UTVMInit */ @@ -95,7 +96,7 @@ def run_cmd(cmd): return output -@register_func("tvm_callback_get_section_size") +@tvm._ffi.register_func("tvm_callback_get_section_size") def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): """Finds size of the section in the binary. Assumes `size` shell command exists (typically works only on Linux machines) @@ -162,7 +163,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): return section_size -@register_func("tvm_callback_relocate_binary") +@tvm._ffi.register_func("tvm_callback_relocate_binary") def tvm_callback_relocate_binary( binary_path, word_size, @@ -233,7 +234,7 @@ def tvm_callback_relocate_binary( return rel_bin -@register_func("tvm_callback_read_binary_section") +@tvm._ffi.register_func("tvm_callback_read_binary_section") def tvm_callback_read_binary_section(binary, section, toolchain_prefix): """Returns the contents of the specified section in the binary byte array @@ -273,7 +274,7 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix): return section_bin -@register_func("tvm_callback_get_symbol_map") +@tvm._ffi.register_func("tvm_callback_get_symbol_map") def tvm_callback_get_symbol_map(binary, toolchain_prefix): """Obtains a map of symbols to addresses in the passed binary diff --git a/python/tvm/contrib/cblas.py b/python/tvm/contrib/cblas.py index 2337f846be516..e1a4a8a7849b1 100644 --- a/python/tvm/contrib/cblas.py +++ b/python/tvm/contrib/cblas.py @@ -16,7 +16,7 @@ # under the License. """External function interface to BLAS libraries.""" import tvm -from .. import api as _api +from tvm import te def matmul(lhs, rhs, transa=False, transb=False, **kwargs): @@ -41,7 +41,7 @@ def matmul(lhs, rhs, transa=False, transb=False, **kwargs): """ n = lhs.shape[1] if transa else lhs.shape[0] m = rhs.shape[0] if transb else rhs.shape[1] - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( @@ -75,7 +75,7 @@ def batch_matmul(lhs, rhs, transa=False, transb=False, iterative=False, **kwargs b = lhs.shape[0] n = lhs.shape[2] if transa else lhs.shape[1] m = rhs.shape[1] if transb else rhs.shape[2] - return _api.extern( + return te.extern( (b, n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( diff --git a/python/tvm/contrib/cublas.py b/python/tvm/contrib/cublas.py index 75290a8f64022..7b42becec2be2 100644 --- a/python/tvm/contrib/cublas.py +++ b/python/tvm/contrib/cublas.py @@ -16,7 +16,8 @@ # under the License. """External function interface to cuBLAS libraries.""" import tvm -from .. import api as _api +from tvm import te + def matmul(lhs, rhs, transa=False, transb=False, dtype=None): """Create an extern op that compute matrix mult of A and rhs with cuBLAS @@ -40,7 +41,7 @@ def matmul(lhs, rhs, transa=False, transb=False, dtype=None): n = lhs.shape[1] if transa else lhs.shape[0] m = rhs.shape[0] if transb else rhs.shape[1] dtype = dtype if dtype is not None else lhs.dtype - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cublas.matmul", @@ -69,7 +70,7 @@ def batch_matmul(lhs, rhs, transa=False, transb=False, dtype=None): n = lhs.shape[2] if transa else lhs.shape[1] m = rhs.shape[1] if transb else rhs.shape[2] dtype = dtype if dtype is not None else lhs.dtype - return _api.extern( + return te.extern( (b, n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cublas.batch_matmul", diff --git a/python/tvm/contrib/cublaslt.py b/python/tvm/contrib/cublaslt.py index 1000ede1379dd..3b36f4720fece 100644 --- a/python/tvm/contrib/cublaslt.py +++ b/python/tvm/contrib/cublaslt.py @@ -16,7 +16,7 @@ # under the License. """External function interface to cuBLASlt libraries.""" import tvm -from .. import api as _api +from tvm import te def matmul(lhs, rhs, transa=False, transb=False, n=0, m=0, dtype=None): @@ -43,7 +43,7 @@ def matmul(lhs, rhs, transa=False, transb=False, n=0, m=0, dtype=None): if m == 0: m = rhs.shape[0] if transb else rhs.shape[1] dtype = dtype if dtype is not None else lhs.dtype - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cublaslt.matmul", diff --git a/python/tvm/contrib/cudnn.py b/python/tvm/contrib/cudnn.py index 20b42d79d27e7..e62724512d49b 100644 --- a/python/tvm/contrib/cudnn.py +++ b/python/tvm/contrib/cudnn.py @@ -19,8 +19,9 @@ import ctypes import numpy as np import tvm -from .. import api as _api -from .. import get_global_func as _get_global_func + +import tvm._ffi +from tvm import te # algos can be read from cudnn.h _FWD_ALGOS = [ @@ -217,7 +218,7 @@ def conv_output_shape(tensor_format, _prepare_global_func_params(dims - 2, pad, stride, dilation, x_shape, w_shape) oshape = np.zeros((dims), dtype=np.int32) - func = _get_global_func("tvm.contrib.cudnn.conv.output_shape") + func = tvm._ffi.get_global_func("tvm.contrib.cudnn.conv.output_shape") func(tensor_format, dims - 2, _get_np_int32_array_handle(pad), @@ -276,7 +277,7 @@ def conv_find_algo(tensor_format, pad, stride, dilation, xshape, wshape = \ _prepare_global_func_params(dims - 2, pad, stride, dilation, x_shape, w_shape) yshape = np.array(y_shape, dtype=np.int32) - func = _get_global_func("tvm.contrib.cudnn.conv.find_algo") + func = tvm._ffi.get_global_func("tvm.contrib.cudnn.conv.find_algo") return func(tensor_format, dims - 2, _get_np_int32_array_handle(pad), @@ -363,7 +364,7 @@ def conv_forward(x, conv_dtype) if dims == 4: - return _api.extern( + return te.extern( oshape, [x, w], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cudnn.conv2d.forward", @@ -381,7 +382,7 @@ def conv_forward(x, outs[0], conv_dtype), name="y") - return _api.extern( + return te.extern( oshape, [x, w], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cudnn.conv3d.forward", diff --git a/python/tvm/contrib/debugger/debug_result.py b/python/tvm/contrib/debugger/debug_result.py index 26c16e3135e8c..18920c60719e3 100644 --- a/python/tvm/contrib/debugger/debug_result.py +++ b/python/tvm/contrib/debugger/debug_result.py @@ -21,6 +21,7 @@ import numpy as np import tvm + GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json' CHROME_TRACE_FILE_NAME = "_tvmdbg_execution_trace.json" diff --git a/python/tvm/contrib/miopen.py b/python/tvm/contrib/miopen.py index 7f024f70b21a5..04e35de920806 100644 --- a/python/tvm/contrib/miopen.py +++ b/python/tvm/contrib/miopen.py @@ -19,8 +19,9 @@ import ctypes import numpy as np import tvm -from .. import api as _api -from .. import get_global_func as _get_global_func +import tvm._ffi + +from tvm import te def _get_np_int32_array_handle(arr): @@ -91,7 +92,7 @@ def conv2d_forward(x, oshape = np.zeros((len(x.shape)), dtype=np.int32) xshape = x.shape wshape = w.shape - setup_func = _get_global_func("tvm.contrib.miopen.conv2d.setup") + setup_func = tvm._ffi.get_global_func("tvm.contrib.miopen.conv2d.setup") algo = setup_func(conv_mode, data_type, pad_h, @@ -111,7 +112,7 @@ def conv2d_forward(x, group_count, _get_np_int32_array_handle(oshape)) - return _api.extern( + return te.extern( list(oshape), [x, w], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.miopen.conv2d.forward", diff --git a/python/tvm/contrib/mps.py b/python/tvm/contrib/mps.py index 5d84e892ec741..8f310b0915b69 100644 --- a/python/tvm/contrib/mps.py +++ b/python/tvm/contrib/mps.py @@ -16,7 +16,8 @@ # under the License. """External function interface to MPS libraries.""" import tvm -from .. import api as _api +from tvm import te + # pylint: disable=C0103,W0612 @@ -47,7 +48,7 @@ def matmul(lhs, rhs, transa=False, transb=False): m = b if transb: n = c - return _api.extern( + return te.extern( (m, n), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.mps.matmul", ins[0], ins[1], outs[0], transa, transb), @@ -79,7 +80,7 @@ def conv2d(data, weight, pad='SAME', stride=1): ho = hi // stride wo = wi // stride - return _api.extern( + return te.extern( (n, ho, wo, co), [data, weight], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.mps.conv2d", ins[0], ins[1], outs[0], padding, stride), diff --git a/python/tvm/contrib/nnpack.py b/python/tvm/contrib/nnpack.py index a55a344b6410f..1ce1dcc40f400 100644 --- a/python/tvm/contrib/nnpack.py +++ b/python/tvm/contrib/nnpack.py @@ -16,8 +16,8 @@ # under the License. """External function interface to NNPACK libraries.""" import tvm +from tvm import te import tvm._ffi -from .. import api as _api def is_available(): @@ -43,7 +43,7 @@ def fully_connected_inference(lhs, rhs, nthreads=1): lhs 1D array out[output_channels] of FP32 elements. """ m = rhs.shape[0] - return _api.extern( + return te.extern( (m, ), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.nnpack.fully_connected_inference", @@ -100,13 +100,13 @@ def convolution_inference( assert isinstance(stride, list) and len(stride) == 2 batch, _, input_height, input_width = data.shape output_channels, _, kernel_height, kernel_width = kernel.shape - idxdiv = _api.indexdiv + idxdiv = te.indexdiv output_height = idxdiv( input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 output_width = idxdiv( input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 - return _api.extern( + return te.extern( (batch, output_channels, output_height, output_width), [data, kernel, bias] if bias is not None else [data, kernel], lambda ins, outs: tvm.tir.call_packed( @@ -155,11 +155,11 @@ def convolution_inference_without_weight_transform( batch, _, input_height, input_width = data.shape output_channels, _, _, _ = transformed_kernel.shape kernel_height, kernel_width = (3, 3) - idxdiv = _api.indexdiv + idxdiv = te.indexdiv output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 output_width = idxdiv(input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 - return _api.extern( + return te.extern( (batch, output_channels, output_height, output_width), [data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel], lambda ins, outs: tvm.tir.call_packed( @@ -194,7 +194,7 @@ def convolution_inference_weight_transform( transform_tile_size = 8 if not isinstance(dtype, str): dtype = dtype.dtype - return _api.extern( + return te.extern( (output_channels, input_channels, transform_tile_size, transform_tile_size), [kernel], lambda ins, outs: tvm.tir.call_packed( diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 8712f73c23431..fc8232053b5f7 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -21,10 +21,11 @@ import subprocess import os import warnings + +import tvm._ffi from tvm.runtime import ndarray as nd from . import util -from ..api import register_func from .._ffi.base import py_str def compile_cuda(code, @@ -152,7 +153,7 @@ def get_cuda_version(cuda_path): raise RuntimeError("Cannot read cuda version file") -@register_func("tvm_callback_libdevice_path") +@tvm._ffi.register_func("tvm_callback_libdevice_path") def find_libdevice_path(arch): """Utility function to find libdevice diff --git a/python/tvm/contrib/peak.py b/python/tvm/contrib/peak.py index bc93afbf165e8..1d987a5aeea49 100644 --- a/python/tvm/contrib/peak.py +++ b/python/tvm/contrib/peak.py @@ -19,6 +19,7 @@ import logging import tvm +from tvm import te from . import util from .. import rpc @@ -79,17 +80,17 @@ def measure_bandwidth_sum(total_item, item_per_thread, stride, base_type = str(base_type) + str(bits) dtype = base_type if lanes == 1 else base_type + "x" + str(lanes) - k = tvm.reduce_axis((0, m), name="k") + k = te.reduce_axis((0, m), name="k") - x = tvm.placeholder((n,), dtype=dtype, name="x") - op = tvm.comm_reducer(lambda x, y: x*y, lambda t: tvm.const(1, dtype=t), name="sum") - y = tvm.compute((n // m,), - lambda i: op(x[i // stride * stride * m + i % stride + k * stride], axis=k)) - s = tvm.create_schedule(y.op) + x = te.placeholder((n,), dtype=dtype, name="x") + op = te.comm_reducer(lambda x, y: x*y, lambda t: tvm.tir.const(1, dtype=t), name="sum") + y = te.compute((n // m,), + lambda i: op(x[i // stride * stride * m + i % stride + k * stride], axis=k)) + s = te.create_schedule(y.op) yo, yi = s[y].split(y.op.axis[0], target.max_num_threads) - s[y].bind(yo, tvm.thread_axis("blockIdx.x")) - s[y].bind(yi, tvm.thread_axis("threadIdx.x")) + s[y].bind(yo, te.thread_axis("blockIdx.x")) + s[y].bind(yi, te.thread_axis("threadIdx.x")) s[y].unroll(k) try: @@ -209,8 +210,8 @@ def extern(ins, outs): """construct measurement function by building IR directly""" ib = tvm.ir_builder.create() - bx = tvm.thread_axis("blockIdx.x") - tx = tvm.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") ib.scope_attr(bx, "thread_extent", n // max_threads) ib.scope_attr(tx, "thread_extent", max_threads) @@ -235,8 +236,8 @@ def extern(ins, outs): ib.emit(outs[0].vstore(idx, b[0])) return ib.get() - y = tvm.extern((n,), [], extern, name="y", dtype=dtype) - s = tvm.create_schedule(y.op) + y = te.extern((n,), [], extern, name="y", dtype=dtype) + s = te.create_schedule(y.op) try: func = tvm.build(s, [y], target, target_host=target_host) diff --git a/python/tvm/contrib/random.py b/python/tvm/contrib/random.py index bcc9b17033863..727b68bbbd19c 100644 --- a/python/tvm/contrib/random.py +++ b/python/tvm/contrib/random.py @@ -16,8 +16,8 @@ # under the License. """External function interface to random library.""" import tvm +from tvm import te import tvm._ffi -from .. import api as _api def randint(low, high, size, dtype='int32'): @@ -38,7 +38,7 @@ def randint(low, high, size, dtype='int32'): A tensor with specified size and dtype """ assert 'int' in dtype, "the type of randint output must be int or uint" - return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed( + return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.random.randint", int(low), int(high), outs[0]), dtype=dtype) @@ -66,7 +66,7 @@ def uniform(low, high, size): out : Tensor A tensor with specified size and dtype. """ - return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed( + return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.random.uniform", float(low), float(high), outs[0]), dtype='float32') @@ -90,7 +90,7 @@ def normal(loc, scale, size): out : Tensor A tensor with specified size and dtype """ - return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed( + return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.random.normal", float(loc), float(scale), outs[0]), dtype='float32') diff --git a/python/tvm/contrib/rocblas.py b/python/tvm/contrib/rocblas.py index e11be5a1d973b..86ffaea4e0400 100644 --- a/python/tvm/contrib/rocblas.py +++ b/python/tvm/contrib/rocblas.py @@ -16,7 +16,8 @@ # under the License. """External function interface to rocBLAS libraries.""" import tvm -from .. import api as _api +from tvm import te + def matmul(lhs, rhs, transa=False, transb=False): """Create an extern op that compute matrix mult of A and rhs with rocBLAS @@ -39,7 +40,7 @@ def matmul(lhs, rhs, transa=False, transb=False): """ n = lhs.shape[1] if transa else lhs.shape[0] m = rhs.shape[0] if transb else rhs.shape[1] - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.rocblas.matmul", diff --git a/python/tvm/contrib/rocm.py b/python/tvm/contrib/rocm.py index e5cebdd3f5dc1..7d4b4a2ebefd7 100644 --- a/python/tvm/contrib/rocm.py +++ b/python/tvm/contrib/rocm.py @@ -18,11 +18,13 @@ import subprocess from os.path import join, exists +import tvm._ffi from tvm._ffi.base import py_str +import tvm.runtime import tvm.target from . import util -from ..api import register_func, convert + def find_lld(required=True): """Find ld.lld in system. @@ -85,7 +87,7 @@ def rocm_link(in_file, out_file, lld=None): raise RuntimeError(msg) -@register_func("tvm_callback_rocm_link") +@tvm._ffi.register_func("tvm_callback_rocm_link") def callback_rocm_link(obj_bin): """Links object file generated from LLVM to HSA Code Object @@ -108,7 +110,7 @@ def callback_rocm_link(obj_bin): cobj_bin = bytearray(open(tmp_cobj, "rb").read()) return cobj_bin -@register_func("tvm_callback_rocm_bitcode_path") +@tvm._ffi.register_func("tvm_callback_rocm_bitcode_path") def callback_rocm_bitcode_path(rocdl_dir="/opt/rocm/lib/"): """Utility function to find ROCm device library bitcodes @@ -138,4 +140,4 @@ def callback_rocm_bitcode_path(rocdl_dir="/opt/rocm/lib/"): "oclc_wavefrontsize64_on.amdgcn.bc" ] paths = [join(rocdl_dir, bitcode) for bitcode in bitcode_files] - return convert([path for path in paths if exists(path)]) + return tvm.runtime.convert([path for path in paths if exists(path)]) diff --git a/python/tvm/contrib/sdaccel.py b/python/tvm/contrib/sdaccel.py index 1234d546ebaef..3f9bf43a85d34 100644 --- a/python/tvm/contrib/sdaccel.py +++ b/python/tvm/contrib/sdaccel.py @@ -17,11 +17,12 @@ """Utility for Interacting with SDAccel Tools""" import subprocess import os + +import tvm._ffi from . import util -from ..api import register_func -@register_func("tvm_callback_sdaccel_compile") +@tvm._ffi.register_func("tvm_callback_sdaccel_compile") def compile_vhls(kernel_info, device_name): """Compile Vivado HLS code for SDAccel. diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 966e180ec2b87..77f84b1eb4edd 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -18,10 +18,9 @@ # pylint: disable=invalid-name import numpy as _np from tvm.runtime import ndarray as _nd - -from .. import expr as _expr -from .. import api as _api -from .. import tensor as _tensor +from tvm import te +from tvm.tir import expr as _expr +from tvm.te import tensor as _tensor float32 = "float32" @@ -136,9 +135,9 @@ def __init__(self, shape, nonzeros, dtype, name): """ SparsePlaceholderOp.__init__(self, shape, nonzeros, dtype, name) self.stype = 'csr' - self.data = _api.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data') - self.indices = _api.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices') - self.indptr = _api.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr') + self.data = te.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data') + self.indices = te.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices') + self.indptr = te.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr') assert isinstance(self.data, _tensor.Tensor) assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) diff --git a/python/tvm/driver/build_module.py b/python/tvm/driver/build_module.py index f529ee26b58fa..336db833e24bf 100644 --- a/python/tvm/driver/build_module.py +++ b/python/tvm/driver/build_module.py @@ -325,9 +325,9 @@ def build(inputs, .. code-block:: python n = 2 - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') s = tvm.create_schedule(C.op) f = tvm.lower(s, [A, B, C], name="test_add") m = tvm.build(f, target="llvm") @@ -337,9 +337,9 @@ def build(inputs, .. code-block:: python n = 2 - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') s1 = tvm.create_schedule(C.op) with tvm.target.cuda() as cuda_tgt: s2 = topi.cuda.schedule_injective(cuda_tgt, [C]) diff --git a/python/tvm/hybrid/calls.py b/python/tvm/hybrid/calls.py index 0933628a99438..5b5c34d5cb0f3 100644 --- a/python/tvm/hybrid/calls.py +++ b/python/tvm/hybrid/calls.py @@ -16,6 +16,9 @@ # under the License. """Intrinsics of TVM-Python Hybrid Script for Python compilation time semantic support.""" + +from tvm.runtime import const, convert +import tvm.te from tvm.ir.container import Array from tvm import target as _tgt from tvm.tir import expr as _expr @@ -23,8 +26,6 @@ from tvm.tir import call_pure_intrin from tvm.tir.stmt import For -from .. import api as _api - from .util import _internal_assert # pylint: disable=redefined-builtin @@ -42,11 +43,11 @@ def _range(annotation, args): """Handling TVM loop types""" n = args.__len__() if n == 1: - low, ext = _api.const(0, dtype='int32'), args[0] + low, ext = const(0, dtype='int32'), args[0] else: _internal_assert(n == 2, "A loop intrinsic should only have 1 or 2 arguments!") low, ext = args[0], args[1] - if not ir_pass.Equal(low, _api.const(0, dtype='int32')): + if not ir_pass.Equal(low, const(0, dtype='int32')): ext = ext - low for_type = LOOP_INTRIN[annotation] iter_var = None @@ -62,16 +63,16 @@ def bind(func_id, args): _internal_assert(args.__len__() == 2, "A loop bind should only have 2 arguments!") _internal_assert(isinstance(args[0], str), \ "A loop bind's first argument should be a string!") - low, ext = _api.const(0, "int32"), args[1] - iter_var = _api.thread_axis((low, ext), args[0]) + low, ext = const(0, "int32"), args[1] + iter_var = tvm.te.thread_axis((low, ext), args[0]) for_type = None return iter_var, low, ext, for_type def _math_intrin(func_id, args): # pylint: disable=import-outside-toplevel - import tvm.tir.op - return getattr(tvm.tir.op, func_id)(*args) + from tvm.tir import op + return getattr(op, func_id)(*args) sqrt = log = exp = tanh = sigmoid = power = popcount = _math_intrin #pylint: disable=invalid-name @@ -88,7 +89,7 @@ def _allocate_tensor(func_id, args): """Handling TVM tensor allocation. You may refer hybrid.intrin.allocate for more details.""" n = args.__len__() - _internal_assert(isinstance(_api.convert(args[0]), Array), \ + _internal_assert(isinstance(convert(args[0]), Array), \ "allocate's first argument should be a tuple of shape!") shape = args[0] for i in shape: @@ -119,10 +120,10 @@ def len(func_id, args): _internal_assert(args.__len__() == 1, "Only 1 argument is expected!") _internal_assert(func_id == "len", "This function cannot be directly invoked!") try: - return _api.convert(args[0].__len__()) + return convert(args[0].__len__()) except: #pylint: disable=bare-except _internal_assert(args[0].shape.__len__() == 1, "Only one-dimension array can get len") - return _api.convert(args[0].shape[0]) + return convert(args[0].shape[0]) def _cast(func_id, args): @@ -159,4 +160,4 @@ def max_num_threads(func_id, args): else: _internal_assert(isinstance(args[0], _expr.IntImm), "In tvm bool should be uint") res = _tgt.Target.current(args[0].value).max_num_threads - return _api.convert(res) + return convert(res) diff --git a/python/tvm/hybrid/parser.py b/python/tvm/hybrid/parser.py index cf8584a1e999b..0f8f3dd2ad01d 100644 --- a/python/tvm/hybrid/parser.py +++ b/python/tvm/hybrid/parser.py @@ -25,7 +25,9 @@ from enum import Enum from tvm.ir import Array, Range +import tvm.runtime import tvm.tir +import tvm.te import tvm.te._ffi_api from tvm.tir import expr as _expr @@ -40,8 +42,6 @@ from . import util from .preprocessor import determine_variable_usage -from .. import api as _api - def concat_list_to_block(lst): """Concatenate a list of Python IR nodes to HalideIR Block""" @@ -125,7 +125,7 @@ def __init__(self, args, usage, symbols, closure_vars, func_name=None): """ Parameters ---------- - args: A list of tvm.placeholder or tvm.var + args: A list of tvm.te.placeholder or te.var Provided by the user, the argument list of the function to be lowered. usage: A dict of variables used in last in this function @@ -210,9 +210,9 @@ def wrap_up_realize(self, node, body): _domain = [Range.make_by_min_extent(0, i) for i in _buf.shape] _dtype = _buf.dtype - _true = _api.convert(True) + _true = tvm.runtime.convert(True) body = tvm.tir.Realize(_buf.op, 0, _dtype, _domain, _true, body) - body = tvm.tir.AttrStmt(_buf.op, 'realize_scope', _api.convert(_scope), body) + body = tvm.tir.AttrStmt(_buf.op, 'realize_scope', tvm.runtime.convert(_scope), body) for elem in to_pop: self.symbols.pop(elem) @@ -256,10 +256,10 @@ def visit_Expr(self, node): def visit_Name(self, node): name = node.id if sys.version_info[0] == 2 and name in ['True', 'False']: - return _api.convert(ast.literal_eval(name)) + return tvm.runtime.convert(ast.literal_eval(name)) if name in self.closure_vars: - return _api.convert(self.closure_vars[name]) + return tvm.runtime.convert(self.closure_vars[name]) ty, entry = self.symbols[name] _internal_assert(name in self.symbols, "Unknown symbol %s!" % name) @@ -271,9 +271,9 @@ def visit_Name(self, node): return entry if isinstance(node.ctx, ast.Load) else None if ty is Symbol.BufferVar: if isinstance(node.ctx, ast.Load): - return tvm.tir.Call(entry.dtype, entry.name, [_api.const(0, 'int32')], \ + return tvm.tir.Call(entry.dtype, entry.name, [tvm.runtime.const(0, 'int32')], \ _expr.Call.Halide, entry.op, entry.value_index) - return entry, [_api.const(0, 'int32')] + return entry, [tvm.runtime.const(0, 'int32')] # Do I need any assertion here? return entry @@ -287,11 +287,11 @@ def visit_Num(self, node): _internal_assert(isinstance(node.n, bool), "The data type should be one of (int, float, bool)") dtype = "bool" - return _api.const(node.n, dtype) + return tvm.runtime.const(node.n, dtype) def visit_NameConstant(self, node): - return _api.convert(node.value) + return tvm.runtime.convert(node.value) def visit_AugAssign(self, node): @@ -301,7 +301,7 @@ def visit_AugAssign(self, node): _internal_assert(len(buf) == 2, "LHS is supposed to be (buf, args)!") buf, args = buf else: - args = [_api.const(0, 'int32')] + args = [tvm.runtime.const(0, 'int32')] _internal_assert(isinstance(buf, Tensor), "LHS is supposed to be Tensor!") read = tvm.tir.Call(buf.dtype, buf.name, args, _expr.Call.Halide, buf.op, buf.value_index) @@ -341,7 +341,7 @@ def visit_Assign(self, node): "This value should not be defined before this point!") if isinstance(rhs, tuple): shape, dtype, scope = rhs - ph = _api.placeholder(shape, dtype=dtype, name=lhs) + ph = tvm.te.placeholder(shape, dtype=dtype, name=lhs) self.add_symbol(lhs, getattr(Symbol, scope.title() + "Buffer"), ph) if scope == 'output': self.outputs.append(lhs) @@ -353,7 +353,7 @@ def visit_Assign(self, node): "Single variable not supported in devices' side!\n" + \ "If you are using GPU, please allocate a 'local' spad " + \ "outside the bind body") - ph = _api.placeholder((1, ), dtype=rhs.dtype, name=lhs) + ph = tvm.te.placeholder((1, ), dtype=rhs.dtype, name=lhs) self.add_symbol(lhs, Symbol.BufferVar, ph) lhs = self.visit(lhs_) if lhs is not None: @@ -524,8 +524,8 @@ def visit_For(self, node): if iter_var is None: _internal_assert(for_type is not None, "The loop iterating function parse error!") - offset = iter_var = _api.var(_name) - if not _ir_pass.Equal(low, _api.const(0, 'int32')): + offset = iter_var = tvm.te.var(_name) + if not _ir_pass.Equal(low, tvm.runtime.const(0, 'int32')): offset = iter_var + low self.add_symbol(_name, Symbol.LoopVar, offset) _body = visit_list_to_block(self.visit, node.body) @@ -543,7 +543,7 @@ def visit_For(self, node): else: _internal_assert(not isinstance(for_type, tuple), \ "Micro expansion should be handled before!") - res = tvm.tir.For(iter_var, _api.const(0, 'int32'), ext, for_type, 0, _body) + res = tvm.tir.For(iter_var, tvm.runtime.const(0, 'int32'), ext, for_type, 0, _body) self.symbols.pop(_name) return res @@ -579,7 +579,7 @@ def visit_Str(self, node): def visit_Assert(self, node): test = self.visit(node.test) - mesg = _api.convert(self.visit(node.msg)) + mesg = tvm.runtime.convert(self.visit(node.msg)) return tvm.tir.AssertStmt(test, mesg, util.make_nop()) diff --git a/python/tvm/hybrid/util.py b/python/tvm/hybrid/util.py index 2b67956528787..dbdfaec716dc0 100644 --- a/python/tvm/hybrid/util.py +++ b/python/tvm/hybrid/util.py @@ -22,6 +22,7 @@ import sys import numpy +import tvm.runtime from tvm._ffi.base import numeric_types from tvm.ir.container import Array @@ -29,8 +30,6 @@ from tvm.tir import stmt as _stmt from tvm.te.tensor import Tensor -from .. import api as _api - #pylint: disable=invalid-name np_arg_types = tuple(list(numeric_types) + [numpy.ndarray]) @@ -47,7 +46,7 @@ def _internal_assert(cond, err): # Useful constants. In avoid of runtime dependences, we use function calls to return them. def make_nop(): """Returns a 'no operation' node in HalideIR.""" - return _stmt.Evaluate(_api.const(0, dtype='int32')) + return _stmt.Evaluate(tvm.runtime.const(0, dtype='int32')) def is_docstring(node): diff --git a/python/tvm/intrin.py b/python/tvm/intrin.py deleted file mode 100644 index 93e8fcb3f1403..0000000000000 --- a/python/tvm/intrin.py +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=unused-wildcard-import, wildcard-import, redefined-builtin -"""Backwared compatible layer for intrin.""" -from .tir.op import * diff --git a/python/tvm/make.py b/python/tvm/make.py deleted file mode 100644 index 089c3938723bb..0000000000000 --- a/python/tvm/make.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-import -"""namespace of IR node builder make function - -This namespace is used for developers. While you do not see any declarations. -The functions are automatically exported from C++ side via PackedFunc. - -Each api is a PackedFunc that can be called in a positional argument manner. -You can use make function to build the IR node. -""" -import tvm._ffi -import tvm.ir -from tvm.ir import make_node as node -from tvm.tir import Call - - -def make_by_min_extent(min_value, extent): - """Construct a Range by min and extent. - - This constructs a range in [min_value, min_value + extent) - - Parameters - ---------- - min_value : PrimExpr - The minimum value of the range. - - extent : PrimExpr - The extent of the range. - - Returns - ------- - rng : Range - The constructed range. - """ - return tvm.ir.Range.make_by_min_extent(min_value, extent) - -tvm._ffi._init_api("tvm.make") diff --git a/python/tvm/relay/__init__.py b/python/tvm/relay/__init__.py index 2ad210e7d1090..f4a7c75864d5a 100644 --- a/python/tvm/relay/__init__.py +++ b/python/tvm/relay/__init__.py @@ -18,7 +18,7 @@ """The Relay IR namespace containing the IR definition and compiler.""" import os from sys import setrecursionlimit -from ..api import register_func + from . import call_graph from . import base from . import ty diff --git a/python/tvm/relay/backend/_backend.py b/python/tvm/relay/backend/_backend.py index 9169ef49210db..cbc4079b9e619 100644 --- a/python/tvm/relay/backend/_backend.py +++ b/python/tvm/relay/backend/_backend.py @@ -29,7 +29,7 @@ def lower(sch, inputs, func_name, source_func): sch : tvm.Schedule The schedule. - inputs : List[tvm.Tensor] + inputs : List[tvm.te.Tensor] The inputs to the function. func_name : str diff --git a/python/tvm/relay/backend/compile_engine.py b/python/tvm/relay/backend/compile_engine.py index 6466dff6c5df4..f3a0c01a661d7 100644 --- a/python/tvm/relay/backend/compile_engine.py +++ b/python/tvm/relay/backend/compile_engine.py @@ -21,6 +21,7 @@ import logging import numpy as np import tvm +from tvm import te from ..base import register_relay_node, Object from ... import target as _target from ... import autotvm @@ -84,7 +85,7 @@ def get_shape(shape): assert val <= np.iinfo(np.int32).max ret.append(tvm.expr.IntImm("int32", val)) elif isinstance(dim, tvm.expr.Any): - ret.append(tvm.var("any_dim", "int32")) + ret.append(te.var("any_dim", "int32")) else: ret.append(dim) return ret @@ -103,7 +104,7 @@ def get_valid_implementations(op, attrs, inputs, out_type, target): attrs : object The op attribute. - inputs : List[tvm.Tensor] + inputs : List[tvm.te.Tensor] Input tensors to the op. out_type : relay.Type @@ -162,7 +163,7 @@ def select_implementation(op, attrs, inputs, out_type, target, use_autotvm=True) attrs : object The op attribute. - inputs : List[tvm.Tensor] + inputs : List[tvm.te.Tensor] Input tensors to the op. out_type : relay.Type @@ -176,7 +177,7 @@ def select_implementation(op, attrs, inputs, out_type, target, use_autotvm=True) Returns ------- - ret : tuple(relay.op.OpImplementation, List[tvm.Tensor]) + ret : tuple(relay.op.OpImplementation, List[tvm.te.Tensor]) The best op implementation and the corresponding output tensors. """ all_impls = get_valid_implementations(op, attrs, inputs, out_type, target) diff --git a/python/tvm/relay/debug.py b/python/tvm/relay/debug.py index a2f3533a35641..838eab57c0f21 100644 --- a/python/tvm/relay/debug.py +++ b/python/tvm/relay/debug.py @@ -16,22 +16,20 @@ # under the License. # pylint: disable=wildcard-import, redefined-builtin, invalid-name """The Relay IR namespace containing the IR definition and compiler.""" -from __future__ import absolute_import -from ..api import register_func - +import tvm._ffi # pylint: disable=unused-argument, import-outside-toplevel def _debugger_init(expr, stack): import pdb pdb.set_trace() -@register_func("relay.debug") +@tvm._ffi.register_func("relay.debug") def _debug(*args): import pdb pdb.set_trace() # pylint: disable=unused-argument -@register_func("relay.debug_interp") +@tvm._ffi.register_func("relay.debug_interp") def _debug_interp(*args): _, _, _, ist = args print("Relay Debugger") diff --git a/python/tvm/relay/frontend/coreml.py b/python/tvm/relay/frontend/coreml.py index 99a3930a4ea14..0e5b64cbbacc4 100644 --- a/python/tvm/relay/frontend/coreml.py +++ b/python/tvm/relay/frontend/coreml.py @@ -17,7 +17,6 @@ # pylint: disable=invalid-name, import-self, unused-argument, unused-variable # pylint: disable=inconsistent-return-statements, import-outside-toplevel """CoreML frontend.""" -from __future__ import absolute_import as _abs import math import numpy as np import tvm diff --git a/python/tvm/relay/frontend/darknet.py b/python/tvm/relay/frontend/darknet.py index 7623df293cb9b..0dae645cd9a4a 100644 --- a/python/tvm/relay/frontend/darknet.py +++ b/python/tvm/relay/frontend/darknet.py @@ -19,7 +19,6 @@ DarkNet symbol frontend for Relay. """ -from __future__ import absolute_import as _abs from enum import Enum import numpy as np import tvm diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index d74277bbe402f..2787cd6d46473 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -16,8 +16,6 @@ # under the License. # pylint: disable=invalid-name, import-self, len-as-condition, no-else-return, too-many-lines """MXNet symbol frontend.""" -from __future__ import absolute_import as _abs - import json import numpy as np import tvm diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index af8715abaed38..0b766a17aa1b9 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -406,7 +406,7 @@ def _impl(inputs, input_types): val = inputs[0] dtype = type(val) - if isinstance(val, tvm.expr.IntImm): + if isinstance(val, tvm.tir.IntImm): val = val.__int__() dtype = int diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index 5532e3a5c1a47..6f27d73315a19 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -18,9 +18,6 @@ # pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except # pylint: disable=import-outside-toplevel """TF: Tensorflow frontend.""" -from __future__ import absolute_import as _abs -from __future__ import print_function - import warnings from collections import defaultdict @@ -1012,7 +1009,7 @@ def _impl(inputs, attr, params): 'Attribute batch_dims is not supported') new_input = inputs[0:2] return AttrCvt(op_name="take", - extras={'axis': tvm.const(axis, 'int32')}, + extras={'axis': tvm.tir.const(axis, 'int32')}, ignores=['Tindices', 'Tparams', 'validate_indices', 'Taxis', '_class', 'batch_dims'])(new_input, attr) return _impl diff --git a/python/tvm/relay/frontend/tflite.py b/python/tvm/relay/frontend/tflite.py index 352bc6302ee04..3185c554eb955 100644 --- a/python/tvm/relay/frontend/tflite.py +++ b/python/tvm/relay/frontend/tflite.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=invalid-name, unused-argument, too-many-lines, import-outside-toplevel - """Tensorflow lite frontend.""" import math import numpy as np diff --git a/python/tvm/relay/op/_reduce.py b/python/tvm/relay/op/_reduce.py index 9d52ed3af7770..ab8b7c2ac1a82 100644 --- a/python/tvm/relay/op/_reduce.py +++ b/python/tvm/relay/op/_reduce.py @@ -17,9 +17,9 @@ """Backend compiler related feature registration""" from __future__ import absolute_import +from tvm.runtime import convert from topi.util import get_const_int, get_const_tuple from . import op as _reg -from ...api import convert from ...hybrid import script _reg.register_reduce_schedule("argmax") diff --git a/python/tvm/relay/op/_tensor.py b/python/tvm/relay/op/_tensor.py index 7c8ccb7dd8277..0fbbaef374dff 100644 --- a/python/tvm/relay/op/_tensor.py +++ b/python/tvm/relay/op/_tensor.py @@ -16,14 +16,14 @@ # under the License. #pylint: disable=invalid-name, unused-argument, len-as-condition """Backend compiler related feature registration""" -from __future__ import absolute_import import topi + +from tvm.runtime import convert from topi.util import get_const_tuple from .op import register_compute, register_shape_func from .op import register_broadcast_schedule, register_injective_schedule from .op import register_pattern, OpPattern from ...hybrid import script -from ...api import convert register_broadcast_schedule("log") diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 42c94349da8c4..4b350093408ee 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -18,13 +18,14 @@ # pylint: disable=invalid-name,unused-argument, len-as-condition, too-many-nested-blocks, too-many-local-variables, too-many-arguments from __future__ import absolute_import import tvm +from tvm import te +from tvm.runtime import convert import topi from topi.util import get_const_int, get_const_tuple from . import op as _reg from . import strategy from .op import OpPattern from ...hybrid import script -from ...api import convert _reg.register_broadcast_schedule("broadcast_to") _reg.register_broadcast_schedule("broadcast_to_like") @@ -79,7 +80,7 @@ def compute_argwhere(attrs, inputs, output_type): output_shape.append(s) else: # see Any, replace it with a var - output_shape.append(tvm.var("any_dim", "int32")) + output_shape.append(te.var("any_dim", "int32")) new_output_type = tvm.relay.ty.TensorType(output_shape, "int32") return [topi.argwhere(new_output_type, inputs[0])] @@ -473,7 +474,7 @@ def squeeze_shape_func(attrs, inputs, _): if keep_axes: out = _squeeze_shape_func(inputs[0], convert(keep_axes)) else: - out = tvm.compute((), lambda *indices: 0) + out = te.compute((), lambda *indices: 0) return [out] @script diff --git a/python/tvm/relay/op/algorithm.py b/python/tvm/relay/op/algorithm.py index 6f875919df4c2..17fab80118af2 100644 --- a/python/tvm/relay/op/algorithm.py +++ b/python/tvm/relay/op/algorithm.py @@ -28,7 +28,7 @@ def argsort(data, axis=-1, is_ascend=1, dtype="int32"): data : relay.Expr The input data tensor. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor The number of valid elements to be sorted. axis : int, optional diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index 97a5fa6ec00b8..a4fde283daadf 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -20,11 +20,12 @@ import topi from topi.util import get_const_tuple + +from tvm.runtime import convert from .. import op as reg from .. import strategy from ..op import OpPattern from .._tensor import elemwise_shape_func -from ....api import convert from ....hybrid import script # relu diff --git a/python/tvm/relay/op/op.py b/python/tvm/relay/op/op.py index 4fd88f4383df3..d2a05af0d978b 100644 --- a/python/tvm/relay/op/op.py +++ b/python/tvm/relay/op/op.py @@ -21,7 +21,6 @@ from ..base import register_relay_node from ..expr import RelayExpr -from ...api import register_func from ...target import get_native_generic_func, GenericFunc from ...runtime import Object from . import _make @@ -155,7 +154,7 @@ def compute(self, attrs, inputs, out_type): attrs : Attrs Op attributes. - inputs : list[tvm.tensor.Tensor] + inputs : list[te.tensor.Tensor] The input tensors. out_type : relay.Type @@ -163,7 +162,7 @@ def compute(self, attrs, inputs, out_type): Returns ------- - outs : list[tvm.tensor.Tensor] + outs : list[te.tensor.Tensor] The output tensors. """ return _OpImplementationCompute(self, attrs, inputs, out_type) @@ -176,7 +175,7 @@ def schedule(self, attrs, outs, target): attrs : Attrs Op attributes. - outs : list[tvm.tensor.Tensor] + outs : list[te.tensor.Tensor] The output tensors. target : tvm.target.Target @@ -454,11 +453,11 @@ def register_shape_func(op_name, data_dependant, shape_func=None, level=10): get(op_name).set_attr("TShapeDataDependant", data_dependant, level) return register(op_name, "FShapeFunc", shape_func, level) -@register_func("relay.op.compiler._lower") +@tvm._ffi.register_func("relay.op.compiler._lower") def _lower(name, schedule, inputs, outputs): return lower(schedule, list(inputs) + list(outputs), name=name) -@register_func("relay.op.compiler._build") +@tvm._ffi.register_func("relay.op.compiler._build") def _build(lowered_funcs): return build(lowered_funcs, target="llvm") @@ -473,7 +472,7 @@ def debug(expr, debug_func=None): if debug_func: name = "debugger_func{}".format(__DEBUG_COUNTER__) - register_func(name, debug_func) + tvm._ffi.register_func(name, debug_func) __DEBUG_COUNTER__ += 1 else: name = '' diff --git a/python/tvm/relay/param_dict.py b/python/tvm/relay/param_dict.py index 4c3f6d1423695..b7fee8c12128b 100644 --- a/python/tvm/relay/param_dict.py +++ b/python/tvm/relay/param_dict.py @@ -17,9 +17,11 @@ # pylint: disable=invalid-name """Helper utility to save parameter dicts.""" import tvm +import tvm._ffi -_save_param_dict = tvm.get_global_func("tvm.relay._save_param_dict") -_load_param_dict = tvm.get_global_func("tvm.relay._load_param_dict") + +_save_param_dict = tvm._ffi.get_global_func("tvm.relay._save_param_dict") +_load_param_dict = tvm._ffi.get_global_func("tvm.relay._load_param_dict") def save_param_dict(params): """Save parameter dictionary to binary bytes. diff --git a/python/tvm/relay/quantize/quantize.py b/python/tvm/relay/quantize/quantize.py index be8a3a3233167..56a4645058e5f 100644 --- a/python/tvm/relay/quantize/quantize.py +++ b/python/tvm/relay/quantize/quantize.py @@ -16,12 +16,12 @@ # under the License. #pylint: disable=unused-argument, not-context-manager """Automatic quantization toolkit.""" -from __future__ import absolute_import +import tvm.ir + from . import _quantize from ._calibrate import calibrate from .. import expr as _expr from .. import transform as _transform -from ... import make as _make from ..base import Object, register_relay_node @@ -181,7 +181,7 @@ def qconfig(**kwargs): """ node_args = {k: v if k not in kwargs else kwargs[k] for k, v in QConfig._node_defaults.items()} - return _make.node("relay.quantize.QConfig", **node_args) + return tvm.ir.make_node("relay.quantize.QConfig", **node_args) class QuantizeContext(object): diff --git a/python/tvm/relay/testing/__init__.py b/python/tvm/relay/testing/__init__.py index bff01e859a50d..54c909179e4f0 100644 --- a/python/tvm/relay/testing/__init__.py +++ b/python/tvm/relay/testing/__init__.py @@ -20,6 +20,7 @@ import numpy as np import tvm +from tvm import te import tvm.relay as relay import tvm.relay.op as op from tvm.relay import transform diff --git a/python/tvm/relay/testing/config.py b/python/tvm/relay/testing/config.py index 68756e0a270fb..93a08db32d2ce 100644 --- a/python/tvm/relay/testing/config.py +++ b/python/tvm/relay/testing/config.py @@ -20,6 +20,7 @@ import os import tvm + def ctx_list(): """Get context list for testcases""" device_list = os.environ.get("RELAY_TEST_TARGETS", "") diff --git a/python/tvm/relay/transform.py b/python/tvm/relay/transform.py index 08b41b28bd35e..45535afc486c4 100644 --- a/python/tvm/relay/transform.py +++ b/python/tvm/relay/transform.py @@ -23,6 +23,7 @@ import functools import tvm +from tvm import te from tvm.runtime import ndarray as _nd from tvm.ir.transform import PassInfo, PassContext, Pass, ModulePass, Sequential, module_pass diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py index 211bee32ed3c6..2643ff131ba03 100644 --- a/python/tvm/runtime/vm.py +++ b/python/tvm/runtime/vm.py @@ -106,6 +106,7 @@ def save(self): import numpy as np import tvm +from tvm import te from tvm import relay # define a simple network. x = relay.var('x', shape=(10, 10)) diff --git a/python/tvm/target/generic_func.py b/python/tvm/target/generic_func.py index 1936ff1511be3..bfcd2dd56b4f4 100644 --- a/python/tvm/target/generic_func.py +++ b/python/tvm/target/generic_func.py @@ -116,6 +116,7 @@ def override_native_generic_func(func_name): .. code-block:: python import tvm +from tvm import te # wrap function as target generic @tvm.target.override_native_generic_func("my_func") def my_func(a): @@ -210,6 +211,7 @@ def generic_func(fdefault): .. code-block:: python import tvm +from tvm import te # wrap function as target generic @tvm.target.generic_func def my_func(a): diff --git a/python/tvm/te/__init__.py b/python/tvm/te/__init__.py index 5970315e854b0..065cf4e5dbdd4 100644 --- a/python/tvm/te/__init__.py +++ b/python/tvm/te/__init__.py @@ -18,6 +18,7 @@ """Namespace for Tensor Expression Language """ # expose all operators in tvm tir.op +from tvm.tir import any, all, min_value, max_value, trace from tvm.tir import exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil from tvm.tir import trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else from tvm.tir import div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod @@ -29,3 +30,5 @@ from .tag import tag_scope from .operation import placeholder, compute, scan, extern, var, size_var from .operation import thread_axis, reduce_axis + +from .tensor import PlaceholderOp, ComputeOp, TensorComputeOp, ScanOp, ExternOp, HybridOp diff --git a/python/tvm/te/operation.py b/python/tvm/te/operation.py index 3c5b610e99be7..3ccab5bfd9c30 100644 --- a/python/tvm/te/operation.py +++ b/python/tvm/te/operation.py @@ -167,13 +167,13 @@ def scan(init, update, state_placeholder, inputs=None, name="scan", tag="", attr .. code-block:: python # The following code is equivalent to numpy.cumsum - m = tvm.var("m") - n = tvm.var("n") - X = tvm.placeholder((m, n), name="X") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: X[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) - res = tvm.scan(s_init, s_update, s_state, X) + m = te.var("m") + n = te.var("n") + X = te.placeholder((m, n), name="X") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: X[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) + res = tvm.te.scan(s_init, s_update, s_state, X) """ if _tag.TagScope.get_current() is not None: if tag != "": @@ -264,10 +264,10 @@ def extern(shape, .. code-block:: python - A = tvm.placeholder((n, l), name="A") - B = tvm.placeholder((l, m), name="B") - C = tvm.extern((n, m), [A, B], - lambda ins, outs: tvm.call_packed( + A = te.placeholder((n, l), name="A") + B = te.placeholder((l, m), name="B") + C = te.extern((n, m), [A, B], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cblas.matmul", ins[0], ins[1], outs[0], 0, 0), name="C") """ diff --git a/python/tvm/te/tag.py b/python/tvm/te/tag.py index 189076d03cc36..78c89402d8ee4 100644 --- a/python/tvm/te/tag.py +++ b/python/tvm/te/tag.py @@ -73,19 +73,19 @@ def tag_scope(tag): ------- .. code-block:: python - n = tvm.var('n') - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') - k = tvm.reduce_axis((0, l), name='k') + n = te.var('n') + m = te.var('m') + l = te.var('l') + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') + k = te.reduce_axis((0, l), name='k') with tvm.tag_scope(tag='matmul'): - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k)) + C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k)) # or use tag_scope as decorator @tvm.tag_scope(tag="conv") def compute_relu(data): - return tvm.compute(data.shape, lambda *i: tvm.select(data(*i) < 0, 0.0, data(*i))) + return te.compute(data.shape, lambda *i: tvm.select(data(*i) < 0, 0.0, data(*i))) """ return TagScope(tag) diff --git a/python/tvm/tir/__init__.py b/python/tvm/tir/__init__.py index ab78ca6d6d63d..53ed599825ff4 100644 --- a/python/tvm/tir/__init__.py +++ b/python/tvm/tir/__init__.py @@ -17,6 +17,8 @@ # pylint: disable=unused-import, redefined-builtin """Namespace for Tensor-level IR""" from tvm.ir import PrimExpr +from tvm.runtime import const + from .buffer import Buffer, decl_buffer from .data_layout import Layout, BijectiveLayout, bijective_layout, layout from .expr import Var, SizeVar, Reduce, FloatImm, IntImm, StringImm, Cast @@ -30,7 +32,7 @@ from .stmt import IfThenElse, Evaluate, Prefetch, LoweredFunc, stmt_seq, stmt_list from .op import call_packed, call_pure_intrin, call_intrin, call_pure_extern, call_extern -from .op import call_llvm_intrin, all, any, min_value, max_value +from .op import call_llvm_intrin, all, any, min_value, max_value, trace from .op import exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil from .op import trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else from .op import div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod diff --git a/python/tvm/tir/buffer.py b/python/tvm/tir/buffer.py index d0d01d7479bed..0c7753e4d8ec1 100644 --- a/python/tvm/tir/buffer.py +++ b/python/tvm/tir/buffer.py @@ -201,15 +201,15 @@ def decl_buffer(shape, .. code-block:: python - m0, m1, m2 = tvm.var("m0"), tvm.var("m1"), tvm.var("m2") - n0, n1, n2 = tvm.var("n0"), tvm.var("n1"), tvm.var("n2") - o0, o1, o2 = tvm.var("o0"), tvm.var("o1"), tvm.var("o2") - A = tvm.placeholder((m0, m1, m2), name='A') - B = tvm.placeholder((n0, n1, n2), name='B') - C = tvm.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') + m0, m1, m2 = te.var("m0"), te.var("m1"), te.var("m2") + n0, n1, n2 = te.var("n0"), te.var("n1"), te.var("n2") + o0, o1, o2 = te.var("o0"), te.var("o1"), te.var("o2") + A = te.placeholder((m0, m1, m2), name='A') + B = te.placeholder((n0, n1, n2), name='B') + C = te.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) fadd = tvm.build(s, [A, B, C], target='llvm', name='bcast_add', binds={A:Ab, B:Bb}) ctx = tvm.cpu(0) a = tvm.nd.array(np.random.uniform(size=(2, 4, 3)).astype(A.dtype), ctx) diff --git a/python/tvm/tir/expr.py b/python/tvm/tir/expr.py index acf5f51941dc9..bcf596787cd43 100644 --- a/python/tvm/tir/expr.py +++ b/python/tvm/tir/expr.py @@ -25,7 +25,7 @@ .. code-block:: python - x = tvm.var("n") + x = te.var("n") y = x + 2 assert(isinstance(y, tvm.tir.Add)) assert(y.a == x) @@ -169,7 +169,7 @@ def __ge__(self, other): def __nonzero__(self): raise ValueError("Cannot use and / or / not operator to Expr, hint: " + - "use tvm.all / tvm.any instead") + "use tvm.tir.all / tvm.tir.any instead") def __bool__(self): return self.__nonzero__() @@ -346,8 +346,8 @@ class IterVar(Object, ExprOp): See Also -------- - tvm.thread_axis: Create thread axis IterVar. - tvm.reduce_axis: Create reduce axis IterVar. + te.thread_axis: Create thread axis IterVar. + te.reduce_axis: Create reduce axis IterVar. """ DataPar = 0 ThreadIndex = 1 @@ -812,7 +812,7 @@ class Select(PrimExprWithOp): Note ---- Select may compute both true_value and false_value. - Use :py:class:`tvm.if_then_else` instead if you want to + Use :py:class:`tvm.tir.if_then_else` instead if you want to get a conditional expression that only evaluates the correct branch. diff --git a/python/tvm/tir/ir_builder.py b/python/tvm/tir/ir_builder.py index b56e15377358a..6e6b1128e6cf7 100644 --- a/python/tvm/tir/ir_builder.py +++ b/python/tvm/tir/ir_builder.py @@ -99,7 +99,7 @@ class IRBuilder(object): .. code-block:: python ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") A = ib.allocate("float32", n, name="A") with ib.for_range(0, n, name="i") as i: with ib.if_scope((i % 2) == 0): @@ -159,7 +159,7 @@ def scope_attr(self, node, attr_key, value): .. code-block:: python ib = tvm.ir_builder.create() - i = tvm.var("i") + i = te.var("i") x = ib.pointer("float32") ib.scope_attr(x, "storage_scope", "global") x[i] = x[i - 1] + 1 @@ -244,7 +244,7 @@ def if_scope(self, cond): .. code-block:: python ib = tvm.ir_builder.create() - i = tvm.var("i") + i = te.var("i") x = ib.pointer("float32") with ib.if_scope((i % 2) == 0): x[i] = x[i - 1] + 1 @@ -269,7 +269,7 @@ def else_scope(self): .. code-block:: python ib = tvm.ir_builder.create() - i = tvm.var("i") + i = te.var("i") x = ib.pointer("float32") with ib.if_scope((i % 2) == 0): x[i] = x[i - 1] + 1 diff --git a/python/tvm/tir/op.py b/python/tvm/tir/op.py index 66e70c508438d..4a52787262bb3 100644 --- a/python/tvm/tir/op.py +++ b/python/tvm/tir/op.py @@ -64,7 +64,7 @@ def call_packed(*args): See Also -------- - tvm.extern : Create tensor with extern function call. + te.extern : Create tensor with extern function call. """ call_args = [_pack_buffer(x) if isinstance(x, Buffer) else x for x in args] return Call( @@ -194,7 +194,7 @@ def call_llvm_intrin(dtype, name, *args): from tvm.target import codegen llvm_id = codegen.llvm_lookup_intrinsic_id(name) assert llvm_id != 0, "%s is not an LLVM intrinsic" % name - return call_pure_intrin(dtype, 'llvm_intrin', tvm.const(llvm_id, 'uint32'), *args) + return call_pure_intrin(dtype, 'llvm_intrin', tvm.tir.const(llvm_id, 'uint32'), *args) def any(*args): @@ -274,7 +274,7 @@ def trace(args, trace_action="tvm.default_trace_action"): tvm.tir.call_packed : Creates packed function. """ if not isinstance(args, list): - raise Exception("tvm.trace consumes the args as list type") + raise Exception("tvm.tir.trace consumes the args as list type") call_args = [_pack_buffer(x) if isinstance(x, Buffer) else x for x in args] call_args.insert(0, trace_action) return tvm.tir.Call( @@ -556,9 +556,9 @@ def round(x): def nearbyint(x): """Round elements of the array to the nearest integer. This intrinsic uses llvm.nearbyint instead of llvm.round - which is faster but will results different from tvm.round. + which is faster but will results different from te.round. Notably nearbyint rounds according to the rounding mode, - whereas tvm.round (llvm.round) ignores that. + whereas te.round (llvm.round) ignores that. For differences between the two see: https://en.cppreference.com/w/cpp/numeric/math/round https://en.cppreference.com/w/cpp/numeric/math/nearbyint @@ -855,13 +855,13 @@ def comm_reducer(fcombine, fidentity, name="reduce"): ------- .. code-block:: python - n = tvm.var("n") - m = tvm.var("m") - mysum = tvm.comm_reducer(lambda x, y: x+y, - lambda t: tvm.const(0, dtype=t), name="mysum") - A = tvm.placeholder((n, m), name="A") - k = tvm.reduce_axis((0, m), name="k") - B = tvm.compute((n,), lambda i: mysum(A[i, k], axis=k), name="B") + n = te.var("n") + m = te.var("m") + mysum = te.comm_reducer(lambda x, y: x+y, + lambda t: tvm.tir.const(0, dtype=t), name="mysum") + A = te.placeholder((n, m), name="A") + k = te.reduce_axis((0, m), name="k") + B = te.compute((n,), lambda i: mysum(A[i, k], axis=k), name="B") """ def _reduce_directly(*args): num = len(args) @@ -943,14 +943,14 @@ def reducer(expr, axis, where=None, *args): ------- .. code-block:: python - m = tvm.var("m") - n = tvm.var("n") - A = tvm.placeholder((m, n), name="A") - k = tvm.reduce_axis((0, n), name="k") + m = te.var("m") + n = te.var("n") + A = te.placeholder((m, n), name="A") + k = te.reduce_axis((0, n), name="k") # there are two way to use this {0} reducer: # mode 1, accept (expr, axis, where) to produce an Reduce Expr - B = tvm.compute((m,), lambda i: tvm.{0}(A[i, k], axis=k), name="B") + B = te.compute((m,), lambda i: tvm.{0}(A[i, k], axis=k), name="B") # mode 2, simply use it with multiple Exprs: {0}_res = tvm.{0}(m, n) diff --git a/python/tvm/tir/stmt.py b/python/tvm/tir/stmt.py index bc02b7d23eadd..65c72ddfeb36f 100644 --- a/python/tvm/tir/stmt.py +++ b/python/tvm/tir/stmt.py @@ -23,8 +23,8 @@ .. code-block:: python - x = tvm.var("n") - a = tvm.var("array", tvm.handle) + x = te.var("n") + a = te.var("array", "handle") st = tvm.tir.stmt.Store(a, x + 1, 1) assert isinstance(st, tvm.tir.stmt.Store) assert(st.buffer_var == a) diff --git a/rust/frontend/examples/resnet/src/build_resnet.py b/rust/frontend/examples/resnet/src/build_resnet.py index e71381888c1f6..49c67bf1c4f31 100644 --- a/rust/frontend/examples/resnet/src/build_resnet.py +++ b/rust/frontend/examples/resnet/src/build_resnet.py @@ -25,6 +25,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import testing from tvm.contrib import graph_runtime, cc diff --git a/rust/frontend/tests/basics/src/tvm_add.py b/rust/frontend/tests/basics/src/tvm_add.py index 287084bcf9aae..3911d4074e453 100755 --- a/rust/frontend/tests/basics/src/tvm_add.py +++ b/rust/frontend/tests/basics/src/tvm_add.py @@ -20,20 +20,21 @@ import sys import tvm +from tvm import te from tvm.contrib import cc def main(target, out_dir): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C') - s = tvm.create_schedule(C.op) + n = te.var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name='C') + s = te.create_schedule(C.op) if target == 'cuda': bx, tx = s[C].split(C.op.axis[0], factor=64) - s[C].bind(bx, tvm.thread_axis('blockIdx.x')) - s[C].bind(tx, tvm.thread_axis('threadIdx.x')) + s[C].bind(bx, te.thread_axis('blockIdx.x')) + s[C].bind(tx, te.thread_axis('threadIdx.x')) fadd = tvm.build(s, [A, B, C], target, target_host='llvm', name='myadd') diff --git a/rust/runtime/tests/build_model.py b/rust/runtime/tests/build_model.py index e3da95f24fd82..d1dffad372494 100755 --- a/rust/runtime/tests/build_model.py +++ b/rust/runtime/tests/build_model.py @@ -22,6 +22,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import testing diff --git a/rust/runtime/tests/test_nn/src/build_test_graph.py b/rust/runtime/tests/test_nn/src/build_test_graph.py index dd7621b921f77..832dddf12d763 100755 --- a/rust/runtime/tests/test_nn/src/build_test_graph.py +++ b/rust/runtime/tests/test_nn/src/build_test_graph.py @@ -23,6 +23,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import testing diff --git a/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py b/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py index 38c1f3a7a223f..a04e2b80b6618 100755 --- a/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py +++ b/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py @@ -22,12 +22,13 @@ import sys import tvm +from tvm import te def main(): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + n = te.var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') s = tvm.create_schedule(C.op) s[C].parallel(s[C].op.axis[0]) print(tvm.lower(s, [A, B, C], simple_mode=True)) diff --git a/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py b/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py index 63b43a5f9befa..c3e397d12ace8 100755 --- a/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py +++ b/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py @@ -22,13 +22,14 @@ import sys import tvm +from tvm import te from tvm.contrib import cc def main(): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + n = te.var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') s = tvm.create_schedule(C.op) s[C].parallel(s[C].op.axis[0]) print(tvm.lower(s, [A, B, C], simple_mode=True)) diff --git a/tests/python/contrib/test_binutil.py b/tests/python/contrib/test_binutil.py index 44739bbda3cb9..3106e73136fa8 100644 --- a/tests/python/contrib/test_binutil.py +++ b/tests/python/contrib/test_binutil.py @@ -24,6 +24,7 @@ """ import tvm +from tvm import te import subprocess from tvm.contrib import util from tvm.contrib import cc diff --git a/tests/python/contrib/test_cblas.py b/tests/python/contrib/test_cblas.py index 99614a8d93add..18ea57a003f0b 100644 --- a/tests/python/contrib/test_cblas.py +++ b/tests/python/contrib/test_cblas.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import topi.testing from tvm.contrib import cblas -def verify_matmul_add(m, l, n, transa=False, transb=False, dtype=tvm.float32): - bias = tvm.var('bias', dtype=dtype) +def verify_matmul_add(m, l, n, transa=False, transb=False, dtype="float32"): + bias = te.var('bias', dtype=dtype) ashape = (l, n) if transa else (n, l) bshape = (m, l) if transb else (l, m) - A = tvm.placeholder(ashape, name='A', dtype=dtype) - B = tvm.placeholder(bshape, name='B', dtype=dtype) + A = te.placeholder(ashape, name='A', dtype=dtype) + B = te.placeholder(bshape, name='B', dtype=dtype) C = cblas.matmul(A, B, transa, transb) - D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") - s = tvm.create_schedule(D.op) + D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") + s = te.create_schedule(D.op) def get_numpy(a, b, bb, transa, transb): if transa: @@ -64,14 +65,14 @@ def test_matmul_add(): verify_matmul_add(1, 16, 3, False, False) verify_matmul_add(1, 16, 3, True, True) -def verify_batch_matmul(batch, m, l, n, transa=False, transb=False, iterative=False, dtype=tvm.float32): +def verify_batch_matmul(batch, m, l, n, transa=False, transb=False, iterative=False, dtype="float32"): ashape = (batch, l, n) if transa else (batch, n, l) bshape = (batch, m, l) if transb else (batch, l, m) - A = tvm.placeholder(ashape, name='A', dtype=dtype) - B = tvm.placeholder(bshape, name='B', dtype=dtype) + A = te.placeholder(ashape, name='A', dtype=dtype) + B = te.placeholder(bshape, name='B', dtype=dtype) C = cblas.batch_matmul(A, B, transa, transb) - D = tvm.compute(C.shape, lambda k, i, j: C[k, i,j], name="D") - s = tvm.create_schedule(D.op) + D = te.compute(C.shape, lambda k, i, j: C[k, i,j], name="D") + s = te.create_schedule(D.op) def get_numpy(a, b, transa, transb): if transa: diff --git a/tests/python/contrib/test_cublas.py b/tests/python/contrib/test_cublas.py index a3baa8c829e36..517e6e1240303 100644 --- a/tests/python/contrib/test_cublas.py +++ b/tests/python/contrib/test_cublas.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import cublas from tvm.contrib import cublaslt @@ -23,10 +24,10 @@ def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5): n = 1024 l = 128 m = 236 - A = tvm.placeholder((n, l), name='A', dtype=in_dtype) - B = tvm.placeholder((l, m), name='B', dtype=in_dtype) + A = te.placeholder((n, l), name='A', dtype=in_dtype) + B = te.placeholder((l, m), name='B', dtype=in_dtype) C = cublas.matmul(A, B, dtype=out_dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="cuda"): if not tvm.runtime.enabled(target): @@ -56,11 +57,11 @@ def verify_matmul_add_igemm(in_dtype, out_dtype, rtol=1e-5): N = roundoff(n, 8) N_out = roundoff(n, 32) - A = tvm.placeholder((N, L), name='A', dtype=in_dtype) - B = tvm.placeholder((m, L), name='B', dtype=in_dtype) + A = te.placeholder((N, L), name='A', dtype=in_dtype) + B = te.placeholder((m, L), name='B', dtype=in_dtype) # C has CUBLASLT_ORDER_COL32 layout, thus a different shape C = cublaslt.matmul(A, B, False, True, m, N_out, dtype=out_dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="cuda"): if not tvm.runtime.enabled(target): @@ -108,10 +109,10 @@ def verify_batch_matmul(in_dtype, out_dtype, rtol=1e-5): n = 1024 l = 128 m = 236 - A = tvm.placeholder((j, n, l), name='A', dtype=in_dtype) - B = tvm.placeholder((j, l, m), name='B', dtype=in_dtype) + A = te.placeholder((j, n, l), name='A', dtype=in_dtype) + B = te.placeholder((j, l, m), name='B', dtype=in_dtype) C = cublas.batch_matmul(A, B, dtype=out_dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="cuda"): if not tvm.runtime.enabled(target): diff --git a/tests/python/contrib/test_cudnn.py b/tests/python/contrib/test_cudnn.py index 1a22f90eb804d..58e7b4905988f 100644 --- a/tests/python/contrib/test_cudnn.py +++ b/tests/python/contrib/test_cudnn.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.contrib import cudnn import numpy as np import topi.testing @@ -48,8 +49,8 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0): xshape = [batch, height, weight, in_channel] wshape = [out_channel, filter_h, filter_w, in_channel] - X = tvm.placeholder(xshape, name='X', dtype=data_dtype) - W = tvm.placeholder(wshape, name='W', dtype=data_dtype) + X = te.placeholder(xshape, name='X', dtype=data_dtype) + W = te.placeholder(wshape, name='W', dtype=data_dtype) Y = cudnn.conv_forward(X, W, [pad_h, pad_w], @@ -60,7 +61,7 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0): conv_dtype=conv_dtype, algo=-1) yshape = [x.value for x in Y.shape] - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) def verify(): ctx = tvm.gpu(0) @@ -120,8 +121,8 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0): xshape = [batch, in_channel, depth, height, weight] wshape = [out_channel, in_channel, filter_d, filter_h, filter_w] - X = tvm.placeholder(xshape, name='X', dtype=data_dtype) - W = tvm.placeholder(wshape, name='W', dtype=data_dtype) + X = te.placeholder(xshape, name='X', dtype=data_dtype) + W = te.placeholder(wshape, name='W', dtype=data_dtype) Y = cudnn.conv_forward(X, W, [pad_d, pad_h, pad_w], @@ -132,7 +133,7 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0): algo=-1, conv_dtype=conv_dtype) yshape = [x.value for x in Y.shape] - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) def verify(): ctx = tvm.gpu(0) diff --git a/tests/python/contrib/test_dlpack.py b/tests/python/contrib/test_dlpack.py index f39595582f031..453556c83e180 100644 --- a/tests/python/contrib/test_dlpack.py +++ b/tests/python/contrib/test_dlpack.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib.dlpack import to_pytorch_func @@ -34,17 +35,17 @@ def test(): np.testing.assert_equal(y.asnumpy(), tvm_x.asnumpy()) np.testing.assert_equal(torch.utils.dlpack.from_dlpack(y.to_dlpack()).numpy(), tvm_x.asnumpy()) - n = tvm.convert(137) + n = tvm.runtime.convert(137) xx = torch.rand(137,137) yy = torch.rand(137,137) zz2 = torch.empty(137,137) zz = xx.mm(yy) - XX = tvm.placeholder((n,n), name='X') - YY = tvm.placeholder((n,n), name='Y') + XX = te.placeholder((n,n), name='X') + YY = te.placeholder((n,n), name='Y') - k = tvm.reduce_axis((0, n), name='k') - ZZ = tvm.compute((n,n), lambda i,j : tvm.sum(XX[i,k]*YY[k,j], axis=k)) - s = tvm.create_schedule(ZZ.op) + k = te.reduce_axis((0, n), name='k') + ZZ = te.compute((n,n), lambda i,j : te.sum(XX[i,k]*YY[k,j], axis=k)) + s = te.create_schedule(ZZ.op) f = tvm.build(s, [XX, YY, ZZ], target_host='llvm', name='f') f_pytorch = to_pytorch_func(f) diff --git a/tests/python/contrib/test_edgetpu_runtime.py b/tests/python/contrib/test_edgetpu_runtime.py index a5d9e34e2efb1..625dc94b7ec8d 100644 --- a/tests/python/contrib/test_edgetpu_runtime.py +++ b/tests/python/contrib/test_edgetpu_runtime.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te import numpy as np from tvm import rpc from tvm.contrib import util, tflite_runtime diff --git a/tests/python/contrib/test_gemm_acc16.py b/tests/python/contrib/test_gemm_acc16.py index d83ecdc35b6e9..1fd5974cd2dc1 100644 --- a/tests/python/contrib/test_gemm_acc16.py +++ b/tests/python/contrib/test_gemm_acc16.py @@ -16,6 +16,7 @@ # under the License. # pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition import tvm +from tvm import te import numpy as np from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int16 @@ -25,8 +26,8 @@ def benchmark_fc_int8_acc16(): n = 128 k = 128 - X = tvm.placeholder((m, k), name='X', dtype="uint8") - W = tvm.placeholder((n, k), name='W', dtype="int8") + X = te.placeholder((m, k), name='X', dtype="uint8") + W = te.placeholder((n, k), name='W', dtype="int8") peak = 512/16*2*2*2 gops_per_mm = 2*n*m*k @@ -38,15 +39,15 @@ def verify(target="llvm -mcpu=skylake-avx512"): return ctx = tvm.context(target, 0) - X = tvm.placeholder((m, k), name='X', dtype="uint8") - W = tvm.placeholder((n, k), name='W', dtype="int8") + X = te.placeholder((m, k), name='X', dtype="uint8") + W = te.placeholder((n, k), name='W', dtype="int8") pc = dot_16x1x16_uint8_int8_int16() - ak = tvm.reduce_axis((0, k), name='k') + ak = te.reduce_axis((0, k), name='k') - packedW = tvm.placeholder((n//128, 128*(k//2), 2), name='packedW', dtype="int8") - t_fc = tvm.compute((m, n), lambda i, j: tvm.sum(X[i, ak].astype("int16") * packedW[j//128, (ak//2)*128+j%128, ak%2].astype("int16"), axis=ak), name="F") + packedW = te.placeholder((n//128, 128*(k//2), 2), name='packedW', dtype="int8") + t_fc = te.compute((m, n), lambda i, j: te.sum(X[i, ak].astype("int16") * packedW[j//128, (ak//2)*128+j%128, ak%2].astype("int16"), axis=ak), name="F") - t_sch = tvm.create_schedule(t_fc.op) + t_sch = te.create_schedule(t_fc.op) a_x, a_y = t_fc.op.axis a_k, = t_fc.op.reduce_axis diff --git a/tests/python/contrib/test_gemm_acc32_vnni.py b/tests/python/contrib/test_gemm_acc32_vnni.py index e810da7d3b07e..f723ccb1d235f 100644 --- a/tests/python/contrib/test_gemm_acc32_vnni.py +++ b/tests/python/contrib/test_gemm_acc32_vnni.py @@ -17,6 +17,7 @@ # pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition import tvm +from tvm import te import numpy as np from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32_cascadelake from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32 @@ -29,8 +30,8 @@ def test_fc_int8_acc32(): n = 1024 k = 1024 - X = tvm.placeholder((m, k), name='X', dtype="uint8") - W = tvm.placeholder((n, k), name='W', dtype="int8") + X = te.placeholder((m, k), name='X', dtype="uint8") + W = te.placeholder((n, k), name='W', dtype="int8") peak = 280 print("Peak {} Gops/s".format(peak)) @@ -47,13 +48,13 @@ def verify(target="llvm -mcpu=cascadelake"): ctx = tvm.context(target, 0) pc = dot_16x1x16_uint8_int8_int32_cascadelake() - ak = tvm.reduce_axis((0, k), name='k') - packedW = tvm.placeholder( + ak = te.reduce_axis((0, k), name='k') + packedW = te.placeholder( (n // 16, 16 * (k // 4), 4), name='packedW', dtype="int8") - t_fc = tvm.compute((m, n), lambda i, j: tvm.sum(X[i, ak].astype( + t_fc = te.compute((m, n), lambda i, j: te.sum(X[i, ak].astype( "int32") * packedW[j / 16, (ak / 4) * 16 + j % 16, ak % 4].astype("int32"), axis=ak), name="F") - t_sch = tvm.create_schedule(t_fc.op) + t_sch = te.create_schedule(t_fc.op) a_x, a_y = t_fc.op.axis a_k, = t_fc.op.reduce_axis diff --git a/tests/python/contrib/test_miopen.py b/tests/python/contrib/test_miopen.py index d7a46e5d94ad8..b4bedd84e2e10 100644 --- a/tests/python/contrib/test_miopen.py +++ b/tests/python/contrib/test_miopen.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.contrib import miopen import numpy as np @@ -40,8 +41,8 @@ def test_conv2d(): return wshape = (out_channel, in_channel, filter_h, filter_w) - X = tvm.placeholder(xshape, name='X') - W = tvm.placeholder(wshape, name='W') + X = te.placeholder(xshape, name='X') + W = te.placeholder(wshape, name='W') Y = miopen.conv2d_forward(X, W, stride_h, diff --git a/tests/python/contrib/test_mps.py b/tests/python/contrib/test_mps.py index fc85290c56e74..b5243659c1d58 100644 --- a/tests/python/contrib/test_mps.py +++ b/tests/python/contrib/test_mps.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import mps @@ -25,19 +26,19 @@ def test_matmul(): n = 1024 l = 128 m = 256 - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((l, m), name='B') + A = te.placeholder((n, l), name='A') + B = te.placeholder((l, m), name='B') C = mps.matmul(A, B) - D = tvm.compute( + D = te.compute( C.shape, lambda *i: C(*i) + 1. ) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) yo, xo = D.op.axis - block_y = tvm.thread_axis("blockIdx.y") - block_x = tvm.thread_axis("blockIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_y = te.thread_axis("threadIdx.y") + thread_x = te.thread_axis("threadIdx.x") by, ty = s[D].split(yo, factor=16) bx, tx = s[D].split(xo, factor=16) s[D].bind(by, block_y) @@ -73,10 +74,10 @@ def test_conv2d(): kh = 3 kw = 3 stride = 2 - A = tvm.placeholder((n, h, w, ci), name="x") - B = tvm.placeholder((co, kh, kw, ci), name="w") + A = te.placeholder((n, h, w, ci), name="x") + B = te.placeholder((co, kh, kw, ci), name="w") C = mps.conv2d(A, B, 'SAME', 2) - s1 = tvm.create_schedule(C.op) + s1 = te.create_schedule(C.op) def verify(A, B, C, target="llvm"): if not tvm.get_global_func("tvm.contrib.mps.conv2d", True): diff --git a/tests/python/contrib/test_mxnet_bridge.py b/tests/python/contrib/test_mxnet_bridge.py index 9f1be7e2b9f1b..37c164483e186 100644 --- a/tests/python/contrib/test_mxnet_bridge.py +++ b/tests/python/contrib/test_mxnet_bridge.py @@ -24,17 +24,18 @@ def mxnet_check(): import mxnet as mx import topi import tvm + from tvm import te import numpy as np from tvm.contrib.mxnet import to_mxnet_func # build a TVM function through topi n = 20 shape = (20,) - scale = tvm.var("scale", dtype="float32") - x = tvm.placeholder(shape) - y = tvm.placeholder(shape) + scale = te.var("scale", dtype="float32") + x = te.placeholder(shape) + y = te.placeholder(shape) z = topi.broadcast_add(x, y) - zz = tvm.compute(shape, lambda *i: z(*i) * scale) + zz = te.compute(shape, lambda *i: z(*i) * scale) target = tvm.target.cuda() diff --git a/tests/python/contrib/test_nnpack.py b/tests/python/contrib/test_nnpack.py index af8ae133923dc..505199a557242 100644 --- a/tests/python/contrib/test_nnpack.py +++ b/tests/python/contrib/test_nnpack.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import scipy.signal from topi.nn.util import get_pad_tuple @@ -26,12 +27,12 @@ def test_fully_connected_inference(): n = 1024 l = 128 m = 235 - bias = tvm.var('bias', dtype=tvm.float32) - A = tvm.placeholder((l, ), name='A') - B = tvm.placeholder((m, l), name='B') + bias = te.var('bias', dtype="float32") + A = te.placeholder((l, ), name='A') + B = te.placeholder((m, l), name='B') C = nnpack.fully_connected_inference(A, B) - D = tvm.compute(C.shape, lambda i: C[i] + bias, name="D") - s = tvm.create_schedule(D.op) + D = te.compute(C.shape, lambda i: C[i] + bias, name="D") + s = te.create_schedule(D.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): @@ -98,9 +99,9 @@ def test_convolution_inference(): bshape = (OC, ) oshape = (BATCH, OC, OH, OW) - data = tvm.placeholder(dshape, name='data') - kernel = tvm.placeholder(kshape, name='kernel') - bias = tvm.placeholder(bshape, name='bias') + data = te.placeholder(dshape, name='data') + kernel = te.placeholder(kshape, name='kernel') + bias = te.placeholder(bshape, name='bias') def verify(target="llvm", algorithm=nnpack.ConvolutionAlgorithm.AUTO, with_bias=True): @@ -116,7 +117,7 @@ def verify(target="llvm", data, kernel, bias if with_bias else None, [PAD, PAD, PAD, PAD], [STRIDE, STRIDE], algorithm=algorithm) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) f = tvm.build(s, [data, kernel, bias, output], target) @@ -160,9 +161,9 @@ def test_convolution_inference_without_weight_transform(): bshape = (OC, ) oshape = (BATCH, OC, OH, OW) - data = tvm.placeholder(dshape, name='data') - kernel = tvm.placeholder(kshape, name='kernel') - bias = tvm.placeholder(bshape, name='bias') + data = te.placeholder(dshape, name='data') + kernel = te.placeholder(kshape, name='kernel') + bias = te.placeholder(bshape, name='bias') def verify(target="llvm", algorithm=nnpack.ConvolutionAlgorithm.AUTO, with_bias=True): @@ -181,7 +182,7 @@ def verify(target="llvm", [PAD, PAD, PAD, PAD], [STRIDE, STRIDE], algorithm=algorithm) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) f = tvm.build(s, [data, kernel, bias, output], target) diff --git a/tests/python/contrib/test_random.py b/tests/python/contrib/test_random.py index f86a42447d816..9efdc3e5a7631 100644 --- a/tests/python/contrib/test_random.py +++ b/tests/python/contrib/test_random.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import random @@ -22,7 +23,7 @@ def test_randint(): m = 1024 n = 1024 A = random.randint(-127, 128, size=(m, n), dtype='int32') - s = tvm.create_schedule(A.op) + s = te.create_schedule(A.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): @@ -46,7 +47,7 @@ def test_uniform(): m = 1024 n = 1024 A = random.uniform(0, 1, size=(m, n)) - s = tvm.create_schedule(A.op) + s = te.create_schedule(A.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): @@ -70,7 +71,7 @@ def test_normal(): m = 1024 n = 1024 A = random.normal(3, 4, size=(m, n)) - s = tvm.create_schedule(A.op) + s = te.create_schedule(A.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): diff --git a/tests/python/contrib/test_rocblas.py b/tests/python/contrib/test_rocblas.py index 2b6d001d243d6..af9d6ddf8dc9b 100644 --- a/tests/python/contrib/test_rocblas.py +++ b/tests/python/contrib/test_rocblas.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import rocblas @@ -22,10 +23,10 @@ def test_matmul_add(): n = 1024 l = 128 m = 235 - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((l, m), name='B') + A = te.placeholder((n, l), name='A') + B = te.placeholder((l, m), name='B') C = rocblas.matmul(A, B) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="rocm"): if not tvm.runtime.enabled(target): diff --git a/tests/python/contrib/test_rpc_proxy.py b/tests/python/contrib/test_rpc_proxy.py index df0ee2bb84785..6cd865e10ed3c 100644 --- a/tests/python/contrib/test_rpc_proxy.py +++ b/tests/python/contrib/test_rpc_proxy.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import logging import numpy as np import time diff --git a/tests/python/contrib/test_rpc_tracker.py b/tests/python/contrib/test_rpc_tracker.py index 11e7766f374b1..2443c708c5c0e 100644 --- a/tests/python/contrib/test_rpc_tracker.py +++ b/tests/python/contrib/test_rpc_tracker.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import logging import numpy as np import time diff --git a/tests/python/contrib/test_sort.py b/tests/python/contrib/test_sort.py index 87cdac01ce3a6..9297a32871fa6 100644 --- a/tests/python/contrib/test_sort.py +++ b/tests/python/contrib/test_sort.py @@ -15,18 +15,19 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_sort(): n = 2 l = 5 m = 3 - data = tvm.placeholder((n, l, m), name='data') - sort_num = tvm.placeholder((n, m), name="sort_num", dtype="int32") + data = te.placeholder((n, l, m), name='data') + sort_num = te.placeholder((n, m), name="sort_num", dtype="int32") axis = 1 is_ascend = False - out = tvm.extern(data.shape, [data, sort_num], - lambda ins, outs: tvm.call_packed( + out = te.extern(data.shape, [data, sort_num], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.sort.argsort_nms", ins[0], ins[1], outs[0], axis, is_ascend), dtype='int32', name="sort_tensor") @@ -38,7 +39,7 @@ def test_sort(): ctx = tvm.cpu(0) target = "llvm" - s = tvm.create_schedule(out.op) + s = te.create_schedule(out.op) f = tvm.build(s, [data, sort_num, out], target) a = tvm.nd.array(np.array(input).astype(data.dtype), ctx) b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx) @@ -51,17 +52,17 @@ def test_sort_np(): axis = 4 reduced_shape = (1, 2, 3, 4, 6) is_ascend = True - data = tvm.placeholder(dshape, name='data') - sort_num = tvm.placeholder(reduced_shape, name="sort_num", dtype="int32") - out = tvm.extern(data.shape, [data, sort_num], - lambda ins, outs: tvm.call_packed( + data = te.placeholder(dshape, name='data') + sort_num = te.placeholder(reduced_shape, name="sort_num", dtype="int32") + out = te.extern(data.shape, [data, sort_num], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.sort.argsort_nms", ins[0], ins[1], outs[0], axis, is_ascend), dtype='int32', name="sort_tensor") ctx = tvm.cpu(0) target = "llvm" - s = tvm.create_schedule(out.op) + s = te.create_schedule(out.op) f = tvm.build(s, [data, sort_num, out], target) np_data = np.random.uniform(size=dshape) diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 7cc4a00cf16cc..5e0ca5cab104b 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import tvm.contrib.sparse as tvmsp import tvm.runtime.ndarray as _nd import numpy as np @@ -25,18 +26,18 @@ def test_static_tensor(): stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) - m = tvm.size_var('m') - n = tvm.size_var('n') + m = te.size_var('m') + n = te.size_var('n') A = tvmsp.placeholder(shape=(m, n), name='A', dtype=dtype) assert(A.stype == 'csr') n = 3 a = np.maximum(np.random.uniform(size=(n,n)).astype(dtype)-.6, 0.) a = tvmsp.array(a, ctx) - A.data = tvm.placeholder(a.data.shape, dtype, name='A_data') - Ab = tvm.decl_buffer(a.data.shape, dtype, name='A_data') + A.data = te.placeholder(a.data.shape, dtype, name='A_data') + Ab = tvm.tir.decl_buffer(a.data.shape, dtype, name='A_data') binds = {A.data: Ab} - C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - s = tvm.create_schedule(C.op) + C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + s = te.create_schedule(C.op) f = tvm.build(s, [A.data, C], target, binds=binds) c = tvmsp.array(np.zeros((n,n), dtype), ctx) c.data = tvm.nd.empty(a.data.shape, dtype) @@ -50,18 +51,18 @@ def test_dynamic_tensor(): stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) - nr, nc, n = tvm.size_var('nr'), tvm.size_var('nc'), tvm.size_var('n') + nr, nc, n = te.size_var('nr'), te.size_var('nc'), te.size_var('n') A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype) assert(A.stype == 'csr') - C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - s = tvm.create_schedule(C.op) + C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + s = te.create_schedule(C.op) _nr, _nc = 3, 5 a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.) a = tvmsp.array(a, ctx) assert a.data.dtype == a.dtype Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) - Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') - Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') + Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_data') + Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') binds = {A.data: Ab.data, A.indices: Ab.indices} f = tvm.build(s, [nr, A.data, C], target, binds=binds) c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx) @@ -76,11 +77,11 @@ def test_sparse_array_tuple(): stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) - nr, nc, n = tvm.size_var('nr'), tvm.size_var('nc'), tvm.size_var('n') + nr, nc, n = te.size_var('nr'), te.size_var('nc'), te.size_var('n') A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype) assert(A.stype == 'csr') - C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - s = tvm.create_schedule(C.op) + C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + s = te.create_schedule(C.op) _nr, _nc = 3, 5 a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.) # convert to sparse array tuple @@ -98,8 +99,8 @@ def test_sparse_array_tuple(): a = tvmsp.array(a_init, shape=source_array.shape, ctx=ctx) assert a.data.dtype == a.dtype Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) - Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') - Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') + Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_data') + Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') binds = {A.data: Ab.data, A.indices: Ab.indices} f = tvm.build(s, [nr, A.data, C], target, binds=binds) c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx) diff --git a/tests/python/contrib/test_tedd.py b/tests/python/contrib/test_tedd.py index d4d3ce464d44e..6e5f3a40fbcb9 100644 --- a/tests/python/contrib/test_tedd.py +++ b/tests/python/contrib/test_tedd.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import tvm +from tvm import te import numpy as np import re import topi @@ -31,10 +31,10 @@ def checkdepdency(): return not {'graphviz', 'ipython'} - {pkg.key for pkg in pkg_resources.working_set} def test_dfg(): - A = tvm.placeholder((1024, 4096), dtype='float32', name='A') + A = te.placeholder((1024, 4096), dtype='float32', name='A') B = topi.nn.softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) def verify(): from tvm.contrib import tedd @@ -49,7 +49,7 @@ def verify(): findany(r"Stage_2:O_0 -> Tensor_2_0", str) findany(r"Tensor_2_0 -> Stage_3:I_0", str) findany(r"Stage_3:O_0 -> Tensor_3_0", str) - findany(r"Tensor_2_0 -> Stage_4:I_0", str) + findany(r"Tensor_2_0 -> Stage_4:I_0", str) findany(r"Tensor_3_0 -> Stage_4:I_1", str) findany(r"Stage_4:O_0 -> Tensor_4_0", str) if checkdepdency(): @@ -57,13 +57,13 @@ def verify(): def test_itervar_relationship_graph(): - n = tvm.var("n") - m = tvm.var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - B = tvm.compute((n, ), lambda i: tvm.sum(A[i, k], axis=k), name="B") + n = te.var("n") + m = te.var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + B = te.compute((n, ), lambda i: te.sum(A[i, k], axis=k), name="B") - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].split(B.op.reduce_axis[0], factor=16) def verify(): @@ -89,18 +89,18 @@ def verify(): def test_schedule_tree(): - block_x = tvm.thread_axis('blockIdx.x') - thread_x = tvm.thread_axis('threadIdx.x') - n = tvm.var("n") - m = tvm.var("m") - l = tvm.var("l") - A = tvm.placeholder((n, m, l), name='A') - B = tvm.compute((n, m, l), lambda bi, bj, bk: A[bi, bj, bk] + 1, name='B') - r = tvm.reduce_axis((0, m), "r") - C = tvm.compute((n, m,), - lambda ci, cj: tvm.sum(B[ci, cj, r], axis=r), - name="C") - s = tvm.create_schedule(C.op) + block_x = te.thread_axis('blockIdx.x') + thread_x = te.thread_axis('threadIdx.x') + n = te.var("n") + m = te.var("m") + l = te.var("l") + A = te.placeholder((n, m, l), name='A') + B = te.compute((n, m, l), lambda bi, bj, bk: A[bi, bj, bk] + 1, name='B') + r = te.reduce_axis((0, m), "r") + C = te.compute((n, m,), + lambda ci, cj: te.sum(B[ci, cj, r], axis=r), + name="C") + s = te.create_schedule(C.op) s.cache_read(A, 'shared', [B]) s[B].vectorize(B.op.axis[-1]) s[C].reorder(C.op.reduce_axis[0], C.op.axis[0]) @@ -115,7 +115,7 @@ def verify(): str = tedd.viz_schedule_tree(s, False, '', True) findany(r"digraph \"Schedule Tree\"", str) findany(r"subgraph cluster_legend", str) - # Check the A_shared stage, including memory scope, itervars, + # Check the A_shared stage, including memory scope, itervars, # and compute findany(r"Stage_1.*A\.shared
Scope: shared.+>0.+>" \ r"ax0\(kDataPar\).+>1.+ax1\(kDataPar\).+>2.+>ax2\(kDataPar\).+>" \ @@ -134,4 +134,4 @@ def verify(): if __name__ == "__main__": test_dfg() test_itervar_relationship_graph() - test_schedule_tree() \ No newline at end of file + test_schedule_tree() diff --git a/tests/python/contrib/test_tflite_runtime.py b/tests/python/contrib/test_tflite_runtime.py index 9d396be858227..8c883b031a899 100644 --- a/tests/python/contrib/test_tflite_runtime.py +++ b/tests/python/contrib/test_tflite_runtime.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm import rpc from tvm.contrib import util, tflite_runtime @@ -28,7 +29,7 @@ def create_tflite_model(): root = tf.Module() root.const = tf.constant([1., 2.], tf.float32) root.f = tf.function(lambda x: root.const * x) - + input_signature = tf.TensorSpec(shape=[2, ], dtype=tf.float32) concrete_func = root.f.get_concrete_function(input_signature) converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) @@ -48,13 +49,13 @@ def check_local(): interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() - + input_shape = input_details[0]['shape'] tflite_input = np.array(np.random.random_sample(input_shape), dtype=np.float32) interpreter.set_tensor(input_details[0]['index'], tflite_input) interpreter.invoke() tflite_output = interpreter.get_tensor(output_details[0]['index']) - + # inference via tvm tflite runtime with open(tflite_model_path, 'rb') as model_fin: runtime = tflite_runtime.create(model_fin.read(), tvm.cpu(0)) @@ -76,7 +77,7 @@ def check_remote(): interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() - + input_shape = input_details[0]['shape'] tflite_input = np.array(np.random.random_sample(input_shape), dtype=np.float32) interpreter.set_tensor(input_details[0]['index'], tflite_input) diff --git a/tests/python/frontend/caffe2/test_forward.py b/tests/python/frontend/caffe2/test_forward.py index 92258bbc284e5..f05287216ec91 100644 --- a/tests/python/frontend/caffe2/test_forward.py +++ b/tests/python/frontend/caffe2/test_forward.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list from tvm import relay diff --git a/tests/python/frontend/coreml/test_forward.py b/tests/python/frontend/coreml/test_forward.py index b4ad300c34037..3a156385d510c 100644 --- a/tests/python/frontend/coreml/test_forward.py +++ b/tests/python/frontend/coreml/test_forward.py @@ -20,6 +20,7 @@ from coremltools.models import datatypes import tvm +from tvm import te from tvm.contrib import graph_runtime import topi import topi.testing diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py index 22dd08ab52ea8..fcaeaec79cb6c 100644 --- a/tests/python/frontend/darknet/test_forward.py +++ b/tests/python/frontend/darknet/test_forward.py @@ -23,6 +23,7 @@ """ import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm.contrib.download import download_testdata download_testdata.__test__ = False diff --git a/tests/python/frontend/keras/test_forward.py b/tests/python/frontend/keras/test_forward.py index f7dcb29b37aaa..db0c2c65e04f8 100644 --- a/tests/python/frontend/keras/test_forward.py +++ b/tests/python/frontend/keras/test_forward.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index 504f70031e246..f676295b324d6 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -18,6 +18,7 @@ import operator import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list from tvm import relay diff --git a/tests/python/frontend/mxnet/test_graph.py b/tests/python/frontend/mxnet/test_graph.py index 6e870000a76b2..0008799caebb2 100644 --- a/tests/python/frontend/mxnet/test_graph.py +++ b/tests/python/frontend/mxnet/test_graph.py @@ -17,6 +17,7 @@ import mxnet as mx import tvm +from tvm import te from tvm import relay from tvm.relay import transform import model_zoo diff --git a/tests/python/frontend/mxnet/test_qnn_ops_utils.py b/tests/python/frontend/mxnet/test_qnn_ops_utils.py index 4ee5f2e3c3c32..32042562b2094 100644 --- a/tests/python/frontend/mxnet/test_qnn_ops_utils.py +++ b/tests/python/frontend/mxnet/test_qnn_ops_utils.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 6243178dcb2be..20d7003e1353a 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -23,6 +23,7 @@ import topi import topi.testing import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 715ae7805cc39..ba1d7bbe67bc7 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -25,6 +25,7 @@ import torch from torch.nn import Module import tvm +from tvm import te import torchvision from tvm import relay @@ -720,7 +721,7 @@ def test_vgg11(): def test_vgg11_bn(): torch.set_grad_enabled(False) verify_model("vgg11_bn") - + #TODO: Need to update schedule in tophub file after PR #4787 updated workloads def test_mobilenet_v2(): torch.set_grad_enabled(False) diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 2340bd4e6318e..9cd978e2e1476 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -34,6 +34,7 @@ from tensorflow.python.ops import init_ops from distutils.version import LooseVersion import tvm +from tvm import te from tvm import relay import tvm.relay.testing.tf as tf_testing @@ -2717,7 +2718,7 @@ def test_forward_reduce_any(): in_data = tf.placeholder(tf.bool, (5, 7, 11), name="in_data") tf.reduce_any(in_data, name="any") compare_tf_with_tvm([np_data], ['in_data:0'], 'any:0') - + def test_forward_reduce_max(): def check_max(ishape, axis, keepdims, dtype): tf.reset_default_graph() diff --git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py index 427d4bfe28106..51983a78bdd1c 100644 --- a/tests/python/frontend/tflite/test_forward.py +++ b/tests/python/frontend/tflite/test_forward.py @@ -24,6 +24,7 @@ from functools import partial import numpy as np import tvm +from tvm import te from tvm import relay import tensorflow as tf from tensorflow.python.framework import constant_op diff --git a/tests/python/integration/test_dot.py b/tests/python/integration/test_dot.py index f95787dd94a40..e7dc7e95eb2bb 100644 --- a/tests/python/integration/test_dot.py +++ b/tests/python/integration/test_dot.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def lower(s, args, name="mydot"): @@ -22,13 +23,13 @@ def lower(s, args, name="mydot"): arg_list = [] for x in args: - assert isinstance(x, tvm.tensor.Tensor) - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.op.name) + assert isinstance(x, te.tensor.Tensor) + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.op.name) binds[x] = buf arg_list.append(buf) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 16) stmt = tvm.ir_pass.CanonicalSimplify(stmt) stmt = tvm.ir_pass.Simplify(stmt) @@ -43,12 +44,12 @@ def mybuild(fapi, target="llvm"): def test_dot(): nn = 12 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - k = tvm.reduce_axis((0, n), 'k') - C = tvm.compute((1,), lambda _: tvm.sum(A[k] * B[k], axis=k), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + k = te.reduce_axis((0, n), 'k') + C = te.compute((1,), lambda _: te.sum(A[k] * B[k], axis=k), name='C') + s = te.create_schedule(C.op) fapi = lower(s, [A, B, C]) def verify(target): diff --git a/tests/python/integration/test_ewise.py b/tests/python/integration/test_ewise.py index ecfc83ca010db..a8f2db19a9b0a 100644 --- a/tests/python/integration/test_ewise.py +++ b/tests/python/integration/test_ewise.py @@ -15,21 +15,22 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.contrib import nvcc import numpy as np import time def test_exp(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: te.exp(A(*i)), name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. num_thread = 8 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) # one line to build the function. def check_device(device, host="stackvm"): @@ -57,11 +58,11 @@ def check_device(device, host="stackvm"): def test_fmod(): # graph def run(dtype): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A', dtype=dtype) - B = tvm.placeholder((n,), name='B', dtype=dtype) - C = tvm.compute(A.shape, lambda *i: tvm.fmod(A(*i), B(*i)), name='C') - s = tvm.create_schedule(C.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A', dtype=dtype) + B = te.placeholder((n,), name='B', dtype=dtype) + C = te.compute(A.shape, lambda *i: te.fmod(A(*i), B(*i)), name='C') + s = te.create_schedule(C.op) # create iter var and assign them tags. num_thread = 8 bx, tx = s[C].split(C.op.axis[0], factor=num_thread) @@ -73,8 +74,8 @@ def check_device(device): return target = tvm.target.create(device) if "cpu" not in target.keys: - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) fmod = tvm.build(s, [A, B, C], device, name="myfmod") # launch the kernel. @@ -96,23 +97,23 @@ def check_device(device): def test_multiple_cache_write(): # graph - n = tvm.convert(1024) - A0 = tvm.placeholder((n,), name='A0', dtype = "float32") - A1 = tvm.placeholder((n,), name='A1', dtype = "float32") - B0, B1 = tvm.compute((n,), + n = tvm.runtime.convert(1024) + A0 = te.placeholder((n,), name='A0', dtype = "float32") + A1 = te.placeholder((n,), name='A1', dtype = "float32") + B0, B1 = te.compute((n,), lambda *i: (A0(*i) + A1(*i), A0(*i) * A1(*i)), name='B') - C = tvm.compute((n,), lambda *i: B0(*i) + B1(*i), + C = te.compute((n,), lambda *i: B0(*i) + B1(*i), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) # create iter var and assign them tags. num_thread = 8 B0_cache, B1_cache = s.cache_write([B0, B1], "local") bx, tx = s[C].split(C.op.axis[0], factor=num_thread) s[B0].compute_at(s[C], bx) s[B0_cache].compute_at(s[C], bx) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) # one line to build the function. def check_device(device, host="stackvm"): if not tvm.runtime.enabled(host): @@ -140,10 +141,10 @@ def check_device(device, host="stackvm"): def test_log_pow_llvm(): # graph - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: tvm.power(tvm.log(A(*i)), 2.0), name='B') - s = tvm.create_schedule(B.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: te.power(te.log(A(*i)), 2.0), name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. bx, tx = s[B].split(B.op.axis[0], factor=32) # one line to build the function. @@ -168,10 +169,10 @@ def test_log_pow_llvm(): def test_popcount(): def run(dtype): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A', dtype=dtype) - B = tvm.compute(A.shape, lambda *i: tvm.popcount(A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A', dtype=dtype) + B = te.compute(A.shape, lambda *i: tvm.tir.popcount(A(*i)), name='B') + s = te.create_schedule(B.op) # simple schedule num_thread = 8 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) @@ -183,8 +184,8 @@ def check_device(device): return target = tvm.target.create(device) if "cpu" not in target.keys: - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) func = tvm.build(s, [A, B], device) # launch the kernel. n = 1024 @@ -207,21 +208,21 @@ def check_device(device): def test_add(): def run(dtype): # graph - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A', dtype=dtype) - B = tvm.placeholder((n,), name='B', dtype=dtype) - bias = tvm.var("bias", dtype=dtype) - scale = tvm.var("scale", dtype=dtype) - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + n = te.size_var('n') + A = te.placeholder((n,), name='A', dtype=dtype) + B = te.placeholder((n,), name='B', dtype=dtype) + bias = te.var("bias", dtype=dtype) + scale = te.var("scale", dtype=dtype) + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) # create iter var and assign them tags. num_thread = 16 bx, x = s[C].split(C.op.axis[0], factor=num_thread*4) tx, x = s[C].split(x, nparts=num_thread) _, x = s[C].split(x, factor=4) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) s[C].vectorize(x) # one line to build the function. @@ -259,16 +260,16 @@ def check_device(device): def try_warp_memory(): """skip this in default test because it require higher arch""" m = 128 - A = tvm.placeholder((m,), name='A') - B = tvm.compute((m,), lambda i: A[i] + 3, name='B') + A = te.placeholder((m,), name='A') + B = te.compute((m,), lambda i: A[i] + 3, name='B') warp_size = 32 - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) AA = s.cache_read(A, "warp", [B]) xo, xi = s[B].split(B.op.axis[0], warp_size * 2) xi0, xi1 = s[B].split(xi, factor=warp_size) - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") s[B].bind(xi1, tx) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) s[AA].compute_at(s[B], xo) xo, xi = s[AA].split(s[AA].op.axis[0], warp_size) s[AA].bind(xi, tx) diff --git a/tests/python/integration/test_ewise_fpga.py b/tests/python/integration/test_ewise_fpga.py index b2c783487074b..7883a4cc4dcea 100644 --- a/tests/python/integration/test_ewise_fpga.py +++ b/tests/python/integration/test_ewise_fpga.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import os @@ -29,13 +30,13 @@ def tvm_callback_vhls_postproc(code): def test_exp(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: te.exp(A(*i)), name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. px, x = s[B].split(B.op.axis[0], nparts=1) - s[B].bind(px, tvm.thread_axis("pipeline")) + s[B].bind(px, te.thread_axis("pipeline")) # one line to build the function. def check_device(device, host="llvm"): @@ -64,17 +65,17 @@ def check_device(device, host="llvm"): def test_multi_kernel(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - D = tvm.compute(A.shape, lambda *i: A(*i) + C(*i), name='D') - s = tvm.create_schedule(D.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + D = te.compute(A.shape, lambda *i: A(*i) + C(*i), name='D') + s = te.create_schedule(D.op) # create iter var and assign them tags. px, x = s[C].split(C.op.axis[0], nparts=1) - s[C].bind(px, tvm.thread_axis("pipeline")) + s[C].bind(px, te.thread_axis("pipeline")) px, x = s[D].split(D.op.axis[0], nparts=1) - s[D].bind(px, tvm.thread_axis("pipeline")) + s[D].bind(px, te.thread_axis("pipeline")) # one line to build the function. def check_device(device, host="llvm"): diff --git a/tests/python/integration/test_gemm.py b/tests/python/integration/test_gemm.py index d61335f68924c..12026da61394e 100644 --- a/tests/python/integration/test_gemm.py +++ b/tests/python/integration/test_gemm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import time @@ -22,26 +23,26 @@ def test_gemm(): # graph nn = 1024 - n = tvm.convert(nn) + n = tvm.runtime.convert(nn) m = n l = n - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute( + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') + k = te.reduce_axis((0, l), name='k') + C = te.compute( (n, m), - lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k), + lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k), name='CC') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xtile, ytile = 32, 32 scale = 8 num_thread = 8 block_factor = scale * num_thread - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_y = tvm.thread_axis("threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_y = te.thread_axis("threadIdx.y") CC = s.cache_write(C, "local") AA = s.cache_read(A, "shared", [CC]) diff --git a/tests/python/integration/test_reduce.py b/tests/python/integration/test_reduce.py index 62c029043084c..82ade4478beac 100644 --- a/tests/python/integration/test_reduce.py +++ b/tests/python/integration/test_reduce.py @@ -15,25 +15,26 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_reduce_prims(): def test_prim(reducer, np_reducer): # graph - n = tvm.size_var('n') - m = tvm.size_var('m') - A = tvm.placeholder((n, m), name='A') - R = tvm.compute((n, ), lambda i: tvm.tir.Select((i > 1), 1, 0), name='R') - k = tvm.reduce_axis((0, m)) - B = tvm.compute((n,), lambda i: reducer(A[i, k], axis=k, where=(R[i]==1)), name='B') + n = tvm.te.size_var('n') + m = tvm.te.size_var('m') + A = te.placeholder((n, m), name='A') + R = te.compute((n, ), lambda i: tvm.tir.Select((i > 1), 1, 0), name='R') + k = te.reduce_axis((0, m)) + B = te.compute((n,), lambda i: reducer(A[i, k], axis=k, where=(R[i]==1)), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) # create iter var and assign them tags. num_thread = 1 xo, xi = s[B].split(B.op.axis[0], factor=num_thread) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) s[R].compute_inline() # one line to build the function. @@ -64,18 +65,18 @@ def check_device(device, host="llvm"): check_device("vulkan") check_device("cuda") check_device("opencl") - test_prim(tvm.sum, np.sum) - test_prim(tvm.min, np.amin) - test_prim(tvm.max, np.amax) + test_prim(te.sum, np.sum) + test_prim(tvm.te.min, np.amin) + test_prim(tvm.te.max, np.amax) def test_rfactor(): - n = tvm.convert(1027) - A = tvm.placeholder((n,), name='A') - k = tvm.reduce_axis((0, n)) - B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k), name='B') + n = tvm.runtime.convert(1027) + A = te.placeholder((n,), name='A') + k = te.reduce_axis((0, n)) + B = te.compute((1,), lambda i: te.sum(A[k], axis=k), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) kf, ki = s[B].split(k, nparts=4) BF = s.rfactor(B, kf) s[BF].parallel(BF.op.axis[0]) @@ -100,12 +101,12 @@ def check_target(target="llvm"): check_target() def test_rfactor_factor_axis(): - n = tvm.convert(1027) - A = tvm.placeholder((n,), name='A') - k = tvm.reduce_axis((0, n)) - B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k), name='B') + n = tvm.runtime.convert(1027) + A = te.placeholder((n,), name='A') + k = te.reduce_axis((0, n)) + B = te.compute((1,), lambda i: te.sum(A[k], axis=k), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) kf, ki = s[B].split(k, nparts=4) BF = s.rfactor(B, kf, 1) s[BF].parallel(BF.op.axis[0]) @@ -133,21 +134,21 @@ def check_target(target="llvm"): def test_rfactor_threads(): nn = 1027 mm = 10 - n = tvm.convert(nn) - m = tvm.convert(mm) - A = tvm.placeholder((m, n), name='A') - k = tvm.reduce_axis((0, n)) + n = tvm.runtime.convert(nn) + m = tvm.runtime.convert(mm) + A = te.placeholder((m, n), name='A') + k = te.reduce_axis((0, n)) nthread = 16 - B = tvm.compute((m,), lambda i: tvm.sum(A[i, k], axis=k, where=(i>1)), name='B') + B = te.compute((m,), lambda i: te.sum(A[i, k], axis=k, where=(i>1)), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, kf = s[B].split(k, factor=nthread) BF = s.rfactor(B, kf) bx, ty = s[B].split(s[B].op.axis[0], factor=nthread) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(ty, tvm.thread_axis("threadIdx.y")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(ty, te.thread_axis("threadIdx.y")) tx = s[B].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[B].bind(tx, thread_x) s[BF].compute_at(s[B], tx) s[B].set_store_predicate(thread_x.var.equal(0)) @@ -183,23 +184,23 @@ def check_target(device, host="stackvm"): def test_rfactor_elemwise_threads(): n = 1025 m = 10 - A = tvm.placeholder((m, n), name='A') - k = tvm.reduce_axis((0, n)) + A = te.placeholder((m, n), name='A') + k = te.reduce_axis((0, n)) nthread = 16 - B = tvm.compute((m,), lambda i: tvm.sum(A[i, k], axis=k), name='B') - BB = tvm.compute((m,), lambda i: B[i] + 1, name='BB') - C = tvm.compute((m,), lambda i: BB[i] + 1, name='C') + B = te.compute((m,), lambda i: te.sum(A[i, k], axis=k), name='B') + BB = te.compute((m,), lambda i: B[i] + 1, name='BB') + C = te.compute((m,), lambda i: BB[i] + 1, name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[BB].compute_inline() bx, ty = s[C].split(s[C].op.axis[0], factor=nthread) ko, kf = s[B].split(k, factor=nthread) BF = s.rfactor(B, kf) s[B].compute_at(s[C], ty) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) tx = s[B].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[B].bind(tx, thread_x) s[BF].compute_at(s[B], tx) # Since thread_x is shared across reductions @@ -237,18 +238,18 @@ def fcombine(x, y): return lhs, rhs def fidentity(t0, t1): - return tvm.const(-1, t0), tvm.min_value(t1) + return tvm.tir.const(-1, t0), tvm.te.min_value(t1) - argmax = tvm.comm_reducer(fcombine, + argmax = te.comm_reducer(fcombine, fidentity, name='argmax') - m = tvm.size_var('m') - n = tvm.size_var('n') - idx = tvm.placeholder((m, n), name='idx', dtype='int32') - val = tvm.placeholder((m, n), name='val', dtype='float32') - k = tvm.reduce_axis((0, n), 'k') - T0, T1 = tvm.compute((m,), lambda i: argmax((idx[i,k], val[i,k]), axis=k), name='T') - s = tvm.create_schedule(T0.op) + m = te.size_var('m') + n = te.size_var('n') + idx = te.placeholder((m, n), name='idx', dtype='int32') + val = te.placeholder((m, n), name='val', dtype='float32') + k = te.reduce_axis((0, n), 'k') + T0, T1 = te.compute((m,), lambda i: argmax((idx[i,k], val[i,k]), axis=k), name='T') + s = te.create_schedule(T0.op) def check_target(): device = 'cpu' @@ -284,31 +285,31 @@ def fcombine(x, y): return lhs, rhs def fidentity(t0, t1): - return tvm.const(-1, t0), tvm.min_value(t1) + return tvm.tir.const(-1, t0), tvm.te.min_value(t1) - argmax = tvm.comm_reducer(fcombine, + argmax = te.comm_reducer(fcombine, fidentity, name='argmax') nn = 1027 mm = 10 - n = tvm.convert(nn) - m = tvm.convert(mm) - A0 = tvm.placeholder((m, n), name='A0', dtype='int32') - A1 = tvm.placeholder((m, n), name='A1', dtype='float32') - k = tvm.reduce_axis((0, n)) - B0, B1 = tvm.compute((m,), lambda i: argmax((A0[i, k], A1[i, k]), axis=k), name='B') + n = tvm.runtime.convert(nn) + m = tvm.runtime.convert(mm) + A0 = te.placeholder((m, n), name='A0', dtype='int32') + A1 = te.placeholder((m, n), name='A1', dtype='float32') + k = te.reduce_axis((0, n)) + B0, B1 = te.compute((m,), lambda i: argmax((A0[i, k], A1[i, k]), axis=k), name='B') # schedule - s = tvm.create_schedule(B0.op) + s = te.create_schedule(B0.op) nthread = 16 ko, kf = s[B0].split(k, factor=nthread) BF0, BF1 = s.rfactor(B0, kf) bx, ty = s[B0].split(s[B0].op.axis[0], factor=nthread) - s[B0].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B0].bind(ty, tvm.thread_axis("threadIdx.y")) + s[B0].bind(bx, te.thread_axis("blockIdx.x")) + s[B0].bind(ty, te.thread_axis("threadIdx.y")) tx = s[B0].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[B0].bind(tx, thread_x) s[BF0.op].compute_at(s[B0], tx) s[B0].set_store_predicate(thread_x.var.equal(0)) diff --git a/tests/python/integration/test_scan.py b/tests/python/integration/test_scan.py index 366ed3d4f1a56..99553c3579d50 100644 --- a/tests/python/integration/test_scan.py +++ b/tests/python/integration/test_scan.py @@ -15,24 +15,25 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_scan(): - m = tvm.size_var("m") - n = tvm.size_var("n") - X = tvm.placeholder((m, n), name="X") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: X[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) - scan = tvm.scan(s_init, s_update, s_state) + m = te.size_var("m") + n = te.size_var("n") + X = te.placeholder((m, n), name="X") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: X[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) + scan = tvm.te.scan(s_init, s_update, s_state) # test scan + compute case - res = tvm.compute((m, n), lambda i, j: scan[i, j]) + res = te.compute((m, n), lambda i, j: scan[i, j]) # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) num_thread = 256 - block_x = tvm.thread_axis(None, "blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") + block_x = te.thread_axis(None, "blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") xo, xi = s[s_init].split(s_init.op.axis[1], factor=num_thread) s[s_init].bind(xo, block_x) s[s_init].bind(xi, thread_x) diff --git a/tests/python/integration/test_tuning.py b/tests/python/integration/test_tuning.py index 99f8b47cce070..60a372c2be396 100644 --- a/tests/python/integration/test_tuning.py +++ b/tests/python/integration/test_tuning.py @@ -21,6 +21,7 @@ import time import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.tuner import RandomTuner @@ -30,20 +31,20 @@ def conv2d_no_batching(N, H, W, CI, CO, KH, KW): """An example template for testing""" assert N == 1, "Only consider batch_size = 1 in this template" - data = tvm.placeholder((N, CI, H, W), name='data') - kernel = tvm.placeholder((CO, CI, KH, KW), name='kernel') + data = te.placeholder((N, CI, H, W), name='data') + kernel = te.placeholder((CO, CI, KH, KW), name='kernel') - rc = tvm.reduce_axis((0, CI), name='rc') - ry = tvm.reduce_axis((0, KH), name='ry') - rx = tvm.reduce_axis((0, KW), name='rx') + rc = te.reduce_axis((0, CI), name='rc') + ry = te.reduce_axis((0, KH), name='ry') + rx = te.reduce_axis((0, KW), name='rx') - conv = tvm.compute( + conv = te.compute( (N, CO, H - KH + 1, W - KW + 1), - lambda nn, ff, yy, xx: tvm.sum( + lambda nn, ff, yy, xx: te.sum( data[nn, rc, yy + ry, xx + rx] * kernel[ff, rc, ry, rx], axis=[rc, ry, rx]), tag="conv2d_nchw") - s = tvm.create_schedule([conv.op]) + s = te.create_schedule([conv.op]) output = conv OL = s.cache_write(conv, 'local') @@ -65,15 +66,15 @@ def conv2d_no_batching(N, H, W, CI, CO, KH, KW): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) kernel_scope = n # this is the scope to attach global config inside this kernel - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(n, bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -100,9 +101,9 @@ def conv2d_no_batching(N, H, W, CI, CO, KH, KW): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # tune unroll cfg.define_knob("auto_unroll_max_step", [0, 512, 1500]) diff --git a/tests/python/integration/test_winograd_nnpack.py b/tests/python/integration/test_winograd_nnpack.py index 5e45c612707a2..7dad2ca586d7c 100644 --- a/tests/python/integration/test_winograd_nnpack.py +++ b/tests/python/integration/test_winograd_nnpack.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity from tvm.contrib import nnpack @@ -32,9 +33,9 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/tests/python/nightly/quantization/test_quantization_accuracy.py b/tests/python/nightly/quantization/test_quantization_accuracy.py index f047952f3e6bc..4818cc651b94f 100644 --- a/tests/python/nightly/quantization/test_quantization_accuracy.py +++ b/tests/python/nightly/quantization/test_quantization_accuracy.py @@ -16,6 +16,7 @@ # under the License. from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm.relay import quantize as qtz import mxnet as mx diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py index 55d788756b5ce..1e9030c5d8e64 100644 --- a/tests/python/relay/benchmarking/benchmark_vm.py +++ b/tests/python/relay/benchmarking/benchmark_vm.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm import relay from tvm.runtime import container @@ -73,7 +74,7 @@ def get_vm_output(mod, data, params, target, ctx, dtype='float32', prof_res = np.array(ftimer("main", data).results) * 1000 print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res))) - + return result.asnumpy().astype(dtype) # random input diff --git a/tests/python/relay/test_adt.py b/tests/python/relay/test_adt.py index 8f631f8fd0478..deeb7330f9da6 100644 --- a/tests/python/relay/test_adt.py +++ b/tests/python/relay/test_adt.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.backend.interpreter import ConstructorValue from tvm.relay import create_executor diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 24176e4c41dda..aa81e3113b7f3 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay.loops import while_loop from tvm.relay.testing import run_infer_type as infer_type diff --git a/tests/python/relay/test_backend_compile_engine.py b/tests/python/relay/test_backend_compile_engine.py index 4e4122a28cf07..eb018fed96e72 100644 --- a/tests/python/relay/test_backend_compile_engine.py +++ b/tests/python/relay/test_backend_compile_engine.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te import tvm.testing from tvm import relay from tvm import autotvm @@ -69,7 +70,7 @@ def _tmp_strategy(attrs, inputs, out_type, target): return strategy def _create_record(task_name, dshape, wshape, target, cost): - args = [tvm.placeholder(dshape), tvm.placeholder(wshape), (1, 1), (1, 1, 1, 1), + args = [te.placeholder(dshape), te.placeholder(wshape), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'] task = autotvm.task.create(task_name, args, target) cfg = autotvm.ConfigEntity(0, None, {}, []) @@ -89,7 +90,7 @@ def _get_impls(dshape, wshape): return relay.backend.compile_engine.get_valid_implementations( relay.op.get("nn.conv2d"), out.attrs, - [tvm.placeholder(dshape), tvm.placeholder(wshape)], + [te.placeholder(dshape), te.placeholder(wshape)], out.checked_type, target) @@ -110,7 +111,7 @@ def _select_impl(dshape, wshape, use_autotvm=False): return relay.backend.compile_engine.select_implementation( relay.op.get("nn.conv2d"), out.attrs, - [tvm.placeholder(dshape), tvm.placeholder(wshape)], + [te.placeholder(dshape), te.placeholder(wshape)], out.checked_type, target, use_autotvm) diff --git a/tests/python/relay/test_backend_graph_runtime.py b/tests/python/relay/test_backend_graph_runtime.py index d5d29b645cfa9..71428a6dbefdf 100644 --- a/tests/python/relay/test_backend_graph_runtime.py +++ b/tests/python/relay/test_backend_graph_runtime.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.scope_builder import ScopeBuilder diff --git a/tests/python/relay/test_backend_interpreter.py b/tests/python/relay/test_backend_interpreter.py index 9b548f12f65b8..360b6bd20416f 100644 --- a/tests/python/relay/test_backend_interpreter.py +++ b/tests/python/relay/test_backend_interpreter.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te import tvm.testing from tvm import nd from tvm import relay diff --git a/tests/python/relay/test_change_batch.py b/tests/python/relay/test_change_batch.py index e822bbb059105..e53887b1c4087 100644 --- a/tests/python/relay/test_change_batch.py +++ b/tests/python/relay/test_change_batch.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.testing import resnet from tvm.relay import transform diff --git a/tests/python/relay/test_cpp_build_module.py b/tests/python/relay/test_cpp_build_module.py index 674e214df058b..171b6b0b77b02 100644 --- a/tests/python/relay/test_cpp_build_module.py +++ b/tests/python/relay/test_cpp_build_module.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib.nvcc import have_fp16 diff --git a/tests/python/relay/test_error_reporting.py b/tests/python/relay/test_error_reporting.py index aef93ad9f4dc9..d69744824faa1 100644 --- a/tests/python/relay/test_error_reporting.py +++ b/tests/python/relay/test_error_reporting.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay def check_type_err(expr, msg): diff --git a/tests/python/relay/test_expr_functor.py b/tests/python/relay/test_expr_functor.py index 5c923655d7b7b..ea7f8f6b411a7 100644 --- a/tests/python/relay/test_expr_functor.py +++ b/tests/python/relay/test_expr_functor.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import ExprFunctor, ExprMutator, ExprVisitor diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index b086df07a835a..e3789988d2f33 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -20,6 +20,7 @@ import numpy as np import tvm +from tvm import te import tvm.relay.testing import tvm.relay.transform from tvm import relay diff --git a/tests/python/relay/test_external_runtime.py b/tests/python/relay/test_external_runtime.py index 713aca918883d..0942cbb941ea9 100644 --- a/tests/python/relay/test_external_runtime.py +++ b/tests/python/relay/test_external_runtime.py @@ -21,6 +21,7 @@ import numpy as np import tvm +from tvm import te import tvm.runtime._ffi_api from tvm import relay from tvm.contrib import util diff --git a/tests/python/relay/test_feature.py b/tests/python/relay/test_feature.py index 9066e85cf6da1..3ef53d3b88b14 100644 --- a/tests/python/relay/test_feature.py +++ b/tests/python/relay/test_feature.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import detect_feature from tvm.relay.transform import gradient diff --git a/tests/python/relay/test_ir_bind.py b/tests/python/relay/test_ir_bind.py index df280e2fa2482..45474b6cc4268 100644 --- a/tests/python/relay/test_ir_bind.py +++ b/tests/python/relay/test_ir_bind.py @@ -16,6 +16,7 @@ # under the License. """ test bind function.""" import tvm +from tvm import te from tvm import relay diff --git a/tests/python/relay/test_ir_module.py b/tests/python/relay/test_ir_module.py index 939672d42152f..bab82472263ab 100644 --- a/tests/python/relay/test_ir_module.py +++ b/tests/python/relay/test_ir_module.py @@ -16,6 +16,7 @@ # under the License. """Tests for module functionality.""" import tvm +from tvm import te from tvm import relay from tvm.relay.prelude import Prelude from tvm.relay.testing import add_nat_definitions diff --git a/tests/python/relay/test_ir_nodes.py b/tests/python/relay/test_ir_nodes.py index b7d7eb9f389cf..cc663a1614fe1 100644 --- a/tests/python/relay/test_ir_nodes.py +++ b/tests/python/relay/test_ir_nodes.py @@ -17,6 +17,7 @@ """ test ir""" import pytest import tvm +from tvm import te from tvm import relay from tvm.tir.expr import * from tvm.relay import op @@ -57,7 +58,7 @@ def test_span(): # Types def test_tensor_type(): - shape = tvm.convert([1, 2, 3]) + shape = tvm.runtime.convert([1, 2, 3]) dtype = 'float32' tt = relay.TensorType(shape, dtype) assert tt.dtype == dtype @@ -76,9 +77,9 @@ def test_type_param(): def test_func_type(): - type_params = tvm.convert([]) - type_constraints = tvm.convert([]) # TODO: fill me in - arg_types = tvm.convert([]) + type_params = tvm.runtime.convert([]) + type_constraints = tvm.runtime.convert([]) # TODO: fill me in + arg_types = tvm.runtime.convert([]) ret_type = relay.TensorType((1, 2, 3), 'float32') tf = relay.FuncType(arg_types, ret_type, type_params, type_constraints) assert tf.type_params == type_params @@ -93,9 +94,9 @@ def test_func_type(): def test_tuple_type(): tp = relay.TypeVar('tp', relay.TypeKind.Type) - tf = relay.FuncType(tvm.convert([]), None, tvm.convert([]), tvm.convert([])) - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - fields = tvm.convert([tp, tf, tt]) + tf = relay.FuncType(tvm.runtime.convert([]), None, tvm.runtime.convert([]), tvm.runtime.convert([])) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + fields = tvm.runtime.convert([tp, tf, tt]) tup_ty = relay.TupleType(fields) assert tup_ty.fields == fields @@ -105,9 +106,9 @@ def test_tuple_type(): def test_type_relation(): tp = relay.TypeVar('tp', relay.TypeKind.Type) - tf = relay.FuncType(tvm.convert([]), None, tvm.convert([]), tvm.convert([])) - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - args = tvm.convert([tp, tf, tt]) + tf = relay.FuncType(tvm.runtime.convert([]), None, tvm.runtime.convert([]), tvm.runtime.convert([])) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + args = tvm.runtime.convert([tp, tf, tt]) num_inputs = 2 func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast") @@ -130,7 +131,7 @@ def test_constant(): def test_tuple(): - fields = tvm.convert([]) + fields = tvm.runtime.convert([]) tup = relay.Tuple(fields) assert tup.fields == fields assert tup.span == None @@ -163,10 +164,10 @@ def test_global_var(): def test_function(): param_names = ['a', 'b', 'c', 'd'] - params = tvm.convert([relay.Var(n) for n in param_names]) - ret_type = relay.TupleType(tvm.convert([])) - body = relay.Tuple(tvm.convert([])) - type_params = tvm.convert([]) + params = tvm.runtime.convert([relay.Var(n) for n in param_names]) + ret_type = relay.TupleType(tvm.runtime.convert([])) + body = relay.Tuple(tvm.runtime.convert([])) + type_params = tvm.runtime.convert([]) fn = relay.Function(params, body, ret_type, type_params) fn = fn.set_attribute("test_attribute", tvm.tir.StringImm("value")) assert fn.params == params @@ -180,10 +181,10 @@ def test_function(): @pytest.mark.skip(reason="AttrsEqualHandler doesn't handle Map so far.") def test_function_attrs(): param_names = ['a', 'b', 'c', 'd'] - params = tvm.convert([relay.var(n, shape=(5, 2)) for n in param_names]) - ret_type = relay.TupleType(tvm.convert([])) - body = relay.Tuple(tvm.convert([])) - type_params = tvm.convert([]) + params = tvm.runtime.convert([relay.var(n, shape=(5, 2)) for n in param_names]) + ret_type = relay.TupleType(tvm.runtime.convert([])) + body = relay.Tuple(tvm.runtime.convert([])) + type_params = tvm.runtime.convert([]) fn = relay.Function(params, body, ret_type, type_params) model_params = {} for param in params[:1]: @@ -210,7 +211,7 @@ def test_function_attrs(): def test_call(): op = relay.Var('f') arg_names = ['a', 'b', 'c', 'd'] - args = tvm.convert([relay.Var(n) for n in arg_names]) + args = tvm.runtime.convert([relay.Var(n) for n in arg_names]) call = relay.Call(op, args, None, None) assert call.op == op assert call.args == args diff --git a/tests/python/relay/test_ir_parser.py b/tests/python/relay/test_ir_parser.py index bcce9b4ba5dd6..ba1f8d884adc4 100644 --- a/tests/python/relay/test_ir_parser.py +++ b/tests/python/relay/test_ir_parser.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import graph_equal, assert_graph_equal from tvm.relay.analysis import alpha_equal, assert_alpha_equal diff --git a/tests/python/relay/test_ir_text_printer.py b/tests/python/relay/test_ir_text_printer.py index e2a0bdc205d66..3bdd803122f7e 100644 --- a/tests/python/relay/test_ir_text_printer.py +++ b/tests/python/relay/test_ir_text_printer.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay import tvm.relay.testing import numpy as np @@ -70,7 +71,7 @@ def test_env(): def test_meta_data(): - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", shape=(n, c, h, w)) w = relay.var("w") z = relay.nn.conv2d(x, w, diff --git a/tests/python/relay/test_ir_well_formed.py b/tests/python/relay/test_ir_well_formed.py index fbbfbd23a6c2d..db953d5762e3a 100644 --- a/tests/python/relay/test_ir_well_formed.py +++ b/tests/python/relay/test_ir_well_formed.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import well_formed from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_json_compact.py b/tests/python/relay/test_json_compact.py index 40b686a05c5e8..631679140c1b7 100644 --- a/tests/python/relay/test_json_compact.py +++ b/tests/python/relay/test_json_compact.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te from tvm import relay import json diff --git a/tests/python/relay/test_memory_alloc.py b/tests/python/relay/test_memory_alloc.py index 18b1500dfc3cc..08fc39df9ad02 100644 --- a/tests/python/relay/test_memory_alloc.py +++ b/tests/python/relay/test_memory_alloc.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License import tvm +from tvm import te import numpy as np from tvm import relay from tvm.relay import memory_alloc diff --git a/tests/python/relay/test_op_grad_level1.py b/tests/python/relay/test_op_grad_level1.py index 3be62a3170fb5..0eb1cec916b62 100644 --- a/tests/python/relay/test_op_grad_level1.py +++ b/tests/python/relay/test_op_grad_level1.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay.testing import check_grad, ctx_list, run_infer_type from tvm.relay.transform import gradient diff --git a/tests/python/relay/test_op_grad_level2.py b/tests/python/relay/test_op_grad_level2.py index 57b1e2c676ac2..2b5a1c29e0ded 100644 --- a/tests/python/relay/test_op_grad_level2.py +++ b/tests/python/relay/test_op_grad_level2.py @@ -19,6 +19,7 @@ import topi import topi.testing import tvm +from tvm import te from tvm import relay from tvm.relay.testing import check_grad, ctx_list, run_infer_type from tvm.relay.transform import gradient @@ -92,8 +93,8 @@ def verify_global_avg_pool2d_grad(x_shape): data = np.random.rand(*x_shape).astype("float32") y_shape = topi.util.get_const_tuple(fwd_func.ret_type.shape) out_grad = np.ones(shape=y_shape) - ref_grad = topi.testing.pool_grad_nchw(data, out_grad, pool_size=(x_shape[2], x_shape[3]), - strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg', + ref_grad = topi.testing.pool_grad_nchw(data, out_grad, pool_size=(x_shape[2], x_shape[3]), + strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg', ceil_mode=False) for target, ctx in ctx_list(): diff --git a/tests/python/relay/test_op_grad_level3.py b/tests/python/relay/test_op_grad_level3.py index 430c3dde55056..d13687fbec72e 100644 --- a/tests/python/relay/test_op_grad_level3.py +++ b/tests/python/relay/test_op_grad_level3.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay.testing import check_grad, ctx_list, run_infer_type from tvm.relay.transform import gradient diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index 194b095642882..0fa07499193a3 100644 --- a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -17,6 +17,7 @@ import numpy as np import pytest import tvm +from tvm import te import scipy from tvm import relay from tvm.relay import transform @@ -86,7 +87,7 @@ def inst(vars, sh): def check_binary_op(opfunc, ref, dtype): # TODO(@jroesch): this piece of code improperly uses type variables. - n = tvm.var("n") + n = te.var("n") s1 = (5, n, 5) s2 = (n, 1) t1 = relay.TensorType(s1) @@ -173,7 +174,7 @@ def test_bias_add(): def test_expand_dims_infer_type(): for dtype in ['float16', 'float32']: - n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100 + n, t, d = te.size_var("n"), te.size_var("t"), 100 x = relay.var("x", shape=(n, t, d), dtype=dtype) y = relay.expand_dims(x, axis=2) assert "axis=2" in y.astext() @@ -223,23 +224,23 @@ def test_log_softmax(): def test_concatenate(): for dtype in ['float16', 'float32']: - n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100 + n, t, d = te.size_var("n"), te.size_var("t"), 100 x = relay.var("x", shape=(n, t, d)) y = relay.var("y", shape=(n, t, d)) z = relay.concatenate((x, y), axis=-1) assert "axis=" in z.astext() zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((n, t, 200)) - + x = relay.exp(x) z = relay.concatenate((x, y), axis=2) zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((n, t, 200)) - + z = relay.concatenate((x, y), axis=1) zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((n, t + t, 100)) - + # check shape mismatches (the following case is expected to raise tvm._ffi.base.TVMError. try: x = relay.var('p1', shape=(2, 5)) @@ -251,7 +252,7 @@ def test_concatenate(): pass else: assert False - + x = relay.var("x", shape=(10, 5), dtype=dtype) y = relay.var("y", shape=(10, 5), dtype=dtype) t = relay.var("z", shape=(), dtype=dtype) @@ -263,7 +264,7 @@ def test_concatenate(): y_data = np.random.rand(10, 5).astype(dtype) t_data = np.random.uniform(size=()).astype(dtype) ref_res = np.concatenate((x_data, y_data), axis=1) + t_data - + for target, ctx in ctx_list(): if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version): continue @@ -276,7 +277,7 @@ def test_concatenate(): def test_dropout(): for dtype in ['float16', 'float32']: - n, t, d = tvm.size_var("n"), tvm.size_var("t"), tvm.size_var("d") + n, t, d = te.size_var("n"), te.size_var("t"), te.size_var("d") input_ty = relay.TensorType((n, t, d), dtype) x = relay.var("x", input_ty) y = relay.nn.dropout(x, rate=0.75) @@ -297,7 +298,7 @@ def test_batch_norm(): center=False, scale=False) yy = run_infer_type(y.astuple()) assert "center=" in yy.astext() - assert yy.checked_type == relay.ty.TupleType(tvm.convert([ + assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([ relay.TensorType((3, 2, 1), dtype), relay.TensorType((2,), dtype), relay.TensorType((2,), dtype) @@ -311,7 +312,7 @@ def test_batch_norm(): y = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var, axis=0, center=False, scale=False) yy = run_infer_type(y.astuple()) - assert yy.checked_type == relay.ty.TupleType(tvm.convert([ + assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((3, 2, 1), dtype), relay.ty.TensorType((3,), dtype), relay.ty.TensorType((3,), dtype) @@ -326,7 +327,7 @@ def test_batch_norm(): y = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var, axis=-1, center=False, scale=False) yy = run_infer_type(y.astuple()) - assert yy.checked_type == relay.ty.TupleType(tvm.convert([ + assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((1, 2, 3), dtype), relay.ty.TensorType((3,), dtype), relay.ty.TensorType((3,), dtype) @@ -348,7 +349,7 @@ def test_dense(): # Dense accuracy for float16 is poor if dtype == 'float16': return - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), dtype)) w = relay.var("w", relay.TensorType((2, w), dtype)) y = relay.nn.dense(x, w, units=2) @@ -356,15 +357,15 @@ def test_dense(): yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, h, 2), dtype) - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), 2 + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), 2 x = relay.var("x", relay.TensorType((n, c, h, w), dtype)) - wh, ww = tvm.size_var("wh"), tvm.size_var("ww") + wh, ww = te.size_var("wh"), te.size_var("ww") w = relay.var("w", relay.TensorType((ww, wh), dtype)) y = relay.nn.dense(x, w) yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, h, ww), dtype) - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), 2 + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), 2 x = relay.var("x", relay.TensorType((n, c, h, w), dtype)) w = relay.var("w", relay.IncompleteType()) y = relay.nn.dense(x, w, units=2) @@ -394,7 +395,7 @@ def test_dense_dtype(): data_dtype = 'uint8' weight_dtype = 'int8' out_dtype = 'uint8' - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), data_dtype)) w = relay.var("w", relay.TensorType((2, w), weight_dtype)) y = relay.nn.dense(x, w, units=2, out_dtype=out_dtype) @@ -406,7 +407,7 @@ def test_dense_dtype(): def test_bitserial_dense(): - m, k = tvm.size_var("m"), tvm.size_var("k") + m, k = te.size_var("m"), te.size_var("k") x = relay.var("x", relay.TensorType((m, k), "int16")) w = relay.var("w", relay.TensorType((k, 32), "int16")) y = relay.nn.bitserial_dense(x, w, units=32) diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index c3033e9181cb2..1e4be742ff25f 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -18,6 +18,7 @@ """ import numpy as np import tvm +from tvm import te import topi.testing from tvm import relay from tvm.relay import transform @@ -250,7 +251,7 @@ def verify_slice_like(data, slice_like, axes, output, dtype="float32"): tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5) def test_slice_like(): - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") verify_slice_like(data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3)) verify_slice_like(data=(1, 2, 3), slice_like=(d1, d2, d3), axes=None, output=(d1, d2, d3)) verify_slice_like(data=(d2, d3, d4), slice_like=(d1, d2, d3), axes=(1,2), output=(d2, d2, d3)) @@ -304,7 +305,7 @@ def verify_batch_matmul(x_shape, y_shape, out_shape, dtype="float32"): tvm.testing.assert_allclose(z.asnumpy(), z_np, rtol=1e-5) def test_batch_matmul(): - b, m, n, k = tvm.size_var("b"), tvm.size_var("m"), tvm.size_var("n"), tvm.size_var("k") + b, m, n, k = te.size_var("b"), te.size_var("m"), te.size_var("n"), te.size_var("k") x = relay.var("x", relay.TensorType((b, m, k), "float32")) y = relay.var("y", relay.TensorType((b, n, k), "float32")) z = relay.nn.batch_matmul(x, y) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index d545d0c1635a7..7a42fc329e043 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -18,6 +18,7 @@ """ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay from tvm.relay import transform @@ -28,7 +29,7 @@ def test_conv1d_infer_type(): # symbolic in batch dimension - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.ty.TensorType((n, c, w), "float32")) w = relay.var("w") y = relay.nn.conv1d(x, w, @@ -42,7 +43,7 @@ def test_conv1d_infer_type(): (2, 10, 3), "float32") # infer by shape of w, mixed precision - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.TensorType((n, c, w), "int8")) w = relay.var("w", relay.TensorType((2, 10, 3), "int8")) y = relay.nn.conv1d(x, w, out_dtype="int32") @@ -52,7 +53,7 @@ def test_conv1d_infer_type(): (n, 2, 222), "int32") # infer shape in case of different dtypes for input and weight. - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.TensorType((n, c, w), "uint8")) w = relay.var("w", relay.TensorType((2, 10, 3), "int8")) y = relay.nn.conv1d(x, w, out_dtype="int32") @@ -122,7 +123,7 @@ def run_test_conv1d(dtype, out_dtype, scale, dshape, kshape, def test_conv2d_infer_type(): # symbolic in batch dimension - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.ty.TensorType((n, c, h, w), "float32")) w = relay.var("w") y = relay.nn.conv2d(x, w, @@ -136,7 +137,7 @@ def test_conv2d_infer_type(): (2, 10, 3, 3), "float32") # infer by shape of w, mixed precision - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.TensorType((n, c, h, w), "int8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3), "int8")) y = relay.nn.conv2d(x, w, out_dtype="int32") @@ -146,7 +147,7 @@ def test_conv2d_infer_type(): (n, 2, 222, 222), "int32") # infer shape in case of different dtypes for input and weight. - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.TensorType((n, c, h, w), "uint8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3), "int8")) y = relay.nn.conv2d(x, w, out_dtype="int32") @@ -385,7 +386,7 @@ def run_test_conv2d_cuda(dtype, out_dtype, scale, dshape, kshape, def test_conv3d_infer_type(): # symbolic in batch dimension - n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224 x = relay.var("x", relay.ty.TensorType((n, c, d, h, w), "float32")) w = relay.var("w") y = relay.nn.conv3d(x, w, @@ -399,7 +400,7 @@ def test_conv3d_infer_type(): (2, 10, 3, 3, 3), "float32") # infer by shape of w, mixed precision - n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224 x = relay.var("x", relay.TensorType((n, c, d, h, w), "int8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3, 3), "int8")) y = relay.nn.conv3d(x, w, out_dtype="int32") @@ -409,7 +410,7 @@ def test_conv3d_infer_type(): (n, 2, 222, 222, 222), "int32") # infer shape in case of different dtypes for input and weight. - n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224 x = relay.var("x", relay.TensorType((n, c, d, h, w), "uint8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3, 3), "int8")) y = relay.nn.conv3d(x, w, out_dtype="int32") @@ -524,7 +525,7 @@ def run_test_conv3d(dtype, out_dtype, scale, dshape, kshape, def test_conv2d_transpose_infer_type(): # symbolic in batch dimension - n, c, h, w = tvm.size_var("n"), 10, 10, 12 + n, c, h, w = te.size_var("n"), 10, 10, 12 x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) w = relay.var("w", relay.IncompleteType()) y = relay.nn.conv2d_transpose(x, w, @@ -539,7 +540,7 @@ def test_conv2d_transpose_infer_type(): (10, 15, 3, 3), "float32") # infer by shape of w, mixed precision - n, h, w, c = tvm.size_var("n"), 10, 10, 12 + n, h, w, c = te.size_var("n"), 10, 10, 12 x = relay.var("x", relay.TensorType((n, h, w, c), "float32")) w = relay.var("w", relay.TensorType((12, 11, 5, 5), "float32")) y = relay.nn.conv2d_transpose(x, w, @@ -624,41 +625,41 @@ def test_conv1d_transpose_ncw_run(): def test_upsampling_infer_type(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") - scale = tvm.const(2.0, "float64") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") + scale = tvm.tir.const(2.0, "float64") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = relay.nn.upsampling(x, scale_h=2, scale_w=2, layout="NCHW", method="bilinear") "method=\"BINLINEAR\"" in y.astext() yy = run_infer_type(y) - assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", tvm.round(h*scale)), - tvm.tir.Cast("int32", tvm.round(w*scale))), + assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", te.round(h*scale)), + tvm.tir.Cast("int32", te.round(w*scale))), "float32") - n, c = tvm.size_var("n"), tvm.size_var("c") + n, c = te.size_var("n"), te.size_var("c") x = relay.var("x", relay.TensorType((n, c, 100, 200), "float32")) y = relay.nn.upsampling(x, scale_h=2, scale_w=2, layout="NCHW", method="bilinear") yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, 200, 400), "float32") def test_upsampling3d_infer_type(): - n, c, d, h, w = tvm.size_var("n"), tvm.size_var("c"),\ - tvm.size_var("d"), tvm.size_var("h"), tvm.size_var("w") - scale = tvm.const(2.0, "float64") + n, c, d, h, w = te.size_var("n"), te.size_var("c"),\ + te.size_var("d"), te.size_var("h"), te.size_var("w") + scale = tvm.tir.const(2.0, "float64") x = relay.var("x", relay.TensorType((n, c, d, h, w), "float32")) y = relay.nn.upsampling3d(x, scale_d=2, scale_h=2, scale_w=2, layout="NCDHW", method="trilinear") yy = run_infer_type(y) - assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", tvm.round(d*scale)), - tvm.tir.Cast("int32", tvm.round(h*scale)), - tvm.tir.Cast("int32", tvm.round(w*scale))), + assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", te.round(d*scale)), + tvm.tir.Cast("int32", te.round(h*scale)), + tvm.tir.Cast("int32", te.round(w*scale))), "float32") - n, c = tvm.size_var("n"), tvm.size_var("c") + n, c = te.size_var("n"), te.size_var("c") x = relay.var("x", relay.TensorType((n, c, 100, 100, 200), "float32")) y = relay.nn.upsampling3d(x, scale_d=2, scale_h=2, scale_w=2, layout="NCDHW", method="trilinear") yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, 200, 200, 400), "float32") def _test_pool2d(opfunc, reffunc): - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = opfunc(x, pool_size=(1, 1)) assert "pool_size=" in y.astext() @@ -678,7 +679,7 @@ def _test_pool2d(opfunc, reffunc): tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5) def _test_pool2d_int(opfunc, reffunc, dtype): - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.TensorType((n, c, h, w), dtype)) y = opfunc(x, pool_size=(1, 1)) assert "pool_size=" in y.astext() @@ -698,13 +699,13 @@ def _test_pool2d_int(opfunc, reffunc, dtype): tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5) def _test_global_pool2d(opfunc, reffunc): - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), 224, 224 + n, c, h, w = te.size_var("n"), te.size_var("c"), 224, 224 x = relay.var("x", relay.TensorType((n, h, w, c), "float32")) y = opfunc(x, layout="NHWC") yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, 1, 1, c), "float32") - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = opfunc(x) yy = run_infer_type(y) @@ -735,7 +736,7 @@ def test_pool2d(): def test_pool1d(): def _test_pool1d(opfunc): - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.TensorType((n, c, w), "float32")) y = opfunc(x, pool_size=(1,)) assert "pool_size=" in y.astext() @@ -763,7 +764,7 @@ def _test_pool1d(opfunc): def test_pool3d(): def _test_pool3d(opfunc, padding=(0, 0, 0, 0, 0, 0), out_shape=(1, 3, 16, 16, 16)): - n, c, d, h, w = tvm.size_var("n"), 10, 5, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 5, 224, 224 x = relay.var("x", relay.TensorType((n, c, d, h, w), "float32")) y = opfunc(x, pool_size=(1, 1, 1)) assert "pool_size=" in y.astext() @@ -833,7 +834,7 @@ def test_avg_pool2d_no_count_pad(): tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5) def test_flatten_infer_type(): - d1, d2, d3, d4 = tvm.size_var("d1"), tvm.size_var("d2"), tvm.size_var("d3"), tvm.size_var("d4") + d1, d2, d3, d4 = te.size_var("d1"), te.size_var("d2"), te.size_var("d3"), te.size_var("d4") x = relay.var("x", relay.TensorType((d1, d2, d3, d4), "float32")) y = relay.nn.batch_flatten(x) yy = run_infer_type(y) @@ -878,7 +879,7 @@ def test_pad_infer_type(): assert yy.checked_type == relay.TensorType((3, 6, 9, 12), "float32") # some symbolic values - n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w") + n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w") t = relay.var("t", relay.TensorType((n, c, h, w), "float32")) y = relay.nn.pad(t, ((1, 1), (2, 2), (3, 3), (4, 4))) yy = run_infer_type(y) @@ -901,7 +902,7 @@ def _test_run(dtype): _test_run('int32') def test_lrn(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", shape=(n, c , h, w)) y = relay.nn.lrn(x, size=10, axis=2, bias=0.5, alpha=.00001, beta=0.75) "alpha=" in y.astext() @@ -932,7 +933,7 @@ def test_lrn(): tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5) def test_l2_normalize(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", shape=(n, c , h, w)) y = relay.nn.l2_normalize(x, eps=0.001, axis=[1]) "axis=" in y.astext() @@ -982,7 +983,7 @@ def test_batch_flatten(): def _test_upsampling(layout, method, align_corners=False): - n, c, h, w = tvm.size_var("n"), 16, 32, 32 + n, c, h, w = te.size_var("n"), 16, 32, 32 scale_h = 2.0 scale_w = 2.0 dtype = "float32" @@ -1021,7 +1022,7 @@ def test_upsampling(): _test_upsampling("NHWC", "bilinear", True) def _test_upsampling3d(layout, method, coordinate_transformation_mode="half_pixel"): - n, c, d, h, w = tvm.size_var("n"), 8, 16, 16, 16 + n, c, d, h, w = te.size_var("n"), 8, 16, 16, 16 scale_d = 2.0 scale_h = 2.0 scale_w = 2.0 @@ -1220,7 +1221,7 @@ def test_depthwise_conv2d_int8(): def test_bitserial_conv2d_infer_type(): # Basic shape test with ambiguous batch. - n, c, h, w = tvm.size_var("n"), 32, 224, 224 + n, c, h, w = te.size_var("n"), 32, 224, 224 x = relay.var("x", relay.ty.TensorType((n, c, h, w), "int16")) w = relay.var("w", relay.ty.TensorType((32, 32, 3, 3), "int16")) y = relay.nn.bitserial_conv2d( diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index c5f340a843a30..7e5314dd82344 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -19,6 +19,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import create_executor, transform from tvm.relay.testing import ctx_list, check_grad, run_infer_type @@ -166,7 +167,7 @@ def verify_squeeze(shape, dtype, axis): def test_transpose_infer_type(): - n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100 + n, t, d = te.size_var("n"), te.size_var("t"), 100 x = relay.var("x", relay.TensorType((n, t, d), "float32")) y = relay.transpose(x, axes=(1, 0, 2)) assert "axes=" in y.astext() @@ -274,7 +275,7 @@ def test_reshape_like_infer_type(): assert zz.checked_type == relay.TensorType((1, 6), "float32") # symbolic shape - n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w") + n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = relay.var("y", relay.TensorType((1, 8, 8), "float32")) z = relay.reshape_like(x, y) @@ -313,8 +314,8 @@ def verify_take(dshape, indices_shape, oshape, axis=None): yy = run_infer_type(y) assert yy.checked_type == relay.TensorType(oshape, "float32") - d1, d2, d3 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3") - d4, d5, d6 = tvm.var("d4"), tvm.var("d5"), tvm.var("d6") + d1, d2, d3 = te.var("d1"), te.var("d2"), te.var("d3") + d4, d5, d6 = te.var("d4"), te.var("d5"), te.var("d6") verify_take((d1,), (1,), (1,), 0) verify_take((4,), (d1, d2), (d1, d2)) verify_take((3, 3, 3), (1, d2), (1, d2)) @@ -368,12 +369,12 @@ def verify_split(dshape, indices_or_sections, ret_type, axis=None): yy = run_infer_type(y.astuple()) assert yy.checked_type == ret_type - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") - axis = tvm.var("axis") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") + axis = te.var("axis") verify_split((5, 5, 2, 2), 5, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((5, 1, 2, 2), "float32"), relay.ty.TensorType((5, 1, 2, 2), "float32"), relay.ty.TensorType((5, 1, 2, 2), "float32"), @@ -381,7 +382,7 @@ def verify_split(dshape, indices_or_sections, ret_type, axis=None): relay.ty.TensorType((5, 1, 2, 2), "float32")])), axis=1) verify_split((5, 5, 2, 2), 5, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((1, 5, 2, 2), "float32"), relay.ty.TensorType((1, 5, 2, 2), "float32"), relay.ty.TensorType((1, 5, 2, 2), "float32"), @@ -389,19 +390,19 @@ def verify_split(dshape, indices_or_sections, ret_type, axis=None): relay.ty.TensorType((1, 5, 2, 2), "float32")])), axis=0) verify_split((d1, d2, d3, d4), 4, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"), relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"), relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"), relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32")])), axis=2) verify_split((d1, d2, d3, d4), 2, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((idxd(d1, 2), d2, d3, d4), "float32"), relay.ty.TensorType((idxd(d1, 2), d2, d3, d4), "float32")])), axis=0) verify_split((d1, d2, d3, d4), (2, 4, 7), - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((d1, 2, d3, d4), "float32"), relay.ty.TensorType((d1, 2, d3, d4), "float32"), relay.ty.TensorType((d1, 3, d3, d4), "float32"), @@ -447,7 +448,7 @@ def test_full_like_infer_type(): assert yy.checked_type == relay.TensorType((1, 2, 3), "float32") # symbolic shape - n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w") + n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w") base = relay.var("base", relay.TensorType((n, c, h, w), "float32")) fill = relay.var("fill", relay.TensorType((), "float32")) y = relay.full_like(base, fill) @@ -475,7 +476,7 @@ def verify_full_like(base, fill_value, dtype): def test_infer_type_leaky_relu(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = relay.nn.leaky_relu(x, alpha=0.1) "alpha=0.1" in y.astext() @@ -539,7 +540,7 @@ def verify_infer_type_prelu(data, alpha, axis, output, dtype="float32"): def test_infer_type_prelu(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") verify_infer_type_prelu((n, c, h, w), (c,), 1, (n, c, h, w)) verify_infer_type_prelu((n, h, w, c), (c,), 3, (n, h, w, c)) verify_infer_type_prelu((n, c, h, w), None, 1, (n, c, h, w)) diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 44b51f2c23676..473ae59a9dbec 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.relay import transform @@ -24,7 +25,7 @@ def test_binary_op(): def check_binary_op(opfunc, ref): - n = tvm.size_var("n") + n = te.size_var("n") t1 = relay.TensorType((5, n, 5)) t2 = relay.TensorType((n, 1)) x = relay.var("x", t1) @@ -193,7 +194,7 @@ def _wrapper(data, axis=None, keepdims=False): return func(data, axis=axis).reshape(out_shape) return _wrapper - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") for func in [[relay.sum, np.sum], [relay.max, np.max], [relay.min, np.min], @@ -282,7 +283,7 @@ def verify(dshape, begin, end, strides, output, test_ref=True): op_res = intrp.evaluate(func)(x_data) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") verify((d1, d2, 3), [None, None, 1], [None, None, 2], None, (d1, d2, 1), False) verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3)) diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index e622a8ae01ab3..8fd05daf73c72 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -19,6 +19,7 @@ import math import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.testing import ctx_list, run_infer_type @@ -26,9 +27,9 @@ def test_resize_infer_type(): - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "int8")) - th, tw = tvm.var("th"), tvm.var("tw") + th, tw = te.var("th"), te.var("tw") z = relay.image.resize(x, (th, tw)) zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((n, c, th, tw), "int8") @@ -182,7 +183,7 @@ def verify_multibox_prior(x, dshape, ref_res, sizes=(1.0,), x = relay.var("x", relay.TensorType(dshape, "float32")) verify_multibox_prior(x, dshape, ref_res, sizes, ratios, steps, offsets, check_size=True) - y = relay.var("y", relay.TensorType((tvm.size_var("n"), 3, 56, 56), "float32")) + y = relay.var("y", relay.TensorType((te.size_var("n"), 3, 56, 56), "float32")) verify_multibox_prior(x, dshape, ref_res, sizes, ratios, steps, offsets, check_size=True, check_type_only=True) @@ -190,7 +191,7 @@ def verify_multibox_prior(x, dshape, ref_res, sizes=(1.0,), ref_res = get_ref_result(dshape, clip=False) x = relay.var("x", relay.TensorType(dshape, "float32")) verify_multibox_prior(x, dshape, ref_res, clip=False) - y = relay.var("y", relay.TensorType((tvm.size_var("n"), 24, 32, 32), "float32")) + y = relay.var("y", relay.TensorType((te.size_var("n"), 24, 32, 32), "float32")) verify_multibox_prior(x, dshape, ref_res, clip=False, check_type_only=True) @@ -280,7 +281,7 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, np_indices_result = np.array([[3, 0, -1, -1, -1]]) num_anchors = 5 - dshape = (tvm.size_var("n"), num_anchors, 6) + dshape = (te.size_var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, force_suppress=True, top_k=2, check_type_only=True) dshape = (1, num_anchors, 6) @@ -291,7 +292,7 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) np_indices_result = np.array([[3, 0, 1, -1, -1]]) - dshape = (tvm.size_var("n"), num_anchors, 6) + dshape = (te.size_var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, check_type_only=True) dshape = (1, num_anchors, 6) @@ -331,7 +332,7 @@ def test_default_value(): cls_prob=cls_prob, loc_pred=loc_pred, anchor=anchors) ret = run_infer_type(mtl.astuple()) ref_type = relay.ty.TupleType( - tvm.convert([ + tvm.runtime.convert([ relay.ty.TensorType((1, num_anchors, 6), "float32"), relay.ty.TensorType((1, ), "int") ])) @@ -354,7 +355,7 @@ def test_default_value(): def test_threshold(): num_anchors = 5 num_classes = 5 - n = tvm.size_var("n") + n = te.size_var("n") cls_prob = relay.var( "cls_prob", relay.ty.TensorType((n, num_anchors, num_classes), "float32")) @@ -373,7 +374,7 @@ def test_threshold(): variances=variances) ret = run_infer_type(ret.astuple()) ref_type = relay.ty.TupleType( - tvm.convert([ + tvm.runtime.convert([ relay.ty.TensorType((n, num_anchors, 6), "float32"), relay.ty.TensorType((n, ), "int") ])) @@ -520,8 +521,8 @@ def verify_yolo_reorg(shape, stride, out_shape): assert "stride=" in z.astext() assert zz.checked_type == relay.ty.TensorType(out_shape, "float32") - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") - idxd = tvm.indexdiv + n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") + idxd = tvm.tir.indexdiv verify_yolo_reorg((n, c, 20, 20), 10, (n, c*10*10, 2, 2)) verify_yolo_reorg((n, c, h, w), 2, (n, c*2*2, idxd(h, 2), idxd(w, 2))) diff --git a/tests/python/relay/test_op_level6.py b/tests/python/relay/test_op_level6.py index 286776e3f7b23..287e80a0fab76 100644 --- a/tests/python/relay/test_op_level6.py +++ b/tests/python/relay/test_op_level6.py @@ -18,6 +18,7 @@ """ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.testing import ctx_list diff --git a/tests/python/relay/test_op_qnn_add.py b/tests/python/relay/test_op_qnn_add.py index e1f54ed4b78c9..bd0f6612d80d1 100644 --- a/tests/python/relay/test_op_qnn_add.py +++ b/tests/python/relay/test_op_qnn_add.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_concatenate.py b/tests/python/relay/test_op_qnn_concatenate.py index 35c2f971a7911..03ab9eeb13218 100644 --- a/tests/python/relay/test_op_qnn_concatenate.py +++ b/tests/python/relay/test_op_qnn_concatenate.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py index e827c722b255c..66acda8635960 100644 --- a/tests/python/relay/test_op_qnn_conv2d.py +++ b/tests/python/relay/test_op_qnn_conv2d.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_op_qnn_dense.py b/tests/python/relay/test_op_qnn_dense.py index 43600cbf60c57..3cfcfd165b46a 100644 --- a/tests/python/relay/test_op_qnn_dense.py +++ b/tests/python/relay/test_op_qnn_dense.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_dequantize.py b/tests/python/relay/test_op_qnn_dequantize.py index b1965c97ad0d0..febf5c5e6ecca 100644 --- a/tests/python/relay/test_op_qnn_dequantize.py +++ b/tests/python/relay/test_op_qnn_dequantize.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_mul.py b/tests/python/relay/test_op_qnn_mul.py index 959a02a976adc..6516871d3fb51 100644 --- a/tests/python/relay/test_op_qnn_mul.py +++ b/tests/python/relay/test_op_qnn_mul.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_quantize.py b/tests/python/relay/test_op_qnn_quantize.py index bdc7bc04d6daf..09b04d8925c6f 100644 --- a/tests/python/relay/test_op_qnn_quantize.py +++ b/tests/python/relay/test_op_qnn_quantize.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_requantize.py b/tests/python/relay/test_op_qnn_requantize.py index 8af778160ccb7..81233972cb28d 100644 --- a/tests/python/relay/test_op_qnn_requantize.py +++ b/tests/python/relay/test_op_qnn_requantize.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_param_dict.py b/tests/python/relay/test_param_dict.py index 4161b908c7a82..497a81881c874 100644 --- a/tests/python/relay/test_param_dict.py +++ b/tests/python/relay/test_param_dict.py @@ -17,6 +17,7 @@ import os import numpy as np import tvm +from tvm import te import json import base64 from tvm._ffi.base import py_str diff --git a/tests/python/relay/test_pass_alpha_equal.py b/tests/python/relay/test_pass_alpha_equal.py index 0319d0b1a3710..7e34f48ec7e1a 100644 --- a/tests/python/relay/test_pass_alpha_equal.py +++ b/tests/python/relay/test_pass_alpha_equal.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import analysis from tvm.relay.testing import run_opt_pass @@ -64,10 +65,10 @@ def test_type_param_alpha_equal(): # function types are the only way to put type params # in eq map - ft1 = relay.FuncType(tvm.convert([]), t1, tvm.convert([t1]), tvm.convert([])) - ft2 = relay.FuncType(tvm.convert([]), t3, tvm.convert([t3]), tvm.convert([])) + ft1 = relay.FuncType(tvm.runtime.convert([]), t1, tvm.runtime.convert([t1]), tvm.runtime.convert([])) + ft2 = relay.FuncType(tvm.runtime.convert([]), t3, tvm.runtime.convert([t3]), tvm.runtime.convert([])) # actually an invalid type because t2 is wrong kind - ft3 = relay.FuncType(tvm.convert([]), t2, tvm.convert([t2]), tvm.convert([])) + ft3 = relay.FuncType(tvm.runtime.convert([]), t2, tvm.runtime.convert([t2]), tvm.runtime.convert([])) assert ft1 == ft2 assert ft1 != ft3 # kinds still do not match @@ -85,51 +86,51 @@ def test_func_type_alpha_equal(): broadcast = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast") identity = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Identity") - tr1 = relay.TypeRelation(broadcast, tvm.convert([tp1, tp3]), 1, None) - tr2 = relay.TypeRelation(broadcast, tvm.convert([tp2, tp4]), 1, None) - tr3 = relay.TypeRelation(identity, tvm.convert([tp1, tp3]), 1, None) + tr1 = relay.TypeRelation(broadcast, tvm.runtime.convert([tp1, tp3]), 1, None) + tr2 = relay.TypeRelation(broadcast, tvm.runtime.convert([tp2, tp4]), 1, None) + tr3 = relay.TypeRelation(identity, tvm.runtime.convert([tp1, tp3]), 1, None) - ft = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1])) - translate_vars = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp2, tp4]), - tvm.convert([tr2])) + ft = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1])) + translate_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp2, tp4]), + tvm.runtime.convert([tr2])) assert ft == translate_vars - different_args = relay.FuncType(tvm.convert([t1]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1])) + different_args = relay.FuncType(tvm.runtime.convert([t1]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1])) assert ft != different_args - different_order = relay.FuncType(tvm.convert([t2, t1]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1])) + different_order = relay.FuncType(tvm.runtime.convert([t2, t1]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1])) assert ft != different_order - no_rel = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([])) + no_rel = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([])) assert ft != no_rel - more_vars = relay.FuncType(tvm.convert([t1, t2]), tp2, - tvm.convert([tp1, tp2, tp3]), - tvm.convert([tr1])) + more_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp2, + tvm.runtime.convert([tp1, tp2, tp3]), + tvm.runtime.convert([tr1])) assert ft != more_vars - all_the_vars = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp2, tp3, tp4]), - tvm.convert([tr1, tr2])) + all_the_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp2, tp3, tp4]), + tvm.runtime.convert([tr1, tr2])) assert ft != all_the_vars - different_rel = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr3])) + different_rel = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr3])) assert ft != different_rel - more_rels = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1, tr3])) + more_rels = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1, tr3])) assert ft != more_rels @@ -139,10 +140,10 @@ def test_tuple_type_alpha_equal(): tp1 = relay.TypeVar("v1", relay.TypeKind.Type) tp2 = relay.TypeVar("v2", relay.TypeKind.Type) - tup1 = relay.TupleType(tvm.convert([t1, t2, tp1])) - tup2 = relay.TupleType(tvm.convert([t1, t2, tp1])) - tup3 = relay.TupleType(tvm.convert([t2, t1, tp1])) - tup4 = relay.TupleType(tvm.convert([t1, t2, tp2])) + tup1 = relay.TupleType(tvm.runtime.convert([t1, t2, tp1])) + tup2 = relay.TupleType(tvm.runtime.convert([t1, t2, tp1])) + tup3 = relay.TupleType(tvm.runtime.convert([t2, t1, tp1])) + tup4 = relay.TupleType(tvm.runtime.convert([t1, t2, tp2])) # as long as types are alpha-equal and in same order, # tuples should be alpha-equal @@ -165,16 +166,16 @@ def test_type_relation_alpha_equal(): attr1_same = tvm.ir.make_node("attrs.TestAttrs", name="attr", padding=(3,4)) attr2 = tvm.ir.make_node("attrs.TestAttrs", name="attr", padding=(3,4,4)) - tr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1) - same = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1) - diff_func = relay.TypeRelation(identity, tvm.convert([t1, t2]), 1, attr1) - diff_order = relay.TypeRelation(broadcast, tvm.convert([t2, t1]), 1, attr1) - diff_args = relay.TypeRelation(broadcast, tvm.convert([t2, t3]), 1, attr1) - diff_attr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr2) - same_attr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1_same) + tr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1) + same = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1) + diff_func = relay.TypeRelation(identity, tvm.runtime.convert([t1, t2]), 1, attr1) + diff_order = relay.TypeRelation(broadcast, tvm.runtime.convert([t2, t1]), 1, attr1) + diff_args = relay.TypeRelation(broadcast, tvm.runtime.convert([t2, t3]), 1, attr1) + diff_attr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr2) + same_attr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1_same) - bigger = relay.TypeRelation(identity, tvm.convert([t1, t3, t2]), 2, attr1) - diff_num_inputs = relay.TypeRelation(identity, tvm.convert([t1, t3, t2]), 1, attr2) + bigger = relay.TypeRelation(identity, tvm.runtime.convert([t1, t3, t2]), 2, attr1) + diff_num_inputs = relay.TypeRelation(identity, tvm.runtime.convert([t1, t3, t2]), 1, attr2) # func, number of args, input count, and order should be the same assert tr == same diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py index df01310937ed2..eabe7584f013c 100644 --- a/tests/python/relay/test_pass_alter_op_layout.py +++ b/tests/python/relay/test_pass_alter_op_layout.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import transform, analysis from tvm.relay.testing.temp_op_attr import TempOpAttr diff --git a/tests/python/relay/test_pass_annotation.py b/tests/python/relay/test_pass_annotation.py index 3e7d916c96fa4..49e9883d8ee85 100644 --- a/tests/python/relay/test_pass_annotation.py +++ b/tests/python/relay/test_pass_annotation.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.expr_functor import ExprMutator diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py index 02438ef04f2ab..35d33b10f186e 100644 --- a/tests/python/relay/test_pass_auto_quantize.py +++ b/tests/python/relay/test_pass_auto_quantize.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import testing diff --git a/tests/python/relay/test_pass_canonicalize_cast.py b/tests/python/relay/test_pass_canonicalize_cast.py index 672b4b192995c..e9ab67ff51665 100644 --- a/tests/python/relay/test_pass_canonicalize_cast.py +++ b/tests/python/relay/test_pass_canonicalize_cast.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import tvm.relay as relay import tvm.relay.transform as _transform diff --git a/tests/python/relay/test_pass_check_kind.py b/tests/python/relay/test_pass_check_kind.py index 62a92040ff166..06fe13a9a99a1 100644 --- a/tests/python/relay/test_pass_check_kind.py +++ b/tests/python/relay/test_pass_check_kind.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import check_kind import pytest @@ -33,9 +34,9 @@ def test_typevar_kind(): def test_tuple_kind(): # only contain type kinds tp = relay.TypeVar('tp', relay.TypeKind.Type) - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - tf = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([])) - fields = tvm.convert([tp, tf, tt]) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + tf = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([])) + fields = tvm.runtime.convert([tp, tf, tt]) tup_ty = relay.TupleType(fields) assert check_kind(tup_ty) == relay.TypeKind.Type @@ -46,16 +47,16 @@ def test_func_kind(): tp1 = relay.TypeVar('tp1', relay.TypeKind.Type) tp2 = relay.TypeVar('tp2', relay.TypeKind.Type) - shape = tvm.convert([1, 2, 3]) + shape = tvm.runtime.convert([1, 2, 3]) dtype = 'float32' tensor_type = relay.TensorType(shape, dtype) - tr = relay.TypeRelation(None, tvm.convert([tensor_type, tp1]) , 1, None) + tr = relay.TypeRelation(None, tvm.runtime.convert([tensor_type, tp1]) , 1, None) - type_params = tvm.convert([tp1, tp2]) - type_constraints = tvm.convert([tr]) - arg_types = tvm.convert([tp1, tensor_type]) - ret_type = relay.TupleType(tvm.convert([tp2, tensor_type])) + type_params = tvm.runtime.convert([tp1, tp2]) + type_constraints = tvm.runtime.convert([tr]) + arg_types = tvm.runtime.convert([tp1, tensor_type]) + ret_type = relay.TupleType(tvm.runtime.convert([tp2, tensor_type])) tf = relay.FuncType(arg_types, ret_type, type_params, type_constraints) assert check_kind(tf) == relay.TypeKind.Type @@ -63,8 +64,8 @@ def test_func_kind(): def test_ref_kind(): # only contain type kinds - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - ft = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([])) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + ft = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([])) rt1 = relay.RefType(tt) assert check_kind(rt1) == relay.TypeKind.Type @@ -77,9 +78,9 @@ def test_ref_kind(): def test_relation_kind(): # only have type kinds for arguments tp = relay.TypeVar('tp', relay.TypeKind.Type) - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - tf = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([])) - args = tvm.convert([tf, tt, tp]) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + tf = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([])) + args = tvm.runtime.convert([tf, tt, tp]) tr = relay.TypeRelation(None, args, 2, None) assert check_kind(tr) == relay.TypeKind.Constraint @@ -115,7 +116,7 @@ def test_invalid_tuple_kind(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType) tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) - fields = tvm.convert([tp1, tp2, tp3]) + fields = tvm.runtime.convert([tp1, tp2, tp3]) tup_ty = relay.TupleType(fields) check_kind(tup_ty) @@ -127,9 +128,9 @@ def test_invalid_func_kind(): tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType) tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) - type_params = tvm.convert([tp1, tp2, tp3]) - type_constraints = tvm.convert([]) - arg_types = tvm.convert([tp1, tp2]) + type_params = tvm.runtime.convert([tp1, tp2, tp3]) + type_constraints = tvm.runtime.convert([]) + arg_types = tvm.runtime.convert([tp1, tp2]) ret_type = tp3 tf = relay.FuncType(arg_types, ret_type, type_params, type_constraints) @@ -148,7 +149,7 @@ def test_invalid_relation_kind(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType) tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) - args = tvm.convert([tp1, tp2, tp3]) + args = tvm.runtime.convert([tp1, tp2, tp3]) func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast") tr = relay.TypeRelation(func, args, 2, None) @@ -187,7 +188,7 @@ def test_typecall_invalid_num_args(): def test_func_with_invalid_ret_type(): tp1 = relay.TypeVar('tp1', relay.TypeKind.Type) tp2 = relay.TypeVar('tp2', relay.TypeKind.ShapeVar) - tf = relay.FuncType(tvm.convert([tp1]), tp2, tvm.convert([tp1, tp2]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([tp1]), tp2, tvm.runtime.convert([tp1, tp2]), tvm.runtime.convert([])) check_kind(tf) @@ -196,7 +197,7 @@ def test_func_with_invalid_ret_type(): def test_func_with_invalid_arg_types(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) tp2 = relay.TypeVar('tp2', relay.TypeKind.Type) - tf = relay.FuncType(tvm.convert([tp1]), tp2, tvm.convert([tp1, tp2]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([tp1]), tp2, tvm.runtime.convert([tp1, tp2]), tvm.runtime.convert([])) check_kind(tf) @@ -205,9 +206,9 @@ def test_func_with_invalid_arg_types(): def test_func_with_invalid_tuple(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) - ret_type = relay.TupleType(tvm.convert([tp1, tp1, tp1])) + ret_type = relay.TupleType(tvm.runtime.convert([tp1, tp1, tp1])) - tf = relay.FuncType(tvm.convert([]), ret_type, tvm.convert([tp1]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([]), ret_type, tvm.runtime.convert([tp1]), tvm.runtime.convert([])) check_kind(tf) @@ -218,20 +219,20 @@ def test_func_with_invalid_relation(): tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Identity") - tr = relay.TypeRelation(func, tvm.convert([tp2, tp3]), 1, None) + tr = relay.TypeRelation(func, tvm.runtime.convert([tp2, tp3]), 1, None) - tf = relay.FuncType(tvm.convert([tp1]), tp1, tvm.convert([tp1, tp2, tp3]), tvm.convert([tr])) + tf = relay.FuncType(tvm.runtime.convert([tp1]), tp1, tvm.runtime.convert([tp1, tp2, tp3]), tvm.runtime.convert([tr])) check_kind(tf) @pytest.mark.xfail(raises=tvm.error.TVMError) def test_tuple_with_invalid_func(): - tensor_type = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') + tensor_type = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) - tf = relay.FuncType(tvm.convert([]), tp1, tvm.convert([tp1]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([]), tp1, tvm.runtime.convert([tp1]), tvm.runtime.convert([])) - tup_ty = relay.TupleType(tvm.convert([tensor_type, tf])) + tup_ty = relay.TupleType(tvm.runtime.convert([tensor_type, tf])) check_kind(tup_ty) diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index c10a7b8d1b392..ec9bcd9f2bc48 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_combine_parallel_dense.py b/tests/python/relay/test_pass_combine_parallel_dense.py index f693f30060d90..84d8211666d89 100644 --- a/tests/python/relay/test_pass_combine_parallel_dense.py +++ b/tests/python/relay/test_pass_combine_parallel_dense.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_convert_op_layout.py b/tests/python/relay/test_pass_convert_op_layout.py index 4b80d6ca120d3..f9e7ca9a4b582 100644 --- a/tests/python/relay/test_pass_convert_op_layout.py +++ b/tests/python/relay/test_pass_convert_op_layout.py @@ -16,6 +16,7 @@ # under the License. """Test alter op layout pass""" import tvm +from tvm import te from tvm import relay from tvm.relay.op import register_alter_op_layout diff --git a/tests/python/relay/test_pass_dead_code_elimination.py b/tests/python/relay/test_pass_dead_code_elimination.py index 3f1ec9efd5e1a..604ec8969ef76 100644 --- a/tests/python/relay/test_pass_dead_code_elimination.py +++ b/tests/python/relay/test_pass_dead_code_elimination.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import Function, transform from tvm.relay.analysis import alpha_equal, graph_equal, free_vars, assert_alpha_equal @@ -25,7 +26,7 @@ class env: def __init__(self): - self.shape = tvm.convert([1, 2, 3]) + self.shape = tvm.runtime.convert([1, 2, 3]) self.tt = relay.TensorType(self.shape, "float32") self.int32 = relay.TensorType([], "int32") self.float32 = relay.TensorType([], "float32") diff --git a/tests/python/relay/test_pass_eliminate_common_subexpr.py b/tests/python/relay/test_pass_eliminate_common_subexpr.py index e2fec6161c872..dddbef73e5646 100644 --- a/tests/python/relay/test_pass_eliminate_common_subexpr.py +++ b/tests/python/relay/test_pass_eliminate_common_subexpr.py @@ -16,6 +16,7 @@ # under the License. """Test eliminate common subexpr pass""" import tvm +from tvm import te from tvm import relay from tvm.relay.op import register_alter_op_layout diff --git a/tests/python/relay/test_pass_eta_expand.py b/tests/python/relay/test_pass_eta_expand.py index b9eb2a1e692d9..ad04e413b21b0 100644 --- a/tests/python/relay/test_pass_eta_expand.py +++ b/tests/python/relay/test_pass_eta_expand.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay import tvm.relay.transform as _transform diff --git a/tests/python/relay/test_pass_fold_constant.py b/tests/python/relay/test_pass_fold_constant.py index 08834f14e8512..5e1cd8d8b69df 100644 --- a/tests/python/relay/test_pass_fold_constant.py +++ b/tests/python/relay/test_pass_fold_constant.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.build_module import bind_params_by_name diff --git a/tests/python/relay/test_pass_fold_scale_axis.py b/tests/python/relay/test_pass_fold_scale_axis.py index bfc3caba45e31..4c094fb3e6e72 100644 --- a/tests/python/relay/test_pass_fold_scale_axis.py +++ b/tests/python/relay/test_pass_fold_scale_axis.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_fuse_ops.py b/tests/python/relay/test_pass_fuse_ops.py index e11b6aeb0a2cc..a66022275c96a 100644 --- a/tests/python/relay/test_pass_fuse_ops.py +++ b/tests/python/relay/test_pass_fuse_ops.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.testing import run_opt_pass diff --git a/tests/python/relay/test_pass_gradient.py b/tests/python/relay/test_pass_gradient.py index 6c2ea8ffa3b36..6f2a12589fb5e 100644 --- a/tests/python/relay/test_pass_gradient.py +++ b/tests/python/relay/test_pass_gradient.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import free_vars, free_type_vars, assert_alpha_equal from tvm.relay import create_executor, transform diff --git a/tests/python/relay/test_pass_lambda_lift.py b/tests/python/relay/test_pass_lambda_lift.py index a66c4c7d745ae..e388878295513 100644 --- a/tests/python/relay/test_pass_lambda_lift.py +++ b/tests/python/relay/test_pass_lambda_lift.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_legalize.py b/tests/python/relay/test_pass_legalize.py index e4e16c002abf8..9976eca28b290 100644 --- a/tests/python/relay/test_pass_legalize.py +++ b/tests/python/relay/test_pass_legalize.py @@ -17,6 +17,7 @@ """Test legalize pass""" import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_pass_mac_count.py b/tests/python/relay/test_pass_mac_count.py index 5ce0e41cfbac0..697aad8eedb7d 100644 --- a/tests/python/relay/test_pass_mac_count.py +++ b/tests/python/relay/test_pass_mac_count.py @@ -17,6 +17,7 @@ """Unit tests for MAC counter.""" import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import analysis, transform @@ -39,7 +40,7 @@ def test_gemm(): data2 = relay.var("data2", shape=dshape2) gemm = relay.nn.dense(data1, data2) func = relay.Function([data1, data2], - relay.Tuple(tvm.convert([gemm]))) + relay.Tuple(tvm.runtime.convert([gemm]))) func = run_opt_pass(func, transform.InferType()) compute_count = analysis.get_total_mac_number(func) expect_count = n * m * k @@ -66,7 +67,7 @@ def test_conv(): channels=output_channel, kernel_size=(kh, kw), padding=(h_padding, w_padding)) - func = relay.Function([data, weight], relay.Tuple(tvm.convert([conv2d]))) + func = relay.Function([data, weight], relay.Tuple(tvm.runtime.convert([conv2d]))) func = run_opt_pass(func, transform.InferType()) compute_count = analysis.get_total_mac_number(func) expect_count = batch_size * input_channel * oh * ow * output_channel * kh * kw @@ -99,7 +100,7 @@ def test_simple_network(): weight_dense) func = relay.Function([data1, data2, weight_conv, weight_dense], - relay.Tuple(tvm.convert([conv2d_1, conv2d_2, + relay.Tuple(tvm.runtime.convert([conv2d_1, conv2d_2, dense_1, add, flattened]))) # alter the CONV 2D data layout to test func = run_opt_pass(func, transform.AlterOpLayout()) @@ -127,7 +128,7 @@ def test_depthwise_conv2d(): groups=64) add = relay.add(depthwise_conv2d_1, depthwise_conv2d_2) func = relay.Function([data1, data2, weight_conv], - relay.Tuple(tvm.convert([depthwise_conv2d_1, + relay.Tuple(tvm.runtime.convert([depthwise_conv2d_1, depthwise_conv2d_2, add]))) func = run_opt_pass(func, transform.InferType()) @@ -156,7 +157,7 @@ def test_conv_2d_transpose(): kernel_size=(kh, kw), padding=(h_padding, w_padding)) func = relay.Function([data, weight], - relay.Tuple(tvm.convert([conv2d_transpose]))) + relay.Tuple(tvm.runtime.convert([conv2d_transpose]))) func = run_opt_pass(func, transform.InferType()) compute_count = analysis.get_total_mac_number(func) expect_count = batch_size * input_channel * oh * ow * output_channel * kh * kw diff --git a/tests/python/relay/test_pass_manager.py b/tests/python/relay/test_pass_manager.py index a13e5e93ea9c2..aed026996a214 100644 --- a/tests/python/relay/test_pass_manager.py +++ b/tests/python/relay/test_pass_manager.py @@ -19,6 +19,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import ExprFunctor from tvm.relay import Function, Call diff --git a/tests/python/relay/test_pass_partial_eval.py b/tests/python/relay/test_pass_partial_eval.py index 2bec98c173d94..f54dd6bf69c55 100644 --- a/tests/python/relay/test_pass_partial_eval.py +++ b/tests/python/relay/test_pass_partial_eval.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import alpha_equal, assert_alpha_equal from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 6f20278133d9f..9c3228f4ff486 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -21,6 +21,7 @@ import pytest import tvm +from tvm import te import tvm.relay.testing import tvm.relay.transform as transform from tvm import relay diff --git a/tests/python/relay/test_pass_qnn_legalize.py b/tests/python/relay/test_pass_qnn_legalize.py index dee19f766605c..7d3d9cc106c86 100644 --- a/tests/python/relay/test_pass_qnn_legalize.py +++ b/tests/python/relay/test_pass_qnn_legalize.py @@ -17,6 +17,7 @@ """Test legalize pass""" import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_pass_remove_unused_functions.py b/tests/python/relay/test_pass_remove_unused_functions.py index bacc3126c7c4e..33816344f562f 100644 --- a/tests/python/relay/test_pass_remove_unused_functions.py +++ b/tests/python/relay/test_pass_remove_unused_functions.py @@ -16,6 +16,7 @@ # under the License. import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_pass_to_a_normal_form.py b/tests/python/relay/test_pass_to_a_normal_form.py index 46bde4f490b86..f68f64874c78f 100644 --- a/tests/python/relay/test_pass_to_a_normal_form.py +++ b/tests/python/relay/test_pass_to_a_normal_form.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import alpha_equal, detect_feature from tvm.relay import op, create_executor, transform diff --git a/tests/python/relay/test_pass_to_cps.py b/tests/python/relay/test_pass_to_cps.py index 4645e20c74686..fe4959ed8ce3c 100644 --- a/tests/python/relay/test_pass_to_cps.py +++ b/tests/python/relay/test_pass_to_cps.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import alpha_equal, detect_feature from tvm.relay.transform import to_cps, un_cps diff --git a/tests/python/relay/test_pass_to_graph_normal_form.py b/tests/python/relay/test_pass_to_graph_normal_form.py index 5c5221f65a46d..dc47ad350fe5c 100644 --- a/tests/python/relay/test_pass_to_graph_normal_form.py +++ b/tests/python/relay/test_pass_to_graph_normal_form.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import op, create_executor, transform, Feature from tvm.relay.analysis import detect_feature diff --git a/tests/python/relay/test_pass_unmatched_cases.py b/tests/python/relay/test_pass_unmatched_cases.py index 1ac99a69a2499..42344bccabaaa 100644 --- a/tests/python/relay/test_pass_unmatched_cases.py +++ b/tests/python/relay/test_pass_unmatched_cases.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.prelude import Prelude from tvm.relay.analysis import unmatched_cases diff --git a/tests/python/relay/test_pass_vars.py b/tests/python/relay/test_pass_vars.py index d8b77ba356121..1aad74b930e83 100644 --- a/tests/python/relay/test_pass_vars.py +++ b/tests/python/relay/test_pass_vars.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import (free_vars, free_type_vars, bound_vars, bound_type_vars, diff --git a/tests/python/relay/test_py_converter.py b/tests/python/relay/test_py_converter.py index f489e9fcb04b1..f6b1b2432d921 100644 --- a/tests/python/relay/test_py_converter.py +++ b/tests/python/relay/test_py_converter.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.testing import to_python, run_as_python from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_type_functor.py b/tests/python/relay/test_type_functor.py index 854301bf714a4..9e023bc6b1e47 100644 --- a/tests/python/relay/test_type_functor.py +++ b/tests/python/relay/test_type_functor.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import TypeFunctor, TypeMutator, TypeVisitor from tvm.relay.analysis import assert_graph_equal @@ -53,7 +54,7 @@ def test_tensor_type(): def test_func_type(): tv = TypeVar('tv') - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') ft = FuncType([tt], tt, type_params=[tv]) check_visit(ft) diff --git a/tests/python/relay/test_type_infer.py b/tests/python/relay/test_type_infer.py index 892c91d9c43a3..74507baa1096a 100644 --- a/tests/python/relay/test_type_infer.py +++ b/tests/python/relay/test_type_infer.py @@ -18,6 +18,7 @@ for expressions. """ import tvm +from tvm import te from tvm import relay from tvm.relay import op, transform, analysis from tvm.relay.analysis import assert_alpha_equal diff --git a/tests/python/relay/test_type_solver.py b/tests/python/relay/test_type_solver.py index 118066e7cf529..d90fd29a7eb56 100644 --- a/tests/python/relay/test_type_solver.py +++ b/tests/python/relay/test_type_solver.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay import pytest diff --git a/tests/python/relay/test_typecall.py b/tests/python/relay/test_typecall.py index fa2601f30af10..491047deb4c67 100644 --- a/tests/python/relay/test_typecall.py +++ b/tests/python/relay/test_typecall.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index 8cac656ee5a12..02f1e5b753f83 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import runtime from tvm import relay from tvm.relay.scope_builder import ScopeBuilder diff --git a/tests/python/relay/test_vm_serialization.py b/tests/python/relay/test_vm_serialization.py index 9fed4955705f4..5d20651a81268 100644 --- a/tests/python/relay/test_vm_serialization.py +++ b/tests/python/relay/test_vm_serialization.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm.runtime import vm as _vm from tvm.relay import vm as rly_vm from tvm import relay diff --git a/tests/python/unittest/test_arith_canonical_simplify.py b/tests/python/unittest/test_arith_canonical_simplify.py index 35822d240b047..3d17bf1ad626b 100644 --- a/tests/python/unittest/test_arith_canonical_simplify.py +++ b/tests/python/unittest/test_arith_canonical_simplify.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te class CanonicalChecker: def __init__(self): @@ -27,21 +28,21 @@ def verify(self, data, expected): def test_mul_sum_simplify(): ck = CanonicalChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(2 + (3 * x + z + y + 1) * 4 + x, x * 13 + z * 4 + y * 4 +6) ck.verify(x * 3 - 4 * x + 1, 1 - x) ck.verify(y + x * 3 - 5 * x + 1 + y, y * 2 + 1 - x * 2) - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # trucdiv ck.verify(tdiv(x + y + x + y * 3, 2), y * 2 + x) ck.verify(tmod(x + y + x + y * 3, 2), 0) # floordiv - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.verify(flm(x + x + y * 3, 2), flm(y * 3, 2)) ck.verify(fld(x + y + x + y * 3, 2), y * 2 + x) ck.verify(flm(x + y + x + y * 3, 2), 0) @@ -50,11 +51,11 @@ def test_mul_sum_simplify(): def test_split_index_simplify(): ck = CanonicalChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") # trucdiv - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # split div const ck.verify(tdiv(x, 3) *3 + tmod(x, 3), x) @@ -80,8 +81,8 @@ def test_split_index_simplify(): ck.verify(tdiv(x * 4 + y, 2) * 2 + tmod(x * 4 + y, 2), x * 4 + y) # floordiv - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.verify(fld(x, 3) * 3 + flm(x, 3), x) ck.verify(fld(x, 6) * 6 + flm(fld(x, 3), 2) * 3 + flm(x, 3), x) ck.verify(fld(fld(flm(x, 16), 2) * 2, 4), fld(flm(x, 16), 4)) @@ -95,8 +96,8 @@ def test_split_index_simplify(): def test_div_simplify(): ck = CanonicalChecker() - x = tvm.var("x") - tdiv = tvm.truncdiv + x = te.var("x") + tdiv = tvm.tir.truncdiv # truc div ck.verify(tdiv(16+48*x,16), x*3 + 1) @@ -110,7 +111,7 @@ def test_div_simplify(): ck.verify(tdiv(17 + 47 * x, 16), tdiv(x * 47 + 17, 16)) # floordiv - fld = tvm.floordiv + fld = tvm.te.floordiv ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 10000), True) ck.verify(fld(16+48*x, 16), x*3 + 1) ck.verify(fld(17+48*x, 16), x * 3 + 1) @@ -119,8 +120,8 @@ def test_div_simplify(): def test_floormod_simplify(): ck = CanonicalChecker() - flm = tvm.floormod - x, y = tvm.var("x"), tvm.var("y") + flm = tvm.te.floormod + x, y = te.var("x"), te.var("y") ck.verify(flm(flm((x*4) + y - 466036, 24528) - 24512, 16), flm((x*4) + y + 12, 16)) @@ -128,59 +129,59 @@ def test_floormod_simplify(): def test_canonical_mixed(): ck = CanonicalChecker() - x = tvm.var("x") - z = tvm.const(3, "int32") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x = te.var("x") + z = tvm.tir.const(3, "int32") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.verify(tdiv(x, (z*z)) - tdiv(x, (z*z)), 0) ck.verify(tdiv(x, (z+z)) - tdiv(x, (z+z)), 0) ck.verify(x - 2 < 3, x < 5) - ck.verify(tvm.max(x, 1) - tvm.max(x, 1), 0) - ck.verify(tvm.min(x, 1) - tvm.min(x, 1), 0) + ck.verify(tvm.te.max(x, 1) - tvm.te.max(x, 1), 0) + ck.verify(tvm.te.min(x, 1) - tvm.te.min(x, 1), 0) ck.verify(x * x - x * x, 0) - fld = tvm.floordiv + fld = tvm.te.floordiv ck.verify(fld(x, (z*z)) - fld(x, (z*z)), 0) ck.verify(fld(x, (z+z)) - fld(x, (z+z)), 0) def test_reduce_combiner_simplify(): ck = CanonicalChecker() - dummy = tvm.var('dummy') - comm_reducer = tvm.comm_reducer - prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.const(1, t0)) + dummy = te.var('dummy') + comm_reducer = te.comm_reducer + prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.tir.const(1, t0)) sum_or_prod = comm_reducer( lambda x, y: tvm.tir.Select(dummy < 0, x + y, x*y), lambda t0: tvm.tir.Select(dummy < 0, - tvm.const(0, t0), tvm.const(1, t0))) + tvm.tir.const(0, t0), tvm.tir.const(1, t0))) sum_and_prod = comm_reducer( lambda x, y: (x[0] + y[0], x[1]*y[1]), - lambda t0, t1: (tvm.const(0, t0), - tvm.const(5, t0) - tvm.const(4, t0))) + lambda t0, t1: (tvm.tir.const(0, t0), + tvm.tir.const(5, t0) - tvm.tir.const(4, t0))) some_reducer1 = comm_reducer( lambda x, y: (x[0] + y[0], x[0] + y[0] + x[1] + y[1], x[0]*y[2] + y[0]*x[2], x[1] + y[2], 4.0), - lambda t0, t1, t2, t3, t4: (tvm.const(0, t0), - tvm.const(1, t1), - tvm.const(2, t2), - tvm.const(3, t3), - tvm.const(4, t4))) - - k = tvm.reduce_axis((0, 10), name="k") - A = tvm.placeholder((10,), name='A') + lambda t0, t1, t2, t3, t4: (tvm.tir.const(0, t0), + tvm.tir.const(1, t1), + tvm.tir.const(2, t2), + tvm.tir.const(3, t3), + tvm.tir.const(4, t4))) + + k = te.reduce_axis((0, 10), name="k") + A = te.placeholder((10,), name='A') # Test that SimplifyCombiner makes use of vranges ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, -4)) - ck.verify(sum_or_prod(A[k], k), tvm.sum(A[k], k)) + ck.verify(sum_or_prod(A[k], k), te.sum(A[k], k)) ck.analyzer.update(dummy, tvm.arith.ConstIntBound(5, 9), True) ck.verify(sum_or_prod(A[k], k), prod(A[k], k)) ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, 100), True) - ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], tvm.sum(A[k], k)) + ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], te.sum(A[k], k)) ck.verify(sum_and_prod((A[k], A[10-k]), k)[1], prod(A[10-k], k)) reference_simplified_sources = [[A[0]], @@ -203,65 +204,65 @@ def test_reduce_combiner_simplify(): ck.verify(sum_and_prod((A[k], side_effect(A[10-k])), k)[0], sum_and_prod((A[k], side_effect(A[10-k])), k)[0]) ck.verify(sum_and_prod((side_effect(A[k]), A[10-k]), k)[0], - tvm.sum(side_effect(A[k]), k)) + te.sum(side_effect(A[k]), k)) def test_reduce_simplify(): ck = CanonicalChecker() - k = tvm.reduce_axis((0, 10), name="k") - j = tvm.reduce_axis((-5, 3), name="j") - A = tvm.placeholder((10,), name='A') - ck.verify(tvm.sum(tvm.tir.Select(k + j < 12, k + j, 0), [k, j]), - tvm.sum(k + j, [k, j])) - ck.verify(tvm.sum(A[3], []), A[3]) + k = te.reduce_axis((0, 10), name="k") + j = te.reduce_axis((-5, 3), name="j") + A = te.placeholder((10,), name='A') + ck.verify(te.sum(tvm.tir.Select(k + j < 12, k + j, 0), [k, j]), + te.sum(k + j, [k, j])) + ck.verify(te.sum(A[3], []), A[3]) # The rule below is not typical, removed for now - ck.verify(tvm.sum(tvm.div(k, 10), k), tvm.sum(tvm.const(0, "int32"), k)) + ck.verify(te.sum(te.div(k, 10), k), te.sum(tvm.tir.const(0, "int32"), k)) def test_simplify_if_then_else(): ck = CanonicalChecker() - x = tvm.var("x") - y = tvm.var("y") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x = te.var("x") + y = te.var("y") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # simplification that takes condition into account. - res = tvm.if_then_else((x * 4 + y) >= 466036, - tvm.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), + res = tvm.tir.if_then_else((x * 4 + y) >= 466036, + tvm.tir.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), tmod(tmod(((x*4) + y) - 466036, 24528) -24512, 16), x), y) - res2 = tvm.if_then_else((x * 4) >= 466036 - y, - tvm.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), + res2 = tvm.tir.if_then_else((x * 4) >= 466036 - y, + tvm.tir.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), tmod(tmod(((x*4) + y) - 466036, 24528) -24512, 16), x), y) - expected = tvm.if_then_else( + expected = tvm.tir.if_then_else( tvm.tir.LE(466036, (x * 4 + y)), - tvm.if_then_else(tvm.tir.LE(24512, tmod(((x*4) + y) - 4, 24528)), + tvm.tir.if_then_else(tvm.tir.LE(24512, tmod(((x*4) + y) - 4, 24528)), tmod(((x*4) + y) - 4, 16), x), y) ck.verify(res, expected) ck.verify(res2, expected) # can only simplify if condition - res = tvm.tir.Select(tvm.all(x >= -1, y >= 0), tmod(x + y + 100, 3), tmod(x + 100, 3)) - expected = tvm.tir.Select(tvm.all(x >= -1, y >= 0), tmod(x + y + 1, 3), tmod(x + 100, 3)) + res = tvm.tir.Select(tvm.tir.all(x >= -1, y >= 0), tmod(x + y + 100, 3), tmod(x + 100, 3)) + expected = tvm.tir.Select(tvm.tir.all(x >= -1, y >= 0), tmod(x + y + 1, 3), tmod(x + 100, 3)) ck.verify(res, ck.analyzer.canonical_simplify(expected)) res = tvm.tir.Select(x >= 10, - tvm.if_then_else(tdiv(x, 3) > 2, x, 0), 0) + tvm.tir.if_then_else(tdiv(x, 3) > 2, x, 0), 0) expected = tvm.tir.Select(x >= 10, x, 0) ck.verify(res, ck.analyzer.canonical_simplify(expected)) res = tvm.tir.Select(x >= 10, - tvm.if_then_else(tdiv(x, 3) < 2, x, 0), 0) + tvm.tir.if_then_else(tdiv(x, 3) < 2, x, 0), 0) ck.verify(res, 0) def test_complex_cases(): ck = CanonicalChecker() - x = tvm.var("x") - y = tvm.var("y") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x = te.var("x") + y = te.var("y") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod res2 = (tdiv(tdiv(tmod(x*128 + y, 1296),36)*2 + 1,2)*36 + tdiv(tmod((x*128) + y, 36)*2 + 1,2) - tmod((x*128) + y, 1296) + 1) diff --git a/tests/python/unittest/test_arith_const_int_bound.py b/tests/python/unittest/test_arith_const_int_bound.py index aba56ac6c0c52..4829b97c348e0 100644 --- a/tests/python/unittest/test_arith_const_int_bound.py +++ b/tests/python/unittest/test_arith_const_int_bound.py @@ -15,21 +15,22 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_dtype_bound(): analyzer = tvm.arith.Analyzer() - x = tvm.var("x", dtype="int64") + x = te.var("x", dtype="int64") bd = analyzer.const_int_bound(x) assert bd.min_value == bd.NEG_INF assert bd.max_value == bd.POS_INF - x = tvm.var("x", dtype="int8") + x = te.var("x", dtype="int8") bd = analyzer.const_int_bound(x) assert bd.min_value == -128 assert bd.max_value == 127 - x = tvm.var("x", dtype="uint8") + x = te.var("x", dtype="uint8") bd = analyzer.const_int_bound(x) assert bd.min_value == 0 assert bd.max_value == 255 @@ -37,8 +38,8 @@ def test_dtype_bound(): def test_cast_bound(): analyzer = tvm.arith.Analyzer() - x = tvm.var("x", dtype="int8") - tmod = tvm.truncmod + x = te.var("x", dtype="int8") + tmod = tvm.tir.truncmod bd = analyzer.const_int_bound(tmod(x, 3).astype("uint32")) assert bd.min_value == 0 assert bd.max_value == 2 @@ -51,7 +52,7 @@ def test_cast_bound(): def test_add_sub_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x", "int64"), tvm.var("y", "int64") + x, y = te.var("x", "int64"), te.var("y", "int64") bd = analyzer.const_int_bound(x + y) assert bd.min_value == bd.NEG_INF assert bd.max_value == bd.POS_INF @@ -78,7 +79,7 @@ def test_add_sub_bound(): def test_mul_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-2, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -101,8 +102,8 @@ def test_mul_bound(): def test_truncdiv_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - tdiv = tvm.truncdiv + x, y = te.var("x"), te.var("y") + tdiv = tvm.tir.truncdiv analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -124,9 +125,9 @@ def test_truncdiv_bound(): def test_truncmod_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") - tmod = tvm.truncmod + tmod = tvm.tir.truncmod analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -149,8 +150,8 @@ def test_truncmod_bound(): def test_floordiv_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - fld = tvm.floordiv + x, y = te.var("x"), te.var("y") + fld = tvm.te.floordiv analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) bd = analyzer.const_int_bound(fld(x, y)) @@ -171,8 +172,8 @@ def test_floordiv_bound(): def test_floormod_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - flm = tvm.floormod + x, y = te.var("x"), te.var("y") + flm = tvm.te.floormod analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -195,34 +196,34 @@ def test_floormod_bound(): def test_min_max_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-9, 11)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) - bd = analyzer.const_int_bound(tvm.min(x, y)) + bd = analyzer.const_int_bound(tvm.te.min(x, y)) assert bd.min_value == -9 assert bd.max_value == 10 analyzer.update(x, tvm.arith.ConstIntBound(bd.NEG_INF, bd.POS_INF), override=True) analyzer.update(y, tvm.arith.ConstIntBound(4, 10), override=True) - bd = analyzer.const_int_bound(tvm.min(x, y)) + bd = analyzer.const_int_bound(tvm.te.min(x, y)) assert bd.min_value == bd.NEG_INF assert bd.max_value == 10 - bd = analyzer.const_int_bound(tvm.max(x, y)) + bd = analyzer.const_int_bound(tvm.te.max(x, y)) assert bd.min_value == 4 assert bd.max_value == bd.POS_INF analyzer.update(x, tvm.arith.ConstIntBound(1, bd.POS_INF), override=True) analyzer.update(y, tvm.arith.ConstIntBound(4, 10), override=True) - bd = analyzer.const_int_bound(tvm.max(x, y)) + bd = analyzer.const_int_bound(tvm.te.max(x, y)) assert bd.min_value == 4 assert bd.max_value == bd.POS_INF def test_select_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-9, 11)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -235,7 +236,7 @@ def test_select_bound(): def test_shift_and_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-9, 11)) analyzer.update(y, tvm.arith.ConstIntBound(2, 10)) @@ -256,9 +257,9 @@ def test_shift_and_bound(): def test_mix_index_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y = te.var("x"), te.var("y") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod analyzer.update(x, tvm.arith.ConstIntBound(0, 24 - 1)) analyzer.update(y, tvm.arith.ConstIntBound(0, 3 - 1)) @@ -277,7 +278,7 @@ def test_mix_index_bound(): def test_size_var_bound(): analyzer = tvm.arith.Analyzer() - x = tvm.size_var("x") + x = te.size_var("x") bd = analyzer.const_int_bound(x) assert bd.min_value == 0 assert bd.max_value == bd.POS_INF diff --git a/tests/python/unittest/test_arith_deduce_bound.py b/tests/python/unittest/test_arith_deduce_bound.py index 5e08635cd53fd..3d5a3298f19eb 100644 --- a/tests/python/unittest/test_arith_deduce_bound.py +++ b/tests/python/unittest/test_arith_deduce_bound.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def assert_expr_equal(a, b): @@ -25,17 +26,17 @@ def assert_expr_equal(a, b): def test_deduce(): - a = tvm.var('a') - b = tvm.var('b') - c = tvm.var('c') - d = tvm.var('d') + a = te.var('a') + b = te.var('b') + c = te.var('c') + d = te.var('d') b_s = tvm.arith.IntervalSet(2, 3) c_s = tvm.arith.IntervalSet(10, 15) d_s = tvm.arith.IntervalSet(-3, -1) - zero = tvm.const(0, "int32") + zero = tvm.tir.const(0, "int32") - fdiv = tvm.floordiv + fdiv = tvm.te.floordiv e0 = (-b)*a+c-d res0 = tvm.arith.deduce_bound(a, e0>=0, {b: b_s, c: c_s, d: d_s}, {}) @@ -68,13 +69,13 @@ def test_deduce(): assert_expr_equal(res1.max_value, ans1) - e2 = (tvm.max(5, a * 4) < 0) + e2 = (tvm.te.max(5, a * 4) < 0) res2 = tvm.arith.deduce_bound(a, e2, {b: b_s, c: c_s, d: d_s}, {}) assert str(res2.max_value) == "neg_inf" assert str(res2.min_value) == "pos_inf" # expression containing variable a is on rhs - e2 = (zero < tvm.max(5, a * 4)) + e2 = (zero < tvm.te.max(5, a * 4)) res2 = tvm.arith.deduce_bound(a, e2, {b: b_s, c: c_s, d: d_s}, {}) assert str(res2.max_value) == "neg_inf" assert str(res2.min_value) == "pos_inf" @@ -127,10 +128,10 @@ def test_deduce(): def test_check(): - a = tvm.var('a') - b = tvm.var('b') - c = tvm.var('c') - d = tvm.var('d') + a = te.var('a') + b = te.var('b') + c = te.var('c') + d = te.var('d') b_s = tvm.arith.IntervalSet(2, 3) c_s = tvm.arith.IntervalSet(5, 7) @@ -150,8 +151,8 @@ def test_check(): def test_deduce_basic(): def test_basic(a1, a2, coff): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') b_s = tvm.arith.IntervalSet(a1, a2) e0 = b + a*coff + 3 @@ -160,12 +161,12 @@ def test_basic(a1, a2, coff): assert (tvm.ir_pass.Simplify((x * coff + 3 + y) < 17)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(17, "int32") < e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32") < e0, {b: b_s}, {b: b_s}) [x, y] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value] assert (tvm.ir_pass.Simplify((x * coff + 3 + y) > 17)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(17, "int32")>= e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32")>= e0, {b: b_s}, {b: b_s}) [x, y] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value] assert (tvm.ir_pass.Simplify((x * coff + 3 + y) <= 17)).value == 1 @@ -182,8 +183,8 @@ def test_basic(a1, a2, coff): def test_deduce_complex(): def test_complex(a1, a2, coff): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') b_s = tvm.arith.IntervalSet(a1, a2) e0 = (b*3 + a* coff) * 4 @@ -192,7 +193,7 @@ def test_complex(a1, a2, coff): assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) < 63)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(63, "int32")>= e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32")>= e0, {b: b_s}, {b: b_s}) [t, x] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value] assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) <= 63)).value == 1 @@ -201,7 +202,7 @@ def test_complex(a1, a2, coff): assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) > 63)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(63, "int32") <= e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32") <= e0, {b: b_s}, {b: b_s}) [t, x] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value] assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) >= 63)).value == 1 diff --git a/tests/python/unittest/test_arith_detect_clip_bound.py b/tests/python/unittest/test_arith_detect_clip_bound.py index 44ae24cb6815c..217169f194e57 100644 --- a/tests/python/unittest/test_arith_detect_clip_bound.py +++ b/tests/python/unittest/test_arith_detect_clip_bound.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_basic(): - a = tvm.var("a") - b = tvm.var("b") - c = tvm.var("c") - m = tvm.arith.detect_clip_bound(tvm.all(a * 1 < b * 6, + a = te.var("a") + b = te.var("b") + c = te.var("c") + m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6, a - 1 > 0), [a]) assert tvm.ir_pass.Simplify(m[1] - (b * 6 - 1)).value == 0 assert m[0].value == 2 - m = tvm.arith.detect_clip_bound(tvm.all(a * 1 < b * 6, + m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6, a - 1 > 0), [a, b]) assert len(m) == 0 - m = tvm.arith.detect_clip_bound(tvm.all(a + 10 * c <= 20, + m = tvm.arith.detect_clip_bound(tvm.tir.all(a + 10 * c <= 20, b - 1 > 0), [a, b]) assert tvm.ir_pass.Simplify(m[1] - (20 - 10 * c)).value == 0 assert tvm.ir_pass.Simplify(m[2] - 2).value == 0 diff --git a/tests/python/unittest/test_arith_detect_linear_equation.py b/tests/python/unittest/test_arith_detect_linear_equation.py index 3b103026aec3d..6a80bf08899cd 100644 --- a/tests/python/unittest/test_arith_detect_linear_equation.py +++ b/tests/python/unittest/test_arith_detect_linear_equation.py @@ -15,10 +15,11 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_basic(): - a = tvm.var("a") - b = tvm.var("b") + a = te.var("a") + b = te.var("b") m = tvm.arith.detect_linear_equation(a * 4 + b * 6 + 7, [a]) assert m[0].value == 4 assert tvm.ir_pass.Simplify(m[1] - (b * 6 + 7)).value == 0 @@ -41,8 +42,8 @@ def test_basic(): assert tvm.ir_pass.Simplify(m[0] - b * 7).value == 0 def test_multivariate(): - v = [tvm.var("v%d" % i) for i in range(4)] - b = tvm.var("b") + v = [te.var("v%d" % i) for i in range(4)] + b = te.var("b") m = tvm.arith.detect_linear_equation(v[0] * (b + 4) + v[0] + v[1] * 8, v) assert(tvm.ir_pass.Equal(tvm.ir_pass.Simplify(m[0]), b + 5)) assert(m[1].value == 8) diff --git a/tests/python/unittest/test_arith_domain_touched.py b/tests/python/unittest/test_arith_domain_touched.py index 7876fb6c4d37a..0d769aabf247a 100644 --- a/tests/python/unittest/test_arith_domain_touched.py +++ b/tests/python/unittest/test_arith_domain_touched.py @@ -15,14 +15,15 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_domain_touched(): - i = tvm.var('i') - j = tvm.var('j') - n = tvm.convert(100) - m = tvm.var('m') - a = tvm.placeholder((n, m), name = 'a') - b = tvm.placeholder((n, m), name = 'b') + i = te.var('i') + j = te.var('j') + n = tvm.runtime.convert(100) + m = te.var('m') + a = te.placeholder((n, m), name = 'a') + b = te.placeholder((n, m), name = 'b') ir = tvm.tir.For( i, 0, n, 0, 0, tvm.tir.For(j, 0, m, 0, 0, diff --git a/tests/python/unittest/test_arith_intset.py b/tests/python/unittest/test_arith_intset.py index dad2fa705b0f6..f248ef082651a 100644 --- a/tests/python/unittest/test_arith_intset.py +++ b/tests/python/unittest/test_arith_intset.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te class IntSetChecker: @@ -52,7 +53,7 @@ def test_vector(): def test_add_sub(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") ck.verify(x + y, {x : tvm.arith.IntervalSet(0, 10)}, (y, 10 + y)) ck.verify(x + y, {x : tvm.arith.IntervalSet(0, 10), y : tvm.arith.IntervalSet(1, 11)}, @@ -63,9 +64,9 @@ def test_add_sub(): def test_mul_div(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv ck.analyzer.update(y, tvm.arith.ConstIntBound(1, 100), override=True) ck.verify(x * y, {x : tvm.arith.IntervalSet(0, 10)}, (0, 10 * y)) ck.verify(x * 2, {x : tvm.arith.IntervalSet(1, 10)}, (2, 20)) @@ -74,35 +75,35 @@ def test_mul_div(): ck.verify(tdiv(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, tdiv(10, y))) ck.verify(tdiv(x, 2), {x : tvm.arith.IntervalSet(1, 10)}, (0, 5)) - fld = tvm.floordiv + fld = tvm.te.floordiv ck.verify(fld(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, fld(10, y))) ck.verify(fld(x, 2), {x : tvm.arith.IntervalSet(-1, 10)}, (-1, 5)) def test_mod(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") - tmod = tvm.truncmod + x, y = te.var("x"), te.var("y") + tmod = tvm.tir.truncmod ck.analyzer.update(y, tvm.arith.ConstIntBound(1, 100), override=True) ck.verify(tmod(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, y - 1)) ck.verify(tmod(x, 10), {x : tvm.arith.IntervalSet(1, 10)}, (0, 9)) - flm = tvm.floormod + flm = tvm.te.floormod ck.verify(flm(x, 10), {x : tvm.arith.IntervalSet(-10, 10)}, (0, 9)) def test_max_min(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") - ck.verify(tvm.max(x, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (1, 11)) - ck.verify(tvm.min(x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 9)) - ck.verify(tvm.min(x, y), {}, (tvm.min(x, y), tvm.min(x, y))) - ck.verify(tvm.max(x, y), {}, (tvm.max(x, y), tvm.max(x, y))) + x, y = te.var("x"), te.var("y") + ck.verify(tvm.te.max(x, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (1, 11)) + ck.verify(tvm.te.min(x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 9)) + ck.verify(tvm.te.min(x, y), {}, (tvm.te.min(x, y), tvm.te.min(x, y))) + ck.verify(tvm.te.max(x, y), {}, (tvm.te.max(x, y), tvm.te.max(x, y))) def test_select(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") ck.verify(tvm.tir.Select(x > 0, x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 11)) diff --git a/tests/python/unittest/test_arith_modular_set.py b/tests/python/unittest/test_arith_modular_set.py index 6bb86e4c4717c..01180d2efb694 100644 --- a/tests/python/unittest/test_arith_modular_set.py +++ b/tests/python/unittest/test_arith_modular_set.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_cast(): analyzer = tvm.arith.Analyzer() - x = tvm.var("x", dtype="int8") + x = te.var("x", dtype="int8") m = analyzer.modular_set((x * 3).astype("uint32")) assert m.coeff == 3 assert m.base == 0 @@ -31,7 +32,7 @@ def test_cast(): def test_add_sub(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x", "int64"), tvm.var("y", "int64") + x, y = te.var("x", "int64"), te.var("y", "int64") m = analyzer.modular_set(x * 6 + y * 4) assert m.coeff == 2 assert m.base == 0 @@ -44,7 +45,7 @@ def test_add_sub(): def test_mul(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") m = analyzer.modular_set((x * 4 + 2) * (y * 6 + 1)) assert m.coeff == 4 assert m.base == 2 @@ -52,9 +53,9 @@ def test_mul(): def test_div_shift(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") # not sure if x is non-negative - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv m = analyzer.modular_set(tdiv(x * 4 + 2, 2)) assert m.coeff == 1 assert m.base == 0 @@ -62,7 +63,7 @@ def test_div_shift(): m = analyzer.modular_set((x * 4 + 2) >> 1) assert m.coeff == 2 assert m.base == 1 - fld = tvm.floordiv + fld = tvm.te.floordiv m = analyzer.modular_set(fld(x * 4 + 2, 2)) assert m.coeff == 2 assert m.base == 1 @@ -75,12 +76,12 @@ def test_div_shift(): def test_min_max_select(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - m = analyzer.modular_set(tvm.min(x * 3, y * 9)) + x, y = te.var("x"), te.var("y") + m = analyzer.modular_set(tvm.te.min(x * 3, y * 9)) assert m.coeff == 3 assert m.base == 0 - m = analyzer.modular_set(tvm.max(x * 3 + 1, y * 9 + 4)) + m = analyzer.modular_set(tvm.te.max(x * 3 + 1, y * 9 + 4)) assert m.coeff == 3 assert m.base == 1 @@ -90,10 +91,10 @@ def test_min_max_select(): def test_mix_index(): - a = tvm.var("a") - b = tvm.var("b") + a = te.var("a") + b = te.var("b") analyzer = tvm.arith.Analyzer() - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv m = analyzer.modular_set(a * 4 + b * 6 + 7) assert m.coeff == 2 assert m.base == 1 @@ -114,16 +115,16 @@ def test_mix_index(): assert m.coeff == 3 assert m.base == 2 - m = analyzer.modular_set(a * 12 + tvm.min(b * 3 * 7, 2)) + m = analyzer.modular_set(a * 12 + tvm.te.min(b * 3 * 7, 2)) assert m.coeff == 1 assert m.base == 0 def test_constraint_scope(): - a = tvm.var("a") - b = tvm.var("b") + a = te.var("a") + b = te.var("b") analyzer = tvm.arith.Analyzer() - tmod = tvm.truncmod + tmod = tvm.tir.truncmod with analyzer.constraint_scope(tmod(b, 4) == 2): m = analyzer.modular_set(b + 1) @@ -142,9 +143,9 @@ def test_constraint_scope(): assert m.base == 0 def test_intersect(): - a = tvm.var("a") + a = te.var("a") analyzer = tvm.arith.Analyzer() - tmod = tvm.truncmod + tmod = tvm.tir.truncmod with analyzer.constraint_scope(tmod(a, 4) == 1): with analyzer.constraint_scope(tmod(a, 3) == 1): m = analyzer.modular_set(a) diff --git a/tests/python/unittest/test_arith_rewrite_simplify.py b/tests/python/unittest/test_arith_rewrite_simplify.py index 84560e8c1f9d6..9f58ef4506fa8 100644 --- a/tests/python/unittest/test_arith_rewrite_simplify.py +++ b/tests/python/unittest/test_arith_rewrite_simplify.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te class RewriteChecker: def __init__(self): @@ -27,7 +28,7 @@ def verify(self, data, expected): def test_vector_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") # Add rules ck.verify(tvm.tir.Ramp(x, 1, 4) + tvm.tir.Ramp(y, 2, 4), tvm.tir.Ramp(x + y, 3, 4)) @@ -56,8 +57,8 @@ def test_vector_simplify(): tvm.tir.Ramp(x * 2, 8, 4)) ## DivMod rules - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # truc div ck.verify(tdiv(y.astype("int32x2"), x.astype("int32x2")), tdiv(y, x).astype("int32x2")) @@ -78,8 +79,8 @@ def test_vector_simplify(): tmod(tvm.tir.Ramp(1, 15, 4), 8)) # floor div - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.analyzer.update(x, tvm.arith.ConstIntBound(-10, 1000), override=True) ck.verify(fld(y.astype("int32x2"), x.astype("int32x2")), fld(y, x).astype("int32x2")) @@ -99,16 +100,16 @@ def test_vector_simplify(): flm(tvm.tir.Ramp(1, 15, 4), 8)) # Min/Max rules - vx = tvm.var("vx", dtype="int32x2") - vc = tvm.var("vc", dtype="uint1") - ck.verify(tvm.min(y.astype("int32x2"), x.astype("int32x2")), - tvm.min(y, x).astype("int32x2")) - ck.verify(tvm.min(tvm.min(vx, y.astype("int32x2")), x.astype("int32x2")), - tvm.min(vx, tvm.min(y, x).astype("int32x2"))) - ck.verify(tvm.max(y.astype("int32x2"), x.astype("int32x2")), - tvm.max(y, x).astype("int32x2")) - ck.verify(tvm.max(tvm.max(vx, y.astype("int32x2")), x.astype("int32x2")), - tvm.max(vx, tvm.max(y, x).astype("int32x2"))) + vx = te.var("vx", dtype="int32x2") + vc = te.var("vc", dtype="uint1") + ck.verify(tvm.te.min(y.astype("int32x2"), x.astype("int32x2")), + tvm.te.min(y, x).astype("int32x2")) + ck.verify(tvm.te.min(tvm.te.min(vx, y.astype("int32x2")), x.astype("int32x2")), + tvm.te.min(vx, tvm.te.min(y, x).astype("int32x2"))) + ck.verify(tvm.te.max(y.astype("int32x2"), x.astype("int32x2")), + tvm.te.max(y, x).astype("int32x2")) + ck.verify(tvm.te.max(tvm.te.max(vx, y.astype("int32x2")), x.astype("int32x2")), + tvm.te.max(vx, tvm.te.max(y, x).astype("int32x2"))) ## Logical rules ck.verify(y.astype("int32x2").equal(x.astype("int32x2")), @@ -131,7 +132,7 @@ def test_vector_simplify(): def test_select_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") # Add rules ck.verify(tvm.tir.Select(x < 0, y, 0) + tvm.tir.Select(x < 0, 1, z), tvm.tir.Select(x < 0, y + 1, z)) @@ -141,10 +142,10 @@ def test_select_simplify(): tvm.tir.Select(x < 0, 0, z - y)) ck.verify(tvm.tir.Select(x < 0, y, z) - z, tvm.tir.Select(x < 0, y - z, 0)) - ck.verify(tvm.min(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), - tvm.tir.Select(x < 0, tvm.min(y, 1), tvm.min(0, z))) - ck.verify(tvm.max(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), - tvm.tir.Select(x < 0, tvm.max(y, 1), tvm.max(0, z))) + ck.verify(tvm.te.min(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), + tvm.tir.Select(x < 0, tvm.te.min(y, 1), tvm.te.min(0, z))) + ck.verify(tvm.te.max(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), + tvm.tir.Select(x < 0, tvm.te.max(y, 1), tvm.te.max(0, z))) ck.verify(tvm.tir.Select(x * 3 + 1 != 0, y, z), y) ck.verify(tvm.tir.Select(x * 3 + 1 == 0, y, z), z) @@ -153,30 +154,30 @@ def test_select_simplify(): def test_add_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(x + (y - x), y) ck.verify(x - (y + 1) + (y + 1), x) ck.verify((x - 10) + (10 - z), x - z) ck.verify((x - y) + (z - x), z - y) - ck.verify(tvm.min(x, y - z) + z, tvm.min(x + z, y)) - ck.verify(tvm.min(x - z, y) + z, tvm.min(x, y + z)) - ck.verify(tvm.max(x, y - 10) + 10, tvm.max(x + 10, y)) - ck.verify(tvm.max(x - 11, y) + 11, tvm.max(x, y + 11)) + ck.verify(tvm.te.min(x, y - z) + z, tvm.te.min(x + z, y)) + ck.verify(tvm.te.min(x - z, y) + z, tvm.te.min(x, y + z)) + ck.verify(tvm.te.max(x, y - 10) + 10, tvm.te.max(x + 10, y)) + ck.verify(tvm.te.max(x - 11, y) + 11, tvm.te.max(x, y + 11)) - ck.verify(tvm.max(x, y * 2) + tvm.min(x, y * 2), x + y * 2); - ck.verify(tvm.min(x, y * 2) + tvm.max(x, y * 2), x + y * 2); + ck.verify(tvm.te.max(x, y * 2) + tvm.te.min(x, y * 2), x + y * 2); + ck.verify(tvm.te.min(x, y * 2) + tvm.te.max(x, y * 2), x + y * 2); - ck.verify(tvm.max(x, y + 2) + (-2), tvm.max(x + (-2), y)); - ck.verify(tvm.min(x, y + 2) + (-2), tvm.min(x + (-2), y)); - ck.verify(tvm.min(x + 2, y + 3) + (-2), tvm.min(x, y + 1)); + ck.verify(tvm.te.max(x, y + 2) + (-2), tvm.te.max(x + (-2), y)); + ck.verify(tvm.te.min(x, y + 2) + (-2), tvm.te.min(x + (-2), y)); + ck.verify(tvm.te.min(x + 2, y + 3) + (-2), tvm.te.min(x, y + 1)); - ck.verify(tvm.max(0, 1 - x * 4) + x * 4, tvm.max(x * 4, 1)) - ck.verify(tvm.max(2 - x * 4, 0) + x * 4, tvm.max(x * 4, 2)) + ck.verify(tvm.te.max(0, 1 - x * 4) + x * 4, tvm.te.max(x * 4, 1)) + ck.verify(tvm.te.max(2 - x * 4, 0) + x * 4, tvm.te.max(x * 4, 2)) - ck.verify(tvm.min(0, 1 - x * 4) + x * 4, tvm.min(x * 4, 1)) - ck.verify(tvm.min(2 - x * 4, 0) + x * 4, tvm.min(x * 4, 2)) + ck.verify(tvm.te.min(0, 1 - x * 4) + x * 4, tvm.te.min(x * 4, 1)) + ck.verify(tvm.te.min(2 - x * 4, 0) + x * 4, tvm.te.min(x * 4, 2)) ck.verify(x * y + x * 10, x * (y + 10)) ck.verify(y * x + x * 10, x * (y + 10)) @@ -189,16 +190,16 @@ def test_add_index_simplify(): ck.verify(x + 2 + 3 + 4 + x * 3, x * 4 + 9); # DivMod rules - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # truc div ck.verify(y * tmod(x, 8) + 10 * tmod(x, 8), tmod(x, 8) * (y + 10)) ck.analyzer.update(x, tvm.arith.ConstIntBound(-1, 1000), override=True) ck.verify(tdiv(x, 8) * 8 + tmod(x, 8), x) # floor div - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.verify(y * flm(x, 8) + 10 * flm(x, 8), flm(x, 8) * (y + 10)) ck.verify(fld(x, 8) * 8 + flm(x, 8), x) @@ -206,22 +207,22 @@ def test_add_index_simplify(): def test_sub_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(x + y - y, x) ck.verify(x + y - x, y) ck.verify(x - (y + x), 0 - y) ck.verify(x - (x + y), 0 - y) - ck.verify(tvm.min(x, y) - x, tvm.min(0, y - x)) - ck.verify(tvm.min(x, y) - y, tvm.min(x - y, 0)) - ck.verify(tvm.max(x, y) - x, tvm.max(0, y - x)) - ck.verify(tvm.max(x, y) - y, tvm.max(x - y, 0)) + ck.verify(tvm.te.min(x, y) - x, tvm.te.min(0, y - x)) + ck.verify(tvm.te.min(x, y) - y, tvm.te.min(x - y, 0)) + ck.verify(tvm.te.max(x, y) - x, tvm.te.max(0, y - x)) + ck.verify(tvm.te.max(x, y) - y, tvm.te.max(x - y, 0)) - ck.verify(x - tvm.min(x, y), tvm.max(0, x - y)) - ck.verify(y - tvm.min(x, y), tvm.max(y - x, 0)) - ck.verify(x - tvm.max(x, y), tvm.min(0, x - y)) - ck.verify(y - tvm.max(x, y), tvm.min(y - x, 0)) + ck.verify(x - tvm.te.min(x, y), tvm.te.max(0, x - y)) + ck.verify(y - tvm.te.min(x, y), tvm.te.max(y - x, 0)) + ck.verify(x - tvm.te.max(x, y), tvm.te.min(0, x - y)) + ck.verify(y - tvm.te.max(x, y), tvm.te.min(y - x, 0)) # mul co-efficient foldng ck.verify(x - x, 0) @@ -238,30 +239,30 @@ def test_sub_index_simplify(): ck.verify((x + y) - (z + x), y - z) ck.verify((y + x) - (z + x), y - z) - ck.verify(tvm.min(x + y, z) - x, tvm.min(y, z - x)) - ck.verify(tvm.min(y + x, z) - x, tvm.min(y, z - x)) - ck.verify(tvm.min(z, x + y) - x, tvm.min(z - x, y)) - ck.verify(tvm.min(z, y + x) - x, tvm.min(z - x, y)) + ck.verify(tvm.te.min(x + y, z) - x, tvm.te.min(y, z - x)) + ck.verify(tvm.te.min(y + x, z) - x, tvm.te.min(y, z - x)) + ck.verify(tvm.te.min(z, x + y) - x, tvm.te.min(z - x, y)) + ck.verify(tvm.te.min(z, y + x) - x, tvm.te.min(z - x, y)) - ck.verify(tvm.max(x + y, z) - x, tvm.max(y, z - x)) - ck.verify(tvm.max(y + x, z) - x, tvm.max(y, z - x)) - ck.verify(tvm.max(z, x + y) - x, tvm.max(z - x, y)) - ck.verify(tvm.max(z, y + x) - x, tvm.max(z - x, y)) + ck.verify(tvm.te.max(x + y, z) - x, tvm.te.max(y, z - x)) + ck.verify(tvm.te.max(y + x, z) - x, tvm.te.max(y, z - x)) + ck.verify(tvm.te.max(z, x + y) - x, tvm.te.max(z - x, y)) + ck.verify(tvm.te.max(z, y + x) - x, tvm.te.max(z - x, y)) - ck.verify(x - tvm.min(x + y, z), tvm.max(0 - y, x - z)) - ck.verify(x - tvm.min(y + x, z), tvm.max(0 - y, x - z)) - ck.verify(x - tvm.min(z, x + y), tvm.max(x - z, 0 - y)) - ck.verify(x - tvm.min(z, y + x), tvm.max(x - z, 0 - y)) + ck.verify(x - tvm.te.min(x + y, z), tvm.te.max(0 - y, x - z)) + ck.verify(x - tvm.te.min(y + x, z), tvm.te.max(0 - y, x - z)) + ck.verify(x - tvm.te.min(z, x + y), tvm.te.max(x - z, 0 - y)) + ck.verify(x - tvm.te.min(z, y + x), tvm.te.max(x - z, 0 - y)) - ck.verify(tvm.min(x, y) - tvm.min(y, x), 0) - ck.verify(tvm.max(x, y) - tvm.max(y, x), 0) - ck.verify(tvm.min(x, y) - tvm.min(x + 10, y + 10), -10) - ck.verify(tvm.min(x + 10, y + 1) - tvm.min(x, y - 9), 10) + ck.verify(tvm.te.min(x, y) - tvm.te.min(y, x), 0) + ck.verify(tvm.te.max(x, y) - tvm.te.max(y, x), 0) + ck.verify(tvm.te.min(x, y) - tvm.te.min(x + 10, y + 10), -10) + ck.verify(tvm.te.min(x + 10, y + 1) - tvm.te.min(x, y - 9), 10) # DivMod patterns # truc div - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True) ck.verify(x - tdiv(x, 3) * 3, tmod(x, 3)) @@ -289,8 +290,8 @@ def test_sub_index_simplify(): ck.verify(tdiv(y - z, 3) * 6 - 2 * y, (0 - tmod(y - z, 3) - z) * 2) # floor div - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), override=True) ck.analyzer.update(y, tvm.arith.ConstIntBound(-1000, 1000), override=True) ck.verify(x - fld(x, 3) * 3, flm(x, 3)) @@ -318,19 +319,19 @@ def test_sub_index_simplify(): def test_mul_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify((x + 2) * 3, x * 3 + 6) ck.verify((x * 2) * 3, x * 6) - ck.verify(tvm.min(x, y) * tvm.max(x, y), x * y) - ck.verify(tvm.max(x, y) * tvm.min(x, y), x * y) + ck.verify(tvm.te.min(x, y) * tvm.te.max(x, y), x * y) + ck.verify(tvm.te.max(x, y) * tvm.te.min(x, y), x * y) ck.verify((x - y) * (-2), (y - x) * 2) def test_div_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.verify(tdiv(x, x), 1) ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True) @@ -343,12 +344,12 @@ def test_div_index_simplify(): ck.verify(tdiv(x * 4, 2), x * 2) ck.verify(tdiv(x * 4 + y, 2), x * 2 + tdiv(y, 2)) - ck.verify(tdiv(tvm.min(x * 6, y), 2), tvm.min(x * 3, tdiv(y, 2))) - ck.verify(tdiv(tvm.max(x * 6, y), 2), tvm.max(x * 3, tdiv(y, 2))) + ck.verify(tdiv(tvm.te.min(x * 6, y), 2), tvm.te.min(x * 3, tdiv(y, 2))) + ck.verify(tdiv(tvm.te.max(x * 6, y), 2), tvm.te.max(x * 3, tdiv(y, 2))) ck.verify(tdiv(y + x * 4, 2), tdiv(y, 2) + x * 2) - ck.verify(tdiv(tvm.min(y, x * 6), 2), tvm.min(tdiv(y, 2), x * 3)) - ck.verify(tdiv(tvm.max(y, x * 6), 2), tvm.max(tdiv(y, 2), x * 3)) + ck.verify(tdiv(tvm.te.min(y, x * 6), 2), tvm.te.min(tdiv(y, 2), x * 3)) + ck.verify(tdiv(tvm.te.max(y, x * 6), 2), tvm.te.max(tdiv(y, 2), x * 3)) # 3-operands ck.verify(tdiv(x * 6 + y + z, 2), x * 3 + tdiv(y + z, 2)) @@ -375,9 +376,9 @@ def test_div_index_simplify(): def test_floordiv_index_simplify(): # short name for floordiv - fld = tvm.floordiv + fld = tvm.te.floordiv ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(fld(fld(x, 2), 3), fld(x, 6)) ck.verify(fld(fld(x, 2) + 1, 3), fld(x + 2, 6)) @@ -386,12 +387,12 @@ def test_floordiv_index_simplify(): ck.verify(fld(x * 4, 2), x * 2) ck.verify(fld(x * 4 + y, 2), x * 2 + fld(y, 2)) - ck.verify(fld(tvm.min(x * 6, y), 2), tvm.min(x * 3, fld(y, 2))) - ck.verify(fld(tvm.max(x * 6, y), 2), tvm.max(x * 3, fld(y, 2))) + ck.verify(fld(tvm.te.min(x * 6, y), 2), tvm.te.min(x * 3, fld(y, 2))) + ck.verify(fld(tvm.te.max(x * 6, y), 2), tvm.te.max(x * 3, fld(y, 2))) ck.verify(fld(y + x * 4, 2), fld(y, 2) + x * 2) - ck.verify(fld(tvm.min(y, x * 6), 2), tvm.min(fld(y, 2), x * 3)) - ck.verify(fld(tvm.max(y, x * 6), 2), tvm.max(fld(y, 2), x * 3)) + ck.verify(fld(tvm.te.min(y, x * 6), 2), tvm.te.min(fld(y, 2), x * 3)) + ck.verify(fld(tvm.te.max(y, x * 6), 2), tvm.te.max(fld(y, 2), x * 3)) # 3-operands ck.verify(fld(x * 6 + y + z, 2), x * 3 + fld(y + z, 2)) @@ -420,13 +421,13 @@ def test_floordiv_index_simplify(): def test_mod_index_simplify(): ck = RewriteChecker() - x, y, nx, ny, z = tvm.var("x"), tvm.var("y"), tvm.var("nx"), tvm.var("ny"), tvm.var("z") + x, y, nx, ny, z = te.var("x"), te.var("y"), te.var("nx"), te.var("ny"), te.var("z") ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True) ck.analyzer.update(y, tvm.arith.ConstIntBound(0, 1000), override=True) ck.analyzer.update(nx, tvm.arith.ConstIntBound(-1000, 0), override=True) ck.analyzer.update(ny, tvm.arith.ConstIntBound(-1000, 0), override=True) - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.verify(tmod(x * 10, 2), 0) ck.verify(tmod(x * 10 + y, 2), tmod(y, 2)) @@ -456,11 +457,11 @@ def test_mod_index_simplify(): def test_floormod_index_simplify(): # short name for floordiv - flm = tvm.floormod + flm = tvm.te.floormod ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck = RewriteChecker() - x, y, nx, ny, z = tvm.var("x"), tvm.var("y"), tvm.var("nx"), tvm.var("ny"), tvm.var("z") + x, y, nx, ny, z = te.var("x"), te.var("y"), te.var("nx"), te.var("ny"), te.var("z") ck.verify(flm(x * 10, 2), 0) ck.verify(flm(x * 10 + y, 2), flm(y, 2)) @@ -475,172 +476,172 @@ def test_floormod_index_simplify(): def test_min_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - fld = tvm.floordiv - flm = tvm.floormod - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + fld = tvm.te.floordiv + flm = tvm.te.floormod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # const int bound - ck.verify(tvm.min(tmod(x, 2), tmod(y, 2) + 10), tmod(x, 2)) - ck.verify(tvm.min(flm(x, 2), flm(y, 2) + 10), flm(x, 2)) - - ck.verify(tvm.min(x + 1, x + 10), x + 1) - ck.verify(tvm.min(x + 111, x + 10), x + 10) - ck.verify(tvm.min(x + 1, x), x) - ck.verify(tvm.min(x, x + 2), x) - ck.verify(tvm.min(1 - x, 2 - x), 1 - x) - ck.verify(tvm.min(3 - x, 2 - x), 2 - x) - - ck.verify(tvm.min(tvm.max(x, y), tvm.min(x, y)), tvm.min(x, y)) - ck.verify(tvm.min(tvm.max(x, y), tvm.min(y, x)), tvm.min(x, y)) - - ck.verify(tvm.min(tvm.max(x, y), x), x) - ck.verify(tvm.min(tvm.max(y, x), x), x) - ck.verify(tvm.min(tvm.min(x, y), x), tvm.min(x, y)) - ck.verify(tvm.min(tvm.min(x, y), y), tvm.min(x, y)) - - ck.verify(tvm.min(x, tvm.max(x, y)), x) - ck.verify(tvm.min(x, tvm.max(y, x)), x) - ck.verify(tvm.min(x, tvm.min(x, y)), tvm.min(x, y)) - ck.verify(tvm.min(y, tvm.min(x, y)), tvm.min(x, y)) - - ck.verify(tvm.min(tvm.min(tvm.min(x, y), z), y), - tvm.min(tvm.min(x, y), z)) - ck.verify(tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), y), - tvm.min(tvm.min(tvm.min(x, y), z), x * 2)) - ck.verify(tvm.min(tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), z * 2), y), - tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), z * 2)) - - ck.verify(tvm.min(tvm.max(x, y), tvm.max(x, z)), tvm.max(tvm.min(y, z), x)) - ck.verify(tvm.min(tvm.max(x, y), tvm.max(z, x)), tvm.max(tvm.min(y, z), x)) - ck.verify(tvm.min(tvm.max(y, x), tvm.max(x, z)), tvm.max(tvm.min(y, z), x)) - ck.verify(tvm.min(tvm.max(y, x), tvm.max(z, x)), tvm.max(tvm.min(y, z), x)) - - ck.verify(tvm.min(y + x, z + x), tvm.min(y, z) + x) - ck.verify(tvm.min(y + x, x + z), tvm.min(y, z) + x) - ck.verify(tvm.min(x + y, z + x), tvm.min(y, z) + x) - ck.verify(tvm.min(x + y, x + z), tvm.min(y, z) + x) - - ck.verify(tvm.min(x - y, x - z), x - tvm.max(y, z)) - ck.verify(tvm.min(y - x, z - x), tvm.min(y, z) - x) - - ck.verify(tvm.min(tvm.min(x, 1), 10), tvm.min(x, 1)) - ck.verify(tvm.min(tvm.min(x, 11), 10), tvm.min(x, 10)) - - ck.verify(tvm.min(x * 3, 9), tvm.min(x, 3) * 3) - ck.verify(tvm.min(3 - x, 2), 3 - tvm.max(x, 1)) + ck.verify(tvm.te.min(tmod(x, 2), tmod(y, 2) + 10), tmod(x, 2)) + ck.verify(tvm.te.min(flm(x, 2), flm(y, 2) + 10), flm(x, 2)) + + ck.verify(tvm.te.min(x + 1, x + 10), x + 1) + ck.verify(tvm.te.min(x + 111, x + 10), x + 10) + ck.verify(tvm.te.min(x + 1, x), x) + ck.verify(tvm.te.min(x, x + 2), x) + ck.verify(tvm.te.min(1 - x, 2 - x), 1 - x) + ck.verify(tvm.te.min(3 - x, 2 - x), 2 - x) + + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.min(x, y)), tvm.te.min(x, y)) + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.min(y, x)), tvm.te.min(x, y)) + + ck.verify(tvm.te.min(tvm.te.max(x, y), x), x) + ck.verify(tvm.te.min(tvm.te.max(y, x), x), x) + ck.verify(tvm.te.min(tvm.te.min(x, y), x), tvm.te.min(x, y)) + ck.verify(tvm.te.min(tvm.te.min(x, y), y), tvm.te.min(x, y)) + + ck.verify(tvm.te.min(x, tvm.te.max(x, y)), x) + ck.verify(tvm.te.min(x, tvm.te.max(y, x)), x) + ck.verify(tvm.te.min(x, tvm.te.min(x, y)), tvm.te.min(x, y)) + ck.verify(tvm.te.min(y, tvm.te.min(x, y)), tvm.te.min(x, y)) + + ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), y), + tvm.te.min(tvm.te.min(x, y), z)) + ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), y), + tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2)) + ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), z * 2), y), + tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), z * 2)) + + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.max(x, z)), tvm.te.max(tvm.te.min(y, z), x)) + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.max(z, x)), tvm.te.max(tvm.te.min(y, z), x)) + ck.verify(tvm.te.min(tvm.te.max(y, x), tvm.te.max(x, z)), tvm.te.max(tvm.te.min(y, z), x)) + ck.verify(tvm.te.min(tvm.te.max(y, x), tvm.te.max(z, x)), tvm.te.max(tvm.te.min(y, z), x)) + + ck.verify(tvm.te.min(y + x, z + x), tvm.te.min(y, z) + x) + ck.verify(tvm.te.min(y + x, x + z), tvm.te.min(y, z) + x) + ck.verify(tvm.te.min(x + y, z + x), tvm.te.min(y, z) + x) + ck.verify(tvm.te.min(x + y, x + z), tvm.te.min(y, z) + x) + + ck.verify(tvm.te.min(x - y, x - z), x - tvm.te.max(y, z)) + ck.verify(tvm.te.min(y - x, z - x), tvm.te.min(y, z) - x) + + ck.verify(tvm.te.min(tvm.te.min(x, 1), 10), tvm.te.min(x, 1)) + ck.verify(tvm.te.min(tvm.te.min(x, 11), 10), tvm.te.min(x, 10)) + + ck.verify(tvm.te.min(x * 3, 9), tvm.te.min(x, 3) * 3) + ck.verify(tvm.te.min(3 - x, 2), 3 - tvm.te.max(x, 1)) # DivMod rules # truc div ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000)) - ck.verify(tvm.min(tdiv(x + 3, 4) * 4, x), x) - ck.verify(tvm.min(tdiv(x + 3, 4) * 4, tvm.max(x, 4)), tvm.max(x, 4)) - ck.verify(tvm.min(x, tdiv(x + 3, 4) * 4), x) - ck.verify(tvm.min(tvm.max(x, 4), tdiv(x + 3, 4) * 4), tvm.max(x, 4)) + ck.verify(tvm.te.min(tdiv(x + 3, 4) * 4, x), x) + ck.verify(tvm.te.min(tdiv(x + 3, 4) * 4, tvm.te.max(x, 4)), tvm.te.max(x, 4)) + ck.verify(tvm.te.min(x, tdiv(x + 3, 4) * 4), x) + ck.verify(tvm.te.min(tvm.te.max(x, 4), tdiv(x + 3, 4) * 4), tvm.te.max(x, 4)) ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), True) - ck.verify(tvm.min(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.min(x, y), 10)) - ck.verify(tvm.min(tdiv(x, (-10)), tdiv(y, (-10))), - tdiv(tvm.max(x, y), (-10))) + ck.verify(tvm.te.min(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.te.min(x, y), 10)) + ck.verify(tvm.te.min(tdiv(x, (-10)), tdiv(y, (-10))), + tdiv(tvm.te.max(x, y), (-10))) # floor div ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), True) - ck.verify(tvm.min(fld(x + 3, 4) * 4, x), x) - ck.verify(tvm.min(fld(x + 3, 4) * 4, tvm.max(x, 4)), tvm.max(x, 4)) - ck.verify(tvm.min(x, fld(x + 3, 4) * 4), x) - ck.verify(tvm.min(x, fld(x, 4) * 4), fld(x, 4) * 4) - ck.verify(tvm.min(tvm.max(x, 4), fld(x + 3, 4) * 4), tvm.max(x, 4)) - ck.verify(tvm.min(fld(x, 10), fld(y, 10)), fld(tvm.min(x, y), 10)) - ck.verify(tvm.min(fld(x, (-10)), fld(y, (-10))), fld(tvm.max(x, y), (-10))) + ck.verify(tvm.te.min(fld(x + 3, 4) * 4, x), x) + ck.verify(tvm.te.min(fld(x + 3, 4) * 4, tvm.te.max(x, 4)), tvm.te.max(x, 4)) + ck.verify(tvm.te.min(x, fld(x + 3, 4) * 4), x) + ck.verify(tvm.te.min(x, fld(x, 4) * 4), fld(x, 4) * 4) + ck.verify(tvm.te.min(tvm.te.max(x, 4), fld(x + 3, 4) * 4), tvm.te.max(x, 4)) + ck.verify(tvm.te.min(fld(x, 10), fld(y, 10)), fld(tvm.te.min(x, y), 10)) + ck.verify(tvm.te.min(fld(x, (-10)), fld(y, (-10))), fld(tvm.te.max(x, y), (-10))) def test_max_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - flm = tvm.floormod - fld = tvm.floordiv - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + flm = tvm.te.floormod + fld = tvm.te.floordiv + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # const int bound - ck.verify(tvm.max(tmod(x, 2), tmod(y, 2) + 10), tmod(y, 2) + 10) - ck.verify(tvm.max(flm(x, 2), flm(y, 2) + 10), flm(y, 2) + 10) - - ck.verify(tvm.max(x + 1, x + 10), x + 10) - ck.verify(tvm.max(x + 111, x + 10), x + 111) - ck.verify(tvm.max(x + 1, x), x + 1) - ck.verify(tvm.max(x, x + 2), x + 2) - ck.verify(tvm.max(1 - x, 2 - x), 2 - x) - ck.verify(tvm.max(3 - x, 2 - x), 3 - x) - - ck.verify(tvm.max(tvm.min(x, y), tvm.max(x, y)), tvm.max(x, y)) - ck.verify(tvm.max(tvm.min(x, y), tvm.max(y, x)), tvm.max(x, y)) - - ck.verify(tvm.max(tvm.min(x, y), x), x) - ck.verify(tvm.max(tvm.min(y, x), x), x) - ck.verify(tvm.max(tvm.max(x, y), x), tvm.max(x, y)) - ck.verify(tvm.max(tvm.max(x, y), y), tvm.max(x, y)) - - ck.verify(tvm.max(x, tvm.min(x, y)), x) - ck.verify(tvm.max(x, tvm.min(y, x)), x) - ck.verify(tvm.max(x, tvm.max(x, y)), tvm.max(x, y)) - ck.verify(tvm.max(y, tvm.max(x, y)), tvm.max(x, y)) - - ck.verify(tvm.max(tvm.max(tvm.max(x, y), z), y), - tvm.max(tvm.max(x, y), z)) - ck.verify(tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), y), - tvm.max(tvm.max(tvm.max(x, y), z), x * 2)) - ck.verify(tvm.max(tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), z * 2), y), - tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), z * 2)) - - ck.verify(tvm.max(tvm.min(x, y), tvm.min(x, z)), tvm.min(tvm.max(y, z), x)) - ck.verify(tvm.max(tvm.min(x, y), tvm.min(z, x)), tvm.min(tvm.max(y, z), x)) - ck.verify(tvm.max(tvm.min(y, x), tvm.min(x, z)), tvm.min(tvm.max(y, z), x)) - ck.verify(tvm.max(tvm.min(y, x), tvm.min(z, x)), tvm.min(tvm.max(y, z), x)) - - ck.verify(tvm.max(y + x, z + x), tvm.max(y, z) + x) - ck.verify(tvm.max(y + x, x + z), tvm.max(y, z) + x) - ck.verify(tvm.max(x + y, z + x), tvm.max(y, z) + x) - ck.verify(tvm.max(x + y, x + z), tvm.max(y, z) + x) - - ck.verify(tvm.max(x - y, x - z), x - tvm.min(y, z)) - ck.verify(tvm.max(y - x, z - x), tvm.max(y, z) - x) - - ck.verify(tvm.max(tvm.max(x, 1), 10), tvm.max(x, 10)) - ck.verify(tvm.max(tvm.max(x, 11), 10), tvm.max(x, 11)) - - ck.verify(tvm.max(x * 3, 9), tvm.max(x, 3) * 3) - ck.verify(tvm.max(3 - x, 1), 3 - tvm.min(x, 2)) + ck.verify(tvm.te.max(tmod(x, 2), tmod(y, 2) + 10), tmod(y, 2) + 10) + ck.verify(tvm.te.max(flm(x, 2), flm(y, 2) + 10), flm(y, 2) + 10) + + ck.verify(tvm.te.max(x + 1, x + 10), x + 10) + ck.verify(tvm.te.max(x + 111, x + 10), x + 111) + ck.verify(tvm.te.max(x + 1, x), x + 1) + ck.verify(tvm.te.max(x, x + 2), x + 2) + ck.verify(tvm.te.max(1 - x, 2 - x), 2 - x) + ck.verify(tvm.te.max(3 - x, 2 - x), 3 - x) + + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.max(x, y)), tvm.te.max(x, y)) + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.max(y, x)), tvm.te.max(x, y)) + + ck.verify(tvm.te.max(tvm.te.min(x, y), x), x) + ck.verify(tvm.te.max(tvm.te.min(y, x), x), x) + ck.verify(tvm.te.max(tvm.te.max(x, y), x), tvm.te.max(x, y)) + ck.verify(tvm.te.max(tvm.te.max(x, y), y), tvm.te.max(x, y)) + + ck.verify(tvm.te.max(x, tvm.te.min(x, y)), x) + ck.verify(tvm.te.max(x, tvm.te.min(y, x)), x) + ck.verify(tvm.te.max(x, tvm.te.max(x, y)), tvm.te.max(x, y)) + ck.verify(tvm.te.max(y, tvm.te.max(x, y)), tvm.te.max(x, y)) + + ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), y), + tvm.te.max(tvm.te.max(x, y), z)) + ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), y), + tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2)) + ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), z * 2), y), + tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), z * 2)) + + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.min(x, z)), tvm.te.min(tvm.te.max(y, z), x)) + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.min(z, x)), tvm.te.min(tvm.te.max(y, z), x)) + ck.verify(tvm.te.max(tvm.te.min(y, x), tvm.te.min(x, z)), tvm.te.min(tvm.te.max(y, z), x)) + ck.verify(tvm.te.max(tvm.te.min(y, x), tvm.te.min(z, x)), tvm.te.min(tvm.te.max(y, z), x)) + + ck.verify(tvm.te.max(y + x, z + x), tvm.te.max(y, z) + x) + ck.verify(tvm.te.max(y + x, x + z), tvm.te.max(y, z) + x) + ck.verify(tvm.te.max(x + y, z + x), tvm.te.max(y, z) + x) + ck.verify(tvm.te.max(x + y, x + z), tvm.te.max(y, z) + x) + + ck.verify(tvm.te.max(x - y, x - z), x - tvm.te.min(y, z)) + ck.verify(tvm.te.max(y - x, z - x), tvm.te.max(y, z) - x) + + ck.verify(tvm.te.max(tvm.te.max(x, 1), 10), tvm.te.max(x, 10)) + ck.verify(tvm.te.max(tvm.te.max(x, 11), 10), tvm.te.max(x, 11)) + + ck.verify(tvm.te.max(x * 3, 9), tvm.te.max(x, 3) * 3) + ck.verify(tvm.te.max(3 - x, 1), 3 - tvm.te.min(x, 2)) # DivMod rules # truc div - ck.verify(tvm.max(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.max(x, y), 10)) - ck.verify(tvm.max(tdiv(x, (-10)), tdiv(y, (-10))), tdiv(tvm.min(x, y), (-10))) - ck.verify(tvm.max(tdiv(x + 3, 4) * 4, x), tdiv(x + 3, 4) * 4) + ck.verify(tvm.te.max(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.te.max(x, y), 10)) + ck.verify(tvm.te.max(tdiv(x, (-10)), tdiv(y, (-10))), tdiv(tvm.te.min(x, y), (-10))) + ck.verify(tvm.te.max(tdiv(x + 3, 4) * 4, x), tdiv(x + 3, 4) * 4) # floordiv - ck.verify(tvm.max(fld(x, 10), fld(y, 10)), fld(tvm.max(x, y), 10)) - ck.verify(tvm.max(fld(x, (-10)), fld(y, (-10))), fld(tvm.min(x, y), (-10))) - ck.verify(tvm.max(fld(x + 3, 4) * 4, x), fld(x + 3, 4) * 4) - ck.verify(tvm.max(fld(x, 4) * 4, x), x) - ck.verify(tvm.max(x, fld(x, 4) * 4), x) + ck.verify(tvm.te.max(fld(x, 10), fld(y, 10)), fld(tvm.te.max(x, y), 10)) + ck.verify(tvm.te.max(fld(x, (-10)), fld(y, (-10))), fld(tvm.te.min(x, y), (-10))) + ck.verify(tvm.te.max(fld(x + 3, 4) * 4, x), fld(x + 3, 4) * 4) + ck.verify(tvm.te.max(fld(x, 4) * 4, x), x) + ck.verify(tvm.te.max(x, fld(x, 4) * 4), x) def test_cmp_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - flm = tvm.floormod - fld = tvm.floordiv - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + flm = tvm.te.floormod + fld = tvm.te.floordiv + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # const int bound - ck.verify((tmod(x, 2) + 10).equal(0), tvm.const(0, "bool")) - ck.verify(tvm.tir.NE(tmod(x, 2) + 10, 0), tvm.const(1, "bool")) - ck.verify(tmod(x, 2) + 10 > 1, tvm.const(1, "bool")) - ck.verify(tmod(x, 2) + 10 <= 1, tvm.const(0, "bool")) - ck.verify(flm(x, 2) + 2 > 1, tvm.const(1, "bool")) - ck.verify(flm(x, 2) + 10 <= 1, tvm.const(0, "bool")) + ck.verify((tmod(x, 2) + 10).equal(0), tvm.tir.const(0, "bool")) + ck.verify(tvm.tir.NE(tmod(x, 2) + 10, 0), tvm.tir.const(1, "bool")) + ck.verify(tmod(x, 2) + 10 > 1, tvm.tir.const(1, "bool")) + ck.verify(tmod(x, 2) + 10 <= 1, tvm.tir.const(0, "bool")) + ck.verify(flm(x, 2) + 2 > 1, tvm.tir.const(1, "bool")) + ck.verify(flm(x, 2) + 10 <= 1, tvm.tir.const(0, "bool")) - ck.verify(x * 3 + 10 == 0, tvm.const(0, "bool")) - ck.verify(x * 3 + 10 != 0, tvm.const(1, "bool")) + ck.verify(x * 3 + 10 == 0, tvm.tir.const(0, "bool")) + ck.verify(x * 3 + 10 != 0, tvm.tir.const(1, "bool")) # canonicalization ck.verify((x - 10).equal(0), x.equal(10)) @@ -750,88 +751,88 @@ def test_cmp_simplify(): ck.verify(fld(x + 2, 4) * 4 >= x - y, tvm.tir.LE(flm(x + 2, 4) + (-2), y)) # End DivMod Rules - ck.verify(tvm.min(x, 11) < 10, x < 10) - ck.verify(tvm.min(x, 8) < 10, tvm.const(1, "bool")) - ck.verify(tvm.max(8, x) > 10, tvm.tir.LT(10, x)) - ck.verify(x + 1 < tvm.max(8, x), x < 7) + ck.verify(tvm.te.min(x, 11) < 10, x < 10) + ck.verify(tvm.te.min(x, 8) < 10, tvm.tir.const(1, "bool")) + ck.verify(tvm.te.max(8, x) > 10, tvm.tir.LT(10, x)) + ck.verify(x + 1 < tvm.te.max(8, x), x < 7) ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 10), override=True) ck.analyzer.update(y, tvm.arith.ConstIntBound(-10, 0), override=True) ck.analyzer.update(z, tvm.arith.ConstIntBound(-5, 5), override=True) - ck.verify(x < 11, tvm.const(1, "bool")) - ck.verify(x <= 10, tvm.const(1, "bool")) - ck.verify(z <= 5, tvm.const(1, "bool")) - ck.verify(x + y <= 10, tvm.const(1, "bool")) - ck.verify(x + y >= -10, tvm.const(1, "bool")) - ck.verify(z - 5 <= y + 10, tvm.const(1, "bool")) - ck.verify(tvm.all(x > -1, z <= x + 5), tvm.const(1, "bool")) - ck.verify(x*y <= 0, tvm.const(1, "bool")) - ck.verify((x + 1)*(y - 1) < 0, tvm.const(1, "bool")) - ck.verify(y*y >= 0, tvm.const(1, "bool")) - ck.verify(x*6 <= -3, tvm.const(0, "bool")) + ck.verify(x < 11, tvm.tir.const(1, "bool")) + ck.verify(x <= 10, tvm.tir.const(1, "bool")) + ck.verify(z <= 5, tvm.tir.const(1, "bool")) + ck.verify(x + y <= 10, tvm.tir.const(1, "bool")) + ck.verify(x + y >= -10, tvm.tir.const(1, "bool")) + ck.verify(z - 5 <= y + 10, tvm.tir.const(1, "bool")) + ck.verify(tvm.tir.all(x > -1, z <= x + 5), tvm.tir.const(1, "bool")) + ck.verify(x*y <= 0, tvm.tir.const(1, "bool")) + ck.verify((x + 1)*(y - 1) < 0, tvm.tir.const(1, "bool")) + ck.verify(y*y >= 0, tvm.tir.const(1, "bool")) + ck.verify(x*6 <= -3, tvm.tir.const(0, "bool")) ck.verify(tmod(y - 1, 3) == 0, tmod(y + (-1), 3) == 0) def test_logical_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(tvm.tir.And(tvm.tir.EQ(x, y), tvm.tir.NE(x, y)), - tvm.const(False, "bool")) + tvm.tir.const(False, "bool")) ck.verify(tvm.tir.And(tvm.tir.NE(x, y), tvm.tir.EQ(x, y)), - tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x > 1, tvm.tir.Not(x > 1)), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x <= y, y < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(y < x, x <= y), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x < 1, 0 < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x < 0, 1 < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x < 1, 1 <= x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x <= 1, 1 < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(1 <= x, x < 1), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(1 < x, x <= 1), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x <= 1, 2 <= x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(2 <= x, x <= 1), tvm.const(False, "bool")) + tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x > 1, tvm.tir.Not(x > 1)), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x <= y, y < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(y < x, x <= y), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x < 1, 0 < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x < 0, 1 < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x < 1, 1 <= x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x <= 1, 1 < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(1 <= x, x < 1), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(1 < x, x <= 1), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x <= 1, 2 <= x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(2 <= x, x <= 1), tvm.tir.const(False, "bool")) ck.verify(tvm.tir.And(x == 1, x != 2), x == 1) ck.verify(tvm.tir.Or(tvm.tir.EQ(x, y), tvm.tir.NE(x, y)), - tvm.const(True, "bool")) + tvm.tir.const(True, "bool")) ck.verify(tvm.tir.Or(tvm.tir.NE(x, y), tvm.tir.EQ(x, y)), - tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(x > y, tvm.tir.Not(x > y)), tvm.const(True, "bool")) + tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(x > y, tvm.tir.Not(x > y)), tvm.tir.const(True, "bool")) - ck.verify(tvm.tir.Or(x <= y, y < x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(y < x, y >= x), tvm.const(True, "bool")) + ck.verify(tvm.tir.Or(x <= y, y < x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(y < x, y >= x), tvm.tir.const(True, "bool")) - ck.verify(tvm.tir.Or(x < 1, 0 < x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(0 < x, x < 1), tvm.const(True, "bool")) + ck.verify(tvm.tir.Or(x < 1, 0 < x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(0 < x, x < 1), tvm.tir.const(True, "bool")) - ck.verify(tvm.tir.Or(x < 1, 1 <= x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(x <= 1, 1 < x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(1 <= x, x < 1), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(1 < x, x <= 1), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(x <= 1, 2 <= x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(2 <= x, x <= 1), tvm.const(True, "bool")) + ck.verify(tvm.tir.Or(x < 1, 1 <= x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(x <= 1, 1 < x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(1 <= x, x < 1), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(1 < x, x <= 1), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(x <= 1, 2 <= x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(2 <= x, x <= 1), tvm.tir.const(True, "bool")) ck.verify(tvm.tir.Or(x != 1, x == 2), x != 1) def test_let_simplify(): ck = RewriteChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") z = tvm.tir.Let(x, 1, x + 1) ck.verify(z + z, 4) def test_cast_simplify(): ck = RewriteChecker() - x = tvm.var("x") + x = te.var("x") dtypes = ["float32", "float16", "int32", "int8", "bool"] for dtype1 in dtypes: - ck.verify(tvm.tir.Cast(dtype1, x - x), tvm.const(0, dtype1)) - ck.verify(tvm.tir.Cast(dtype1, x == x), tvm.const(1, dtype1)) + ck.verify(tvm.tir.Cast(dtype1, x - x), tvm.tir.const(0, dtype1)) + ck.verify(tvm.tir.Cast(dtype1, x == x), tvm.tir.const(1, dtype1)) for dtype2 in dtypes: for i in [0, 1, 2, 3]: - ck.verify(tvm.tir.Cast(dtype1, tvm.const(i, dtype2)), tvm.const(i, dtype1)) + ck.verify(tvm.tir.Cast(dtype1, tvm.tir.const(i, dtype2)), tvm.tir.const(i, dtype1)) if __name__ == "__main__": test_floordiv_index_simplify() diff --git a/tests/python/unittest/test_arith_stmt_simplify.py b/tests/python/unittest/test_arith_stmt_simplify.py index 58b60836539f1..12a60db855f73 100644 --- a/tests/python/unittest/test_arith_stmt_simplify.py +++ b/tests/python/unittest/test_arith_stmt_simplify.py @@ -15,12 +15,13 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_stmt_simplify(): ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") - n = tvm.size_var("n") + n = te.size_var("n") with ib.for_range(0, n, name="i") as i: with ib.if_scope(i < 12): A[i] = C[i] @@ -34,9 +35,9 @@ def test_thread_extent_simplify(): ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") - n = tvm.size_var("n") - tx = tvm.thread_axis("threadIdx.x") - ty = tvm.thread_axis("threadIdx.y") + n = te.size_var("n") + tx = te.thread_axis("threadIdx.x") + ty = te.thread_axis("threadIdx.y") ib.scope_attr(tx, "thread_extent", n) ib.scope_attr(tx, "thread_extent", n) ib.scope_attr(ty, "thread_extent", 1) @@ -48,17 +49,17 @@ def test_thread_extent_simplify(): def test_basic_likely_elimination(): - n = tvm.size_var('n') - X = tvm.placeholder(shape=(n,), name="x") - W = tvm.placeholder(shape=(n + 1,), dtype="int32", name="w") + n = te.size_var('n') + X = te.placeholder(shape=(n,), name="x") + W = te.placeholder(shape=(n + 1,), dtype="int32", name="w") def f(i): start = W[i] extent = W[i+1] - W[i] - rv = tvm.reduce_axis((0, extent)) - return tvm.sum(X[rv + start], axis=rv) - Y = tvm.compute(X.shape, f, name="y") - s = tvm.create_schedule([Y.op]) + rv = te.reduce_axis((0, extent)) + return te.sum(X[rv + start], axis=rv) + Y = te.compute(X.shape, f, name="y") + s = te.create_schedule([Y.op]) stmt = tvm.lower(s, [X, W, Y], simple_mode=True) assert('if' not in str(stmt)) @@ -68,10 +69,10 @@ def cumsum(X): Y[i] = sum(X[:i]) """ (m, ) = X.shape - s_state = tvm.placeholder((m + 1, ), dtype="int32", name="state") - s_init = tvm.compute((1, ), lambda _: tvm.const(0, "int32")) - s_update = tvm.compute((m + 1, ), lambda l: s_state[l - 1] + X[l - 1]) - return tvm.scan(s_init, s_update, s_state, inputs=[X], name="cumsum") + s_state = te.placeholder((m + 1, ), dtype="int32", name="state") + s_init = te.compute((1, ), lambda _: tvm.tir.const(0, "int32")) + s_update = te.compute((m + 1, ), lambda l: s_state[l - 1] + X[l - 1]) + return tvm.te.scan(s_init, s_update, s_state, inputs=[X], name="cumsum") def sparse_lengths_sum(data, indices, lengths): oshape = list(data.shape) @@ -79,21 +80,21 @@ def sparse_lengths_sum(data, indices, lengths): length_offsets = cumsum(lengths) def sls(n, d): - gg = tvm.reduce_axis((0, lengths[n])) + gg = te.reduce_axis((0, lengths[n])) indices_idx = length_offsets[n] + gg data_idx = indices[indices_idx] data_val = data[data_idx, d] - return tvm.sum(data_val, axis=gg) + return te.sum(data_val, axis=gg) - return tvm.compute(oshape, sls) + return te.compute(oshape, sls) - m, n, d, i, l = tvm.size_var('m'), tvm.size_var('n'), tvm.size_var('d'),\ - tvm.size_var('i'), tvm.size_var('l') - data_ph = tvm.placeholder((m, d * 32), name="data") - indices_ph = tvm.placeholder((i,), name="indices", dtype="int32") - lengths_ph = tvm.placeholder((n,), name="lengths", dtype="int32") + m, n, d, i, l = te.size_var('m'), te.size_var('n'), te.size_var('d'),\ + te.size_var('i'), te.size_var('l') + data_ph = te.placeholder((m, d * 32), name="data") + indices_ph = te.placeholder((i,), name="indices", dtype="int32") + lengths_ph = te.placeholder((n,), name="lengths", dtype="int32") Y = sparse_lengths_sum(data_ph, indices_ph, lengths_ph) - s = tvm.create_schedule([Y.op]) + s = te.create_schedule([Y.op]) (n, d) = s[Y].op.axis (do, di) = s[Y].split(d, factor=32) (gg,) = s[Y].op.reduce_axis diff --git a/tests/python/unittest/test_autotvm_common.py b/tests/python/unittest/test_autotvm_common.py index 83bbd5492619f..a2f9b1da42aed 100644 --- a/tests/python/unittest/test_autotvm_common.py +++ b/tests/python/unittest/test_autotvm_common.py @@ -20,6 +20,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm import MeasureInput, MeasureResult from tvm.autotvm.measure.measure import Runner @@ -38,12 +39,12 @@ def get_build_kwargs(self): @autotvm.register_customized_task("testing/matmul") def matmul(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -66,12 +67,12 @@ def matmul(N, L, M, dtype): @autotvm.register_customized_task("testing/bad_matmul") def bad_matmul(N, L, M, dtype): if 'bad_device' in tvm.target.Target.current().keys: - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L-1), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L-1), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis diff --git a/tests/python/unittest/test_autotvm_feature.py b/tests/python/unittest/test_autotvm_feature.py index e0736c280dc48..59ad464f7cea2 100644 --- a/tests/python/unittest/test_autotvm_feature.py +++ b/tests/python/unittest/test_autotvm_feature.py @@ -19,20 +19,21 @@ import numpy as np import tvm +from tvm import te from tvm.autotvm import feature def test_iter_feature_gemm(): N = 128 - k = tvm.reduce_axis((0, N), 'k') - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') - C = tvm.compute( + k = te.reduce_axis((0, N), 'k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') + C = te.compute( A.shape, - lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k), + lambda y, x: te.sum(A[y, k] * B[k, x], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) feas = feature.get_itervar_feature(s, [A, B, C], take_log=False) @@ -64,15 +65,15 @@ def test_iter_feature_gemm(): def test_curve_feature_gemm(): N = 128 - k = tvm.reduce_axis((0, N), 'k') - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') - C = tvm.compute( + k = te.reduce_axis((0, N), 'k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') + C = te.compute( A.shape, - lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k), + lambda y, x: te.sum(A[y, k] * B[k, x], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) feas = feature.get_buffer_curve_sample_flatten(s, [A, B, C], sample_n=30) # sample_n * #buffers * #curves * 2 numbers per curve @@ -85,13 +86,13 @@ def test_feature_shape(): n_sample = 100 def get_gemm_feature(target): - k = tvm.reduce_axis((0, N), 'k') - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') - C = tvm.compute(A.shape, lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k), + k = te.reduce_axis((0, N), 'k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') + C = te.compute(A.shape, lambda y, x: te.sum(A[y, k] * B[k, x], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) y, x = s[C].op.axis axes = list(s[C].tile(y, x, 8, 8)) + [k] @@ -105,9 +106,9 @@ def get_gemm_feature(target): for i in range(len(perm)): if perm[i] != 4: pick.append(axes[i]) - s[C].bind(pick[0], tvm.thread_axis("blockIdx.x")) - s[C].bind(pick[1], tvm.thread_axis("vthread")) - s[C].bind(pick[2], tvm.thread_axis("threadIdx.y")) + s[C].bind(pick[0], te.thread_axis("blockIdx.x")) + s[C].bind(pick[1], te.thread_axis("vthread")) + s[C].bind(pick[2], te.thread_axis("threadIdx.y")) with target: feas = feature.get_itervar_feature(s, [A, B, C]) diff --git a/tests/python/unittest/test_autotvm_flop_calculator.py b/tests/python/unittest/test_autotvm_flop_calculator.py index 5cafd02c45bf0..e06010b948ad1 100644 --- a/tests/python/unittest/test_autotvm_flop_calculator.py +++ b/tests/python/unittest/test_autotvm_flop_calculator.py @@ -17,6 +17,7 @@ """Test flop calculation""" import tvm +from tvm import te import numpy as np from tvm.autotvm.task.task import compute_flop @@ -30,24 +31,24 @@ def test_conv(): for i in range(5): N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)] (input_dtype, acc_dtype) = random_dtypes() - D = tvm.placeholder((N, CI, H, W), dtype=input_dtype) - K = tvm.placeholder((CO, CI, KH, KW), dtype=input_dtype) + D = te.placeholder((N, CI, H, W), dtype=input_dtype) + K = te.placeholder((CO, CI, KH, KW), dtype=input_dtype) KH = min(H, KH) KW = min(W, KW) - ci = tvm.reduce_axis((0, CI)) - kh = tvm.reduce_axis((0, KH)) - kw = tvm.reduce_axis((0, KW)) + ci = te.reduce_axis((0, CI)) + kh = te.reduce_axis((0, KH)) + kw = te.reduce_axis((0, KW)) OH = (H - KH) + 1 OW = (W - KW) + 1 - C = tvm.compute((N, CO, OH, OW), lambda n, co, h, w: - tvm.sum(D[n][ci][h][w].astype(acc_dtype) * K[co][ci][h][w].astype(acc_dtype), + C = te.compute((N, CO, OH, OW), lambda n, co, h, w: + te.sum(D[n][ci][h][w].astype(acc_dtype) * K[co][ci][h][w].astype(acc_dtype), axis=[ci, kh, kw])) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == 2 * N * CO * OH * OW * CI * KH * KW @@ -55,55 +56,55 @@ def test_pack_gemm(): for i in range(5): N, L, M = [np.random.randint(10, 128) * 4 for _ in range(3)] (input_dtype, acc_dtype) = random_dtypes() - A = tvm.placeholder((N, L), dtype=input_dtype) - B = tvm.placeholder((M, L), dtype=input_dtype) - k = tvm.reduce_axis((0, L)) + A = te.placeholder((N, L), dtype=input_dtype) + B = te.placeholder((M, L), dtype=input_dtype) + k = te.reduce_axis((0, L)) bn = 4 - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod - A_pack = tvm.compute((N // bn, L, bn), lambda i, j, k: A[i * bn + k][j]) - B_pack = tvm.compute((M // bn, L, bn), lambda i, j, k: B[i * bn + k][j]) - C_pack = tvm.compute((N // bn, M // bn, bn, bn), lambda i, j, ii, jj: - tvm.sum(A_pack[i, k, ii].astype(acc_dtype) * B_pack[j, k, jj].astype(acc_dtype), axis=[k])) - C = tvm.compute((N, M), lambda i, j: C_pack[idxd(i, bn)][idxd(j, bn)][idxm(i, bn)][idxm(j, bn)]) + A_pack = te.compute((N // bn, L, bn), lambda i, j, k: A[i * bn + k][j]) + B_pack = te.compute((M // bn, L, bn), lambda i, j, k: B[i * bn + k][j]) + C_pack = te.compute((N // bn, M // bn, bn, bn), lambda i, j, ii, jj: + te.sum(A_pack[i, k, ii].astype(acc_dtype) * B_pack[j, k, jj].astype(acc_dtype), axis=[k])) + C = te.compute((N, M), lambda i, j: C_pack[idxd(i, bn)][idxd(j, bn)][idxm(i, bn)][idxm(j, bn)]) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == 2 * N * L * M def test_outer_dot(): for i in range(5): N, M = [np.random.randint(10, 128) * 4 for _ in range(2)] (input_dtype, acc_dtype) = random_dtypes() - A = tvm.placeholder((N,), dtype=input_dtype) - B = tvm.placeholder((M,), dtype=input_dtype) + A = te.placeholder((N,), dtype=input_dtype) + B = te.placeholder((M,), dtype=input_dtype) - C = tvm.compute((N, M), lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype)) + C = te.compute((N, M), lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype)) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == N * M def test_max_pool(): for i in range(5): N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)] (input_dtype, _) = random_dtypes() - D = tvm.placeholder((N, CI, H, W), dtype=input_dtype) + D = te.placeholder((N, CI, H, W), dtype=input_dtype) KH = min(H, KH) KW = min(W, KW) - kh = tvm.reduce_axis((0, KH)) - kw = tvm.reduce_axis((0, KW)) + kh = te.reduce_axis((0, KH)) + kw = te.reduce_axis((0, KW)) OH = (H - KH) + 1 OW = (W - KW) + 1 - C = tvm.compute( + C = te.compute( (N, CO, OH, OW), - lambda n, co, h, w: tvm.max(D[n][co][h + kh][w + kw], axis=[kh, kw])) + lambda n, co, h, w: tvm.te.max(D[n][co][h + kh][w + kw], axis=[kh, kw])) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == N * CO * OH * OW * KH * KW @@ -111,24 +112,24 @@ def test_average_pool(): for i in range(5): N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)] (input_dtype, acc_dtype) = random_dtypes() - D = tvm.placeholder((N, CI, H, W), dtype=input_dtype) + D = te.placeholder((N, CI, H, W), dtype=input_dtype) KH = min(H, KH) KW = min(W, KW) - kh = tvm.reduce_axis((0, KH)) - kw = tvm.reduce_axis((0, KW)) + kh = te.reduce_axis((0, KH)) + kw = te.reduce_axis((0, KW)) OH = (H - KH) + 1 OW = (W - KW) + 1 - C = tvm.compute( + C = te.compute( (N, CO, OH, OW), - lambda n, co, h, w: tvm.sum( - tvm.div(D[n][co][h + kh][w + kw].astype(acc_dtype), (KW * KH)), axis=[kh, kw])) + lambda n, co, h, w: te.sum( + te.div(D[n][co][h + kh][w + kw].astype(acc_dtype), (KW * KH)), axis=[kh, kw])) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == 2 * N * CO * OH * OW * KH * KW @@ -136,9 +137,9 @@ def test_move(): """No float number operation in simple move. So the estimator should raise an error """ N = 1024 - A = tvm.placeholder((N,)) - C = tvm.compute((N,), lambda i: A[i]) - s = tvm.create_schedule([C.op]) + A = te.placeholder((N,)) + C = te.compute((N,), lambda i: A[i]) + s = te.create_schedule([C.op]) try: compute_flop(s) diff --git a/tests/python/unittest/test_autotvm_measure.py b/tests/python/unittest/test_autotvm_measure.py index 0899f6f5bbff2..f96d333ddbc3c 100644 --- a/tests/python/unittest/test_autotvm_measure.py +++ b/tests/python/unittest/test_autotvm_measure.py @@ -21,6 +21,7 @@ import numpy as np import tvm +from tvm import te from test_autotvm_common import DummyRunner, bad_matmul, get_sample_task from tvm import autotvm from tvm.autotvm.measure.measure import MeasureErrorNo, MeasureResult diff --git a/tests/python/unittest/test_autotvm_record.py b/tests/python/unittest/test_autotvm_record.py index 0839ad9b68cfd..bcc9a93a5b7a8 100644 --- a/tests/python/unittest/test_autotvm_record.py +++ b/tests/python/unittest/test_autotvm_record.py @@ -18,6 +18,7 @@ import time import tvm +from tvm import te from tvm.contrib import util from tvm import autotvm diff --git a/tests/python/unittest/test_autotvm_space.py b/tests/python/unittest/test_autotvm_space.py index 95f3201c5eb47..2694c49d69254 100644 --- a/tests/python/unittest/test_autotvm_space.py +++ b/tests/python/unittest/test_autotvm_space.py @@ -17,16 +17,17 @@ """Test space definition primitives""" import tvm +from tvm import te from tvm.autotvm.task.space import ConfigSpace, FallbackConfigEntity def gemm_func(cfg, N): - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') - k = tvm.reduce_axis((0, N), name='k') - C = tvm.compute((N, N), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=[k]), name='C') + k = te.reduce_axis((0, N), name='k') + C = te.compute((N, N), lambda i, j: te.sum(A[i, k] * B[k, j], axis=[k]), name='C') - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) y, x = s[C].op.axis diff --git a/tests/python/unittest/test_autotvm_xgboost_model.py b/tests/python/unittest/test_autotvm_xgboost_model.py index 24677c566c665..214a600be10bd 100644 --- a/tests/python/unittest/test_autotvm_xgboost_model.py +++ b/tests/python/unittest/test_autotvm_xgboost_model.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm import MeasureInput, MeasureResult from tvm.autotvm.tuner.xgboost_cost_model import XGBoostCostModel diff --git a/tests/python/unittest/test_build_lower.py b/tests/python/unittest/test_build_lower.py index 58312dc83932d..736030bd548dc 100644 --- a/tests/python/unittest/test_build_lower.py +++ b/tests/python/unittest/test_build_lower.py @@ -15,28 +15,29 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_lower_rfactor(): - n = tvm.size_var("n") - m = tvm.size_var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") - s = tvm.create_schedule(B.op) + n = te.size_var("n") + m = te.size_var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") + s = te.create_schedule(B.op) ko, ki = s[B].split(B.op.reduce_axis[0], factor=16) BF = s.rfactor(B, ki) xo, xi = s[B].split(s[B].op.axis[0], factor=32) - s[B.op].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B.op].bind(xi, tvm.thread_axis("threadIdx.y")) - s[B].bind(s[B].op.reduce_axis[0], tvm.thread_axis("threadIdx.x")) + s[B.op].bind(xo, te.thread_axis("blockIdx.x")) + s[B.op].bind(xi, te.thread_axis("threadIdx.y")) + s[B].bind(s[B].op.reduce_axis[0], te.thread_axis("threadIdx.x")) s[BF].compute_at(s[B], s[B].op.reduce_axis[0]) fapi = tvm.lower(s, [A, B]) def test_dependent_output_shape(): - n, m, x = tvm.size_var('n'), tvm.size_var('m'), tvm.size_var('x') - A = tvm.placeholder((n, m)) - B = tvm.compute((m, n//x), lambda i, j: A[i,j] , name='B') - s = tvm.create_schedule(B.op) + n, m, x = te.size_var('n'), te.size_var('m'), te.size_var('x') + A = te.placeholder((n, m)) + B = te.compute((m, n//x), lambda i, j: A[i,j] , name='B') + s = te.create_schedule(B.op) mod = tvm.build(s, [A, B, x]) if __name__ == "__main__": diff --git a/tests/python/unittest/test_codegen_arm.py b/tests/python/unittest/test_codegen_arm.py index 8e2ad7aa76e03..65d82b0146fb9 100644 --- a/tests/python/unittest/test_codegen_arm.py +++ b/tests/python/unittest/test_codegen_arm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import re import os import ctypes @@ -23,10 +24,10 @@ def test_popcount(): target = 'llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon' def check_correct_assembly(type, elements, counts): - n = tvm.convert(elements) - A = tvm.placeholder(n, dtype=type, name='A') - B = tvm.compute(A.shape, lambda i: tvm.popcount(A[i]), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(elements) + A = te.placeholder(n, dtype=type, name='A') + B = te.compute(A.shape, lambda i: tvm.tir.popcount(A[i]), name='B') + s = te.create_schedule(B.op) s[B].vectorize(s[B].op.axis[0]) f = tvm.build(s, [A, B], target) @@ -47,13 +48,13 @@ def test_vmlal_s16(): target = 'llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon' def check_correct_assembly(N): - K = tvm.size_var("K") - A = tvm.placeholder((K, N), dtype="int8", name='A') - B = tvm.placeholder((K, N), dtype="int8", name='B') - k = tvm.reduce_axis((0, K)) - C = tvm.compute((N, ), lambda n: tvm.sum( + K = te.size_var("K") + A = te.placeholder((K, N), dtype="int8", name='A') + B = te.placeholder((K, N), dtype="int8", name='B') + k = te.reduce_axis((0, K)) + C = te.compute((N, ), lambda n: te.sum( A[k, n].astype("int32") * B[k, n].astype("int32"), axis=[k]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].vectorize(s[C].op.axis[0]) f = tvm.build(s, [A, B, C], target) @@ -67,14 +68,14 @@ def check_correct_assembly(N): check_correct_assembly(64) def check_broadcast_correct_assembly(N): - K = tvm.size_var("K") - A = tvm.placeholder((K, N), dtype="int8", name='A') - B = tvm.placeholder((K,), dtype="int8", name='B') - k = tvm.reduce_axis((0, K)) - C = tvm.compute((N, ), lambda n: tvm.sum( + K = te.size_var("K") + A = te.placeholder((K, N), dtype="int8", name='A') + B = te.placeholder((K,), dtype="int8", name='B') + k = te.reduce_axis((0, K)) + C = te.compute((N, ), lambda n: te.sum( A[k, n].astype("int32") * B[k].astype("int32"), axis=[k]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].vectorize(s[C].op.axis[0]) f = tvm.build(s, [A, B, C], target) diff --git a/tests/python/unittest/test_codegen_blob.py b/tests/python/unittest/test_codegen_blob.py index c14607d0c0b70..62043e344d96f 100644 --- a/tests/python/unittest/test_codegen_blob.py +++ b/tests/python/unittest/test_codegen_blob.py @@ -20,6 +20,7 @@ from tvm.relay import testing from tvm.contrib import graph_runtime import tvm +from tvm import te import ctypes def test_resnet18(): @@ -74,13 +75,13 @@ def test_system_lib(): print("skip because %s is not enabled..." % device) return nn = 12 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=4) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) from tvm.contrib import util temp = util.tempdir() diff --git a/tests/python/unittest/test_codegen_bool.py b/tests/python/unittest/test_codegen_bool.py index 33711cbcdb630..cdb343f3530b0 100644 --- a/tests/python/unittest/test_codegen_bool.py +++ b/tests/python/unittest/test_codegen_bool.py @@ -17,21 +17,22 @@ """codegen related to bool types""" import tvm +from tvm import te import numpy as np def test_cmp_load_store(): n = 32 - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) > B(*i), name='C') - D = tvm.compute(C.shape, lambda *i: tvm.all(C(*i), + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) > B(*i), name='C') + D = te.compute(C.shape, lambda *i: tvm.tir.all(C(*i), A(*i) > 1).astype('float32'), name="D") def check_llvm(): if not tvm.runtime.enabled("llvm"): return - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) xo, xi = s[C].split(C.op.axis[0], factor=4) xo1, xo2 = s[C].split(xo, factor=13) s[C].parallel(xo2) @@ -50,11 +51,11 @@ def check_device(device): ctx = tvm.context(device, 0) if not ctx.exist: return - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) for stage in [C, D]: xo, xi = s[stage].split(stage.op.axis[0], factor=4) - s[stage].bind(xo, tvm.thread_axis("blockIdx.x")) - s[stage].bind(xi, tvm.thread_axis("threadIdx.x")) + s[stage].bind(xo, te.thread_axis("blockIdx.x")) + s[stage].bind(xi, te.thread_axis("threadIdx.x")) f = tvm.build(s, [A, B, D], device) a_np = np.random.uniform(size=n).astype(A.dtype) a = tvm.nd.array(a_np, ctx) diff --git a/tests/python/unittest/test_codegen_c_host.py b/tests/python/unittest/test_codegen_c_host.py index a126c07c8ac14..daf5b0eec5e2a 100644 --- a/tests/python/unittest/test_codegen_c_host.py +++ b/tests/python/unittest/test_codegen_c_host.py @@ -15,16 +15,17 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import util def test_add(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) def check_c(): mhost = tvm.build(s, [A, B, C], "c", name="fadd") @@ -47,14 +48,14 @@ def check_c(): def test_add_pipeline(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - AA = tvm.compute((n,), lambda *i: A(*i), name='A') - BB = tvm.compute((n,), lambda *i: B(*i), name='B') - T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') - C = tvm.compute(A.shape, lambda *i: T(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + AA = te.compute((n,), lambda *i: A(*i), name='A') + BB = te.compute((n,), lambda *i: B(*i), name='B') + T = te.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') + C = te.compute(A.shape, lambda *i: T(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) xo1, xo2 = s[C].split(xo, factor=13) s[C].parallel(xo2) @@ -65,9 +66,9 @@ def test_add_pipeline(): def check_c(): # Specifically allow offset to test codepath when offset is available - Ab = tvm.decl_buffer( + Ab = tvm.tir.decl_buffer( A.shape, A.dtype, - elem_offset=tvm.size_var('Aoffset'), + elem_offset=te.size_var('Aoffset'), offset_factor=8, name='A') binds = {A : Ab} @@ -97,10 +98,10 @@ def check_c(): def test_reinterpret(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A', dtype="int32") - B = tvm.compute(A.shape, lambda *i: tvm.call_pure_intrin("float32", "reinterpret", A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A', dtype="int32") + B = te.compute(A.shape, lambda *i: tvm.tir.call_pure_intrin("float32", "reinterpret", A(*i)), name='B') + s = te.create_schedule(B.op) def check_c(): mhost = tvm.build(s, [A, B], "c", name="reinterpret") diff --git a/tests/python/unittest/test_codegen_cross_llvm.py b/tests/python/unittest/test_codegen_cross_llvm.py index 1827ccf63d79d..cb3986eaf20f0 100644 --- a/tests/python/unittest/test_codegen_cross_llvm.py +++ b/tests/python/unittest/test_codegen_cross_llvm.py @@ -16,6 +16,7 @@ # under the License. """Test cross compilation""" import tvm +from tvm import te import os import struct from tvm import rpc @@ -24,11 +25,11 @@ def test_llvm_add_pipeline(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) diff --git a/tests/python/unittest/test_codegen_cuda.py b/tests/python/unittest/test_codegen_cuda.py index 8652817c21ce1..27968c69380cd 100644 --- a/tests/python/unittest/test_codegen_cuda.py +++ b/tests/python/unittest/test_codegen_cuda.py @@ -16,14 +16,15 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import topi import unittest from tvm.contrib.nvcc import have_fp16, have_int8 from tvm.contrib import nvcc -tx = tvm.thread_axis("threadIdx.x") -bx = tvm.thread_axis("blockIdx.x") +tx = te.thread_axis("threadIdx.x") +bx = te.thread_axis("blockIdx.x") def test_cuda_vectorize_add(): num_thread = 8 @@ -37,9 +38,9 @@ def check_cuda(dtype, n, lanes): if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version): print("skip because gpu does not support int8") return - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i] + tvm.const(1, A.dtype), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i] + tvm.tir.const(1, A.dtype), name='B') + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(xo, bx) s[B].bind(xi, tx) @@ -69,12 +70,12 @@ def check_cuda(dtype, n, lanes): if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version): print("skip because gpu does not support int8") return - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.placeholder((n,), name='B', dtype="%sx%d" % (dtype, lanes)) - C = tvm.placeholder((n,), name='C', dtype="int32") - D = tvm.compute((n,), - lambda i: tvm.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D') - s = tvm.create_schedule(D.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.placeholder((n,), name='B', dtype="%sx%d" % (dtype, lanes)) + C = te.placeholder((n,), name='C', dtype="int32") + D = te.compute((n,), + lambda i: tvm.tir.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D') + s = te.create_schedule(D.op) xo, xi = s[D].split(D.op.axis[0], factor=num_thread) s[D].bind(xo, bx) s[D].bind(xi, tx) @@ -99,9 +100,9 @@ def check_cuda(dtype, n, lanes): print("skip because cuda is not enabled..") return ctx = tvm.gpu(0) - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i], name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i], name='B') + s = te.create_schedule(B.op) block, thread = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(block, bx) s[B].bind(thread, tx) @@ -122,8 +123,8 @@ def check_cuda(n, value): lanes = 4 dtype = 'int8' ctx = tvm.gpu(0) - A = tvm.compute((n, lanes), lambda i,j: tvm.const(value, dtype=dtype)) - s = tvm.create_schedule(A.op) + A = te.compute((n, lanes), lambda i,j: tvm.tir.const(value, dtype=dtype)) + s = te.create_schedule(A.op) y, x = s[A].op.axis s[A].vectorize(x) s[A].bind(y, bx) @@ -140,10 +141,10 @@ def check_cuda(n, value): def test_cuda_inf_nan(): target = 'cuda' def check_inf_nan(ctx, n, value, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - inf_value = tvm.const(value, dtype=dtype) - C = tvm.compute((n,), lambda i: inf_value, name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype=dtype) + inf_value = tvm.tir.const(value, dtype=dtype) + C = te.compute((n,), lambda i: inf_value, name='C') + s = te.create_schedule(C.op) s[C].bind(s[C].op.axis[0], tx) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) @@ -170,30 +171,30 @@ def test_cuda_shuffle(): print("skip because cuda is not enabled..") return - idxm = tvm.indexmod - a = tvm.placeholder((64, ), 'int32') - b = tvm.placeholder((64, ), 'int32') - c = tvm.compute((64, ), lambda x: a[x] + b[x - idxm(x, 4) + (3 - idxm(x, 4))]) - sch = tvm.create_schedule(c.op) + idxm = tvm.tir.indexmod + a = te.placeholder((64, ), 'int32') + b = te.placeholder((64, ), 'int32') + c = te.compute((64, ), lambda x: a[x] + b[x - idxm(x, 4) + (3 - idxm(x, 4))]) + sch = te.create_schedule(c.op) x = c.op.axis[0] xo, xi = sch[c].split(x, 4) - thrx = tvm.thread_axis("threadIdx.x") + thrx = te.thread_axis("threadIdx.x") sch[c].bind(xo, thrx) sch[c].vectorize(xi) def my_vectorize(stmt): def vectorizer(op): if op.for_type == tvm.tir.For.Vectorized: - four = tvm.const(4, 'int32') - idx = tvm.tir.Ramp(thrx.var * four, tvm.const(1, 'int32'), 4) - all_ones = tvm.const(1, 'int32x4') + four = tvm.tir.const(4, 'int32') + idx = tvm.tir.Ramp(thrx.var * four, tvm.tir.const(1, 'int32'), 4) + all_ones = tvm.tir.const(1, 'int32x4') store = op.body value = store.value new_a = tvm.tir.Load('int32x4', value.a.buffer_var, idx, all_ones) bs, ids = [], [] for i in range(4): - bs.append(tvm.tir.Load('int32', value.b.buffer_var, thrx.var * four + tvm.const(i, 'int32'))) - ids.append(tvm.const(3 - i, 'int32')) + bs.append(tvm.tir.Load('int32', value.b.buffer_var, thrx.var * four + tvm.tir.const(i, 'int32'))) + ids.append(tvm.tir.const(3 - i, 'int32')) new_b = tvm.tir.Shuffle(bs, ids) return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones) return None @@ -215,17 +216,17 @@ def test_cuda_reducition_binding(): print("skip because cuda is not enabled..") return - k = tvm.reduce_axis((0, 32), 'k') - A = tvm.placeholder((96, 32), name='A') - B = tvm.compute( (96,), lambda m: - tvm.sum(A[m, k], axis=k), + k = te.reduce_axis((0, 32), 'k') + A = te.placeholder((96, 32), name='A') + B = te.compute( (96,), lambda m: + te.sum(A[m, k], axis=k), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].reorder(B.op.reduce_axis[0], B.op.axis[0]) mo, _ = s[B].split(B.op.axis[0], 32) - s[B].bind(mo, tvm.thread_axis("blockIdx.x")) + s[B].bind(mo, te.thread_axis("blockIdx.x")) fcuda = tvm.build(s, [A, B], "cuda") @@ -234,15 +235,15 @@ def test_rfactor_predicates(): print("skip because cuda is not enabled..") return - n = tvm.reduce_axis((0, 129), 'n') - A = tvm.placeholder((129,), name='A') - B = tvm.compute( (1, ), lambda b: - tvm.sum(A[n], + n = te.reduce_axis((0, 129), 'n') + A = te.placeholder((129,), name='A') + B = te.compute( (1, ), lambda b: + te.sum(A[n], axis=n), name='B' ) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) _, ni = s[B].split(s[B].op.reduce_axis[0], factor=8) @@ -270,15 +271,15 @@ def test_cuda_const_float_to_half(): # otherwise it is found that the code gen is done by nvrtc. from tvm import autotvm shape = (2, 3, 4) - a = tvm.placeholder(shape, dtype='float16', name='a') - b = tvm.const(0.5, dtype='float16') - c = tvm.compute(shape, lambda i, j, k: a[i, j, k] > b, name='c') - s = tvm.create_schedule(c.op) + a = te.placeholder(shape, dtype='float16', name='a') + b = tvm.tir.const(0.5, dtype='float16') + c = te.compute(shape, lambda i, j, k: a[i, j, k] > b, name='c') + s = te.create_schedule(c.op) axes = [axis for axis in c.op.axis] fused = s[c].fuse(*axes) bx, tx = s[c].split(fused, factor=64) - s[c].bind(bx, tvm.thread_axis('blockIdx.x')) - s[c].bind(tx, tvm.thread_axis('threadIdx.x')) + s[c].bind(bx, te.thread_axis('blockIdx.x')) + s[c].bind(tx, te.thread_axis('threadIdx.x')) func = tvm.build(s, [a, c], 'cuda') ctx = tvm.gpu(0) @@ -298,8 +299,8 @@ def check_cuda(dtype, m=32, n=32): print("Skip because gpu does not have fp16 support") return - a = tvm.placeholder((m, n), name="a", dtype=dtype) - b = tvm.placeholder((m, n), name="b", dtype=dtype) + a = te.placeholder((m, n), name="a", dtype=dtype) + b = te.placeholder((m, n), name="b", dtype=dtype) c = a + b d = a * b e = topi.elemwise_sum([c, d]) diff --git a/tests/python/unittest/test_codegen_device.py b/tests/python/unittest/test_codegen_device.py index 63ee03028e7ee..d6a44fbc9b12f 100644 --- a/tests/python/unittest/test_codegen_device.py +++ b/tests/python/unittest/test_codegen_device.py @@ -15,20 +15,21 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.contrib import util import numpy as np def test_large_uint_imm(): value = (1 << 63) + 123 - other = tvm.const(3, "uint64") + other = tvm.tir.const(3, "uint64") n = 12 num_thread = 2 - A = tvm.compute((n,), lambda *i: tvm.const(value, "uint64") + other, name='A') - s = tvm.create_schedule(A.op) + A = te.compute((n,), lambda *i: tvm.tir.const(value, "uint64") + other, name='A') + s = te.create_schedule(A.op) xo, xi = s[A].split(A.op.axis[0], factor=num_thread) - s[A].bind(xi, tvm.thread_axis("threadIdx.x")) - s[A].bind(xo, tvm.thread_axis("blockIdx.x")) + s[A].bind(xi, te.thread_axis("threadIdx.x")) + s[A].bind(xo, te.thread_axis("blockIdx.x")) def check_target(device): ctx = tvm.context(device, 0) @@ -45,30 +46,30 @@ def check_target(device): def test_add_pipeline(): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(), name='C') - D = tvm.compute(A.shape, lambda *i: C(*i) + 1, name='D') - s = tvm.create_schedule(D.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(), name='C') + D = te.compute(A.shape, lambda *i: C(*i) + 1, name='D') + s = te.create_schedule(D.op) # GPU schedule have to split by gridIdx and threadIdx num_thread = 256 xo, xi = s[C].split(C.op.axis[0], factor=num_thread) - s[C].bind(xi, tvm.thread_axis("threadIdx.x")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) + s[C].bind(xi, te.thread_axis("threadIdx.x")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) xo, xi = s[D].split(D.op.axis[0], factor=num_thread) - s[D].bind(xi, tvm.thread_axis("threadIdx.x")) - s[D].bind(xo, tvm.thread_axis("blockIdx.x")) + s[D].bind(xi, te.thread_axis("threadIdx.x")) + s[D].bind(xo, te.thread_axis("blockIdx.x")) # compile to IR s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - Db = tvm.decl_buffer(D.shape, D.dtype, name='D') + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + Db = tvm.tir.decl_buffer(D.shape, D.dtype, name='D') stmt = tvm.ir_pass.LoopPartition(stmt, False) stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, D:Db}, 64) stmt = tvm.ir_pass.Simplify(stmt) diff --git a/tests/python/unittest/test_codegen_extern.py b/tests/python/unittest/test_codegen_extern.py index 03efee58cc576..3b6b5edb88518 100644 --- a/tests/python/unittest/test_codegen_extern.py +++ b/tests/python/unittest/test_codegen_extern.py @@ -15,37 +15,38 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_add_pipeline(): nn = 64 max_threads = 4 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') def extern_generator(ins, outs): """Manually write the IR for the extern function, add pipeline""" ib = tvm.ir_builder.create() with ib.for_range(0, (n+1) // 2) as i: - ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.const(1, "float32x2"))) + ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.tir.const(1, "float32x2"))) return ib.get() def extern_generator_gpu(ins, outs): """Manually write the IR for the extern function, add pipeline""" ib = tvm.ir_builder.create() - bx = tvm.thread_axis("blockIdx.x") - tx = tvm.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") ib.scope_attr(bx, "thread_extent", (nn+max_threads-1) // max_threads) ib.scope_attr(tx, "thread_extent", max_threads) idx = bx.var * max_threads + tx.var with ib.if_scope(ib.likely(idx < n)): - ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.const(1, "float32x2"))) + ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.tir.const(1, "float32x2"))) return ib.get() - C_cpu = tvm.extern(A.shape, [A], extern_generator, name='C') - C_gpu = tvm.extern(A.shape, [A], extern_generator_gpu, name='C') - s_cpu = tvm.create_schedule(C_cpu.op) - s_gpu = tvm.create_schedule(C_gpu.op) + C_cpu = te.extern(A.shape, [A], extern_generator, name='C') + C_gpu = te.extern(A.shape, [A], extern_generator_gpu, name='C') + s_cpu = te.create_schedule(C_cpu.op) + s_gpu = te.create_schedule(C_gpu.op) print(tvm.lower(s_cpu, [A, C_cpu], simple_mode=True)) print(tvm.lower(s_gpu, [A, C_gpu], simple_mode=True)) @@ -70,14 +71,14 @@ def check_target(target): def test_pack_buffer_simple(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') def extern_generator(ins, outs): """Manually write the IR for the extern function, add pipeline.""" - return tvm.call_packed("my_extern_array_func1", ins[0], outs[0]) + return tvm.tir.call_packed("my_extern_array_func1", ins[0], outs[0]) - C = tvm.extern(A.shape, [A], extern_generator, name='C') - s = tvm.create_schedule(C.op) + C = te.extern(A.shape, [A], extern_generator, name='C') + s = te.create_schedule(C.op) @tvm.register_func def my_extern_array_func1(aa, bb): @@ -104,15 +105,15 @@ def check_target(target): def test_pack_buffer_intermediate(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda i: A[i] + 1, name="B") + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda i: A[i] + 1, name="B") def extern_generator(ins, outs): """Manually write the IR for the extern function, add pipeline.""" - return tvm.call_packed("my_extern_array_func2", ins[0], outs[0]) + return tvm.tir.call_packed("my_extern_array_func2", ins[0], outs[0]) - C = tvm.extern(B.shape, [B], extern_generator, name='C') - s = tvm.create_schedule(C.op) + C = te.extern(B.shape, [B], extern_generator, name='C') + s = te.create_schedule(C.op) def check_target(target): if not tvm.runtime.enabled(target): diff --git a/tests/python/unittest/test_codegen_llvm.py b/tests/python/unittest/test_codegen_llvm.py index ca3229389c278..0eae4b9fc3e46 100644 --- a/tests/python/unittest/test_codegen_llvm.py +++ b/tests/python/unittest/test_codegen_llvm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi from tvm.contrib import util, clang import numpy as np @@ -23,10 +24,10 @@ def test_llvm_intrin(): ib = tvm.ir_builder.create() - n = tvm.convert(4) + n = tvm.runtime.convert(4) A = ib.pointer("float32", name="A") args = [ - tvm.call_pure_intrin("handle", "tvm_address_of", A[0]), + tvm.tir.call_pure_intrin("handle", "tvm_address_of", A[0]), 0, 3, 1 ] ib.emit(tvm.tir.Evaluate( @@ -45,9 +46,9 @@ def test_llvm_import(): } """ n = 10 - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: - tvm.call_pure_extern("float32", "my_add", A(*i), 1.0), + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: + tvm.tir.call_pure_extern("float32", "my_add", A(*i), 1.0), name='B') def check_llvm(use_file): if not tvm.runtime.enabled("llvm"): @@ -58,7 +59,7 @@ def check_llvm(use_file): temp = util.tempdir() ll_path = temp.relpath("temp.ll") ll_code = clang.create_llvm(cc_code, output=ll_path) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) if use_file: s[B].pragma(s[B].op.axis[0], "import_llvm", ll_path) else: @@ -79,9 +80,9 @@ def check_llvm(use_file): def test_llvm_lookup_intrin(): ib = tvm.ir_builder.create() - m = tvm.size_var("m") + m = te.size_var("m") A = ib.pointer("uint8x8", name="A") - x = tvm.call_llvm_intrin("uint8x8", "llvm.ctpop.i8", tvm.const(1, 'uint32'), A) + x = tvm.tir.call_llvm_intrin("uint8x8", "llvm.ctpop.i8", tvm.tir.const(1, 'uint32'), A) ib.emit(x) body = ib.get() func = tvm.ir_pass.MakeAPI(body, "ctpop", [A], 1, True) @@ -90,9 +91,9 @@ def test_llvm_lookup_intrin(): def test_llvm_large_uintimm(): value = (1 << 63) + 123 - other = tvm.const(3, "uint64") - A = tvm.compute((), lambda : tvm.const(value, "uint64") + other, name='A') - s = tvm.create_schedule(A.op) + other = tvm.tir.const(3, "uint64") + A = te.compute((), lambda : tvm.tir.const(value, "uint64") + other, name='A') + s = te.create_schedule(A.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): @@ -109,14 +110,14 @@ def check_llvm(): def test_llvm_add_pipeline(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - AA = tvm.compute((n,), lambda *i: A(*i), name='A') - BB = tvm.compute((n,), lambda *i: B(*i), name='B') - T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') - C = tvm.compute(A.shape, lambda *i: T(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + AA = te.compute((n,), lambda *i: A(*i), name='A') + BB = te.compute((n,), lambda *i: B(*i), name='B') + T = te.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') + C = te.compute(A.shape, lambda *i: T(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) xo1, xo2 = s[C].split(xo, factor=13) s[C].parallel(xo2) @@ -129,9 +130,9 @@ def check_llvm(): if not tvm.runtime.enabled("llvm"): return # Specifically allow offset to test codepath when offset is available - Ab = tvm.decl_buffer( + Ab = tvm.tir.decl_buffer( A.shape, A.dtype, - elem_offset=tvm.size_var('Aoffset'), + elem_offset=te.size_var('Aoffset'), offset_factor=8, name='A') binds = {A : Ab} @@ -153,10 +154,10 @@ def check_llvm(): def test_llvm_persist_parallel(): n = 128 - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1, name='B') - C = tvm.compute(A.shape, lambda *i: tvm.sqrt(B(*i)) * 2 + 2, name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1, name='B') + C = te.compute(A.shape, lambda *i: te.sqrt(B(*i)) * 2 + 2, name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=8) xo1, xo2 = s[C].split(xo, nparts=1) s[B].compute_at(s[C], xo1) @@ -187,10 +188,10 @@ def test_llvm_flip_pipeline(): def check_llvm(nn, base): if not tvm.runtime.enabled("llvm"): return - n = tvm.convert(nn) - A = tvm.placeholder((n + base), name='A') - C = tvm.compute((n,), lambda i: A(nn + base- i - 1), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n + base), name='A') + C = te.compute((n,), lambda i: A(nn + base- i - 1), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -214,10 +215,10 @@ def test_llvm_vadd_pipeline(): def check_llvm(n, lanes): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes) - B = tvm.compute((n,), lambda i: A[i], name='B') - C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes) + B = te.compute((n,), lambda i: A[i], name='B') + C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], nparts=2) _, xi = s[C].split(xi, factor=2) s[C].parallel(xo) @@ -243,10 +244,10 @@ def test_llvm_madd_pipeline(): def check_llvm(nn, base, stride): if not tvm.runtime.enabled("llvm"): return - n = tvm.convert(nn) - A = tvm.placeholder((n + base, stride), name='A') - C = tvm.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n + base, stride), name='A') + C = te.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -268,11 +269,11 @@ def check_llvm(nn, base, stride): def test_llvm_temp_space(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A(i) + 1, name='B') - C = tvm.compute(A.shape, lambda i: B(i) + 1, name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A(i) + 1, name='B') + C = te.compute(A.shape, lambda i: B(i) + 1, name='C') + s = te.create_schedule(C.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): @@ -291,11 +292,11 @@ def check_llvm(): def test_multiple_func(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -328,9 +329,9 @@ def test_llvm_condition(): def check_llvm(n, offset): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n, ), name='A') - C = tvm.compute((n,), lambda i: tvm.if_then_else(i >= offset, A[i], 0.0), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n, ), name='A') + C = te.compute((n,), lambda i: tvm.tir.if_then_else(i >= offset, A[i], 0.0), name='C') + s = te.create_schedule(C.op) # build and invoke the kernel. f = tvm.build(s, [A, C], "llvm") ctx = tvm.cpu(0) @@ -348,9 +349,9 @@ def test_llvm_bool(): def check_llvm(n): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n, ), name='A', dtype="int32") - C = tvm.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n, ), name='A', dtype="int32") + C = te.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C') + s = te.create_schedule(C.op) # build and invoke the kernel. f = tvm.build(s, [A, C], "llvm") ctx = tvm.cpu(0) @@ -367,12 +368,12 @@ def test_rank_zero(): def check_llvm(n): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n, ), name='A') - scale = tvm.placeholder((), name='scale') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k), name="C") - D = tvm.compute((), lambda : C() + 1) - s = tvm.create_schedule(D.op) + A = te.placeholder((n, ), name='A') + scale = te.placeholder((), name='scale') + k = te.reduce_axis((0, n), name="k") + C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C") + D = te.compute((), lambda : C() + 1) + s = te.create_schedule(D.op) # build and invoke the kernel. f = tvm.build(s, [A, scale, D], "llvm") ctx = tvm.cpu(0) @@ -391,12 +392,12 @@ def check_llvm(n): if not tvm.runtime.enabled("llvm"): return with tvm.build_config(instrument_bound_checkers=True): - A = tvm.placeholder((n, ), name='A') - scale = tvm.placeholder((), name='scale') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k), name="C") - D = tvm.compute((), lambda : C() + 1) - s = tvm.create_schedule(D.op) + A = te.placeholder((n, ), name='A') + scale = te.placeholder((), name='scale') + k = te.reduce_axis((0, n), name="k") + C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C") + D = te.compute((), lambda : C() + 1) + s = te.create_schedule(D.op) # build and invoke the kernel. f = tvm.build(s, [A, scale, D], "llvm") ctx = tvm.cpu(0) @@ -412,10 +413,10 @@ def check_llvm(n): def test_alignment(): - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] * 3, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] * 3, name='B') + s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=8) s[B].vectorize(tx) f = tvm.build(s, [A, B], "llvm") @@ -427,26 +428,26 @@ def test_alignment(): def test_llvm_div(): """Check that the semantics of div and mod is correct""" def check(start, end, dstart, dend, dtype, floor_div=False): - div = tvm.floordiv if floor_div else tvm.truncdiv - mod = tvm.floormod if floor_div else tvm.truncmod + div = tvm.te.floordiv if floor_div else tvm.tir.truncdiv + mod = tvm.te.floormod if floor_div else tvm.tir.truncmod # A are dividends, B are divisors. Note that we add 1 to make include end in the range. - A = tvm.placeholder((end - start + 1,), name="A", dtype=dtype) - B = tvm.placeholder((dend - dstart + 1,), name="B", dtype=dtype) + A = te.placeholder((end - start + 1,), name="A", dtype=dtype) + B = te.placeholder((dend - dstart + 1,), name="B", dtype=dtype) # We clip values with min and max so that simplifiers know the ranges of values - clipa = lambda x: tvm.min(tvm.const(end, dtype), tvm.max(tvm.const(start, dtype), x)) - clipb = lambda x: tvm.min(tvm.const(dend, dtype), tvm.max(tvm.const(dstart, dtype), x)) + clipa = lambda x: tvm.te.min(tvm.tir.const(end, dtype), tvm.te.max(tvm.tir.const(start, dtype), x)) + clipb = lambda x: tvm.te.min(tvm.tir.const(dend, dtype), tvm.te.max(tvm.tir.const(dstart, dtype), x)) # If the range is just a single point, use the constant itself if start == end: - clipa = lambda x: tvm.const(start, dtype) + clipa = lambda x: tvm.tir.const(start, dtype) if dstart == dend: - clipb = lambda x: tvm.const(dstart, dtype) + clipb = lambda x: tvm.tir.const(dstart, dtype) # D are division results and M are modulo results - [D, M] = tvm.compute((end - start + 1, dend - dstart + 1), + [D, M] = te.compute((end - start + 1, dend - dstart + 1), lambda i, j: (div(clipa(A[i]), clipb(B[j])), mod(clipa(A[i]), clipb(B[j])))) - s = tvm.create_schedule([D.op, M.op]) + s = te.create_schedule([D.op, M.op]) f = tvm.build(s, [A, B, D, M], "llvm") # Fill input arrays with values @@ -525,10 +526,10 @@ def _show_info(): def test_llvm_fp_math(): def check_llvm_reciprocal(n): - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda i: tvm.div(1.0,(1e+37*A[i])), name='B') + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda i: te.div(1.0,(1e+37*A[i])), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm") a = tvm.nd.array(np.full((n,), 100, 'float32')) @@ -541,10 +542,10 @@ def check_llvm_reciprocal(n): check_llvm_reciprocal(16) def check_llvm_sigmoid(n): - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda i: tvm.sigmoid(A[i]), name='B') + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda i: te.sigmoid(A[i]), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm") a = tvm.nd.array(np.full((n,), -1000, 'float32')) @@ -559,11 +560,11 @@ def check_llvm_sigmoid(n): def test_dwarf_debug_information(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -634,19 +635,19 @@ def check_llvm_ir(): def test_llvm_shuffle(): - a = tvm.placeholder((8, ), 'int32') - b = tvm.placeholder((8, ), 'int32') - c = tvm.compute((8, ), lambda x: a[x] + b[7-x]) - sch = tvm.create_schedule(c.op) + a = te.placeholder((8, ), 'int32') + b = te.placeholder((8, ), 'int32') + c = te.compute((8, ), lambda x: a[x] + b[7-x]) + sch = te.create_schedule(c.op) def my_vectorize(stmt): def vectorizer(op): store = op.body - idx = tvm.tir.Ramp(tvm.const(0, 'int32'), tvm.const(1, 'int32'), 8) - all_ones = tvm.const(1, 'int32x8') + idx = tvm.tir.Ramp(tvm.tir.const(0, 'int32'), tvm.tir.const(1, 'int32'), 8) + all_ones = tvm.tir.const(1, 'int32x8') value = store.value - b_idx = tvm.tir.Shuffle([idx], [tvm.const(i, 'int32') for i in range(7, -1, -1)]) + b_idx = tvm.tir.Shuffle([idx], [tvm.tir.const(i, 'int32') for i in range(7, -1, -1)]) new_a = tvm.tir.Load('int32x8', value.a.buffer_var, idx, all_ones) new_b = tvm.tir.Load('int32x8', value.b.buffer_var, b_idx, all_ones) value = new_a + new_b diff --git a/tests/python/unittest/test_codegen_opencl.py b/tests/python/unittest/test_codegen_opencl.py index 3b9b4a73c52df..140e1f6fbdeaf 100644 --- a/tests/python/unittest/test_codegen_opencl.py +++ b/tests/python/unittest/test_codegen_opencl.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te target = 'opencl' def test_opencl_ternary_expression(): def check_if_then_else(ctx, n, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - true_value = tvm.const(1, dtype=dtype) - false_value = tvm.const(3, dtype=dtype) - max_lhs = tvm.const(2, dtype=dtype) - max_rhs = tvm.if_then_else(A[0] > 0, true_value, false_value) - C = tvm.compute((n,), lambda i: tvm.max(max_lhs, max_rhs), name='C') - s = tvm.create_schedule(C.op) - s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x")) + A = te.placeholder((n,), name='A', dtype=dtype) + true_value = tvm.tir.const(1, dtype=dtype) + false_value = tvm.tir.const(3, dtype=dtype) + max_lhs = tvm.tir.const(2, dtype=dtype) + max_rhs = tvm.tir.if_then_else(A[0] > 0, true_value, false_value) + C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name='C') + s = te.create_schedule(C.op) + s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) @@ -36,14 +37,14 @@ def check_if_then_else(ctx, n, dtype): fun(a, c) def check_select(ctx, n, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - true_value = tvm.const(1, dtype=dtype) - false_value = tvm.const(3, dtype=dtype) - max_lhs = tvm.const(2, dtype=dtype) + A = te.placeholder((n,), name='A', dtype=dtype) + true_value = tvm.tir.const(1, dtype=dtype) + false_value = tvm.tir.const(3, dtype=dtype) + max_lhs = tvm.tir.const(2, dtype=dtype) max_rhs = tvm.tir.Select(A[0] > 0, true_value, false_value) - C = tvm.compute((n,), lambda i: tvm.max(max_lhs, max_rhs), name='C') - s = tvm.create_schedule(C.op) - s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x")) + C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name='C') + s = te.create_schedule(C.op) + s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) @@ -68,11 +69,11 @@ def check_select(ctx, n, dtype): def test_opencl_inf_nan(): def check_inf_nan(ctx, n, value, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - inf_value = tvm.const(value, dtype=dtype) - C = tvm.compute((n,), lambda i: inf_value, name='C') - s = tvm.create_schedule(C.op) - s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x")) + A = te.placeholder((n,), name='A', dtype=dtype) + inf_value = tvm.tir.const(value, dtype=dtype) + C = te.compute((n,), lambda i: inf_value, name='C') + s = te.create_schedule(C.op) + s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) c = tvm.nd.empty((n,), A.dtype, ctx) diff --git a/tests/python/unittest/test_codegen_rocm.py b/tests/python/unittest/test_codegen_rocm.py index 73f76465649aa..f107e592d2d3d 100644 --- a/tests/python/unittest/test_codegen_rocm.py +++ b/tests/python/unittest/test_codegen_rocm.py @@ -15,23 +15,24 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import unittest -tx = tvm.thread_axis("threadIdx.x") -ty = tvm.thread_axis("threadIdx.y") -bx = tvm.thread_axis("blockIdx.x") -by = tvm.thread_axis("blockIdx.y") +tx = te.thread_axis("threadIdx.x") +ty = te.thread_axis("threadIdx.y") +bx = te.thread_axis("blockIdx.x") +by = te.thread_axis("blockIdx.y") @unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..") def test_rocm_cross_thread_reduction(): # based on the reduction tutorial - n = tvm.size_var("n") - m = tvm.size_var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") - s = tvm.create_schedule(B.op) + n = te.size_var("n") + m = te.size_var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") + s = te.create_schedule(B.op) ko, ki = s[B].split(B.op.reduce_axis[0], factor=16) BF = s.rfactor(B, ki) xo, xi = s[B].split(s[B].op.axis[0], factor=32) @@ -54,10 +55,10 @@ def test_rocm_cross_thread_reduction(): @unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..") def test_rocm_inf_nan(): def check_inf_nan(ctx, n, value, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - inf_value = tvm.const(value, dtype=dtype) - C = tvm.compute((n,), lambda i: inf_value, name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype=dtype) + inf_value = tvm.tir.const(value, dtype=dtype) + C = te.compute((n,), lambda i: inf_value, name='C') + s = te.create_schedule(C.op) s[C].bind(s[C].op.axis[0], tx) fun = tvm.build(s, [A, C], "rocm") a = tvm.nd.empty((n,), A.dtype, ctx) @@ -76,12 +77,12 @@ def check_inf_nan(ctx, n, value, dtype): @unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..") def test_rocm_reducition_binding(): - k = tvm.reduce_axis((0, 32), 'k') - A = tvm.placeholder((96, 32), name='A') - B = tvm.compute( (96,), lambda m: - tvm.sum(A[m, k], axis=k), + k = te.reduce_axis((0, 32), 'k') + A = te.placeholder((96, 32), name='A') + B = te.compute( (96,), lambda m: + te.sum(A[m, k], axis=k), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].reorder(B.op.reduce_axis[0], B.op.axis[0]) @@ -92,7 +93,7 @@ def test_rocm_reducition_binding(): def test_rocm_copy(): def check_rocm(dtype, n): - A = tvm.placeholder((n,), name='A', dtype=dtype) + A = te.placeholder((n,), name='A', dtype=dtype) ctx = tvm.rocm(0) a_np = np.random.uniform(size=(n,)).astype(A.dtype) a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(a_np) @@ -111,9 +112,9 @@ def test_rocm_vectorize_add(): num_thread = 8 def check_rocm(dtype, n, lanes): - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i]+tvm.const(1, A.dtype), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B') + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(xo, bx) s[B].bind(xi, tx) diff --git a/tests/python/unittest/test_codegen_static_init.py b/tests/python/unittest/test_codegen_static_init.py index 4d71cb3929a7c..5eb79e5391891 100644 --- a/tests/python/unittest/test_codegen_static_init.py +++ b/tests/python/unittest/test_codegen_static_init.py @@ -15,17 +15,18 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import ctypes import numpy as np def test_static_callback(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') ib = tvm.ir_builder.create() A = ib.buffer_ptr(Ab) - cp = tvm.thread_axis((0, 1), "cop") + cp = te.thread_axis((0, 1), "cop") finit = tvm.tir.StringImm("TVMBackendRunOnce") ib.scope_attr(cp, "coproc_uop_scope", finit) with ib.for_range(0, n, "i", for_type="parallel") as i: @@ -41,13 +42,13 @@ def test_static_callback(): def test_static_init(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') ib = tvm.ir_builder.create() - handle = tvm.call_intrin("handle", "tvm_static_handle") + handle = tvm.tir.call_intrin("handle", "tvm_static_handle") ib.emit( - tvm.call_packed("test_static_callback", handle, Ab)) + tvm.tir.call_packed("test_static_callback", handle, Ab)) @tvm.register_func("test_static_callback") def test_cb(sh, A): diff --git a/tests/python/unittest/test_codegen_vm_basic.py b/tests/python/unittest/test_codegen_vm_basic.py index 7f08c75366e6b..896b95d314815 100644 --- a/tests/python/unittest/test_codegen_vm_basic.py +++ b/tests/python/unittest/test_codegen_vm_basic.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def run_jit(fapi, check): @@ -32,9 +33,9 @@ def tvm_call_back_get_shape(shape0): print(shape0) assert shape0 == a.shape[0] - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), tvm.float32) - stmt = tvm.tir.Evaluate(tvm.call_packed("tvm_call_back_get_shape", Ab.shape[0])) + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), "float32") + stmt = tvm.tir.Evaluate(tvm.tir.call_packed("tvm_call_back_get_shape", Ab.shape[0])) fapi = tvm.ir_pass.MakeAPI(stmt, "print_shape", [Ab], 0, True) fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) fapi = tvm.ir_pass.LowerIntrin(fapi, "stackvm") @@ -47,15 +48,15 @@ def tvm_stack_vm_print(*x): def test_stack_vm_loop(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') ib = tvm.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n - 1, "i") as i: A[i + 1] = A[i] + 1 - ib.emit(tvm.call_packed("tvm_stack_vm_print", i)) + ib.emit(tvm.tir.call_packed("tvm_stack_vm_print", i)) stmt = ib.get() fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) @@ -69,8 +70,8 @@ def check(f): def test_stack_vm_cond(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) ib = tvm.ir_builder.create() A = ib.buffer_ptr(Ab) @@ -93,9 +94,9 @@ def check(f): def test_vm_parallel(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') ib = tvm.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n, "i", for_type="parallel") as i: diff --git a/tests/python/unittest/test_codegen_vulkan.py b/tests/python/unittest/test_codegen_vulkan.py index d480a0f6ead8e..722a9ec6be157 100644 --- a/tests/python/unittest/test_codegen_vulkan.py +++ b/tests/python/unittest/test_codegen_vulkan.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import re import numpy as np @@ -28,18 +29,18 @@ def test_vector_comparison(): def check_correct_assembly(dtype): n = (1024,) - A = tvm.placeholder(n, dtype=dtype, name='A') - B = tvm.compute( + A = te.placeholder(n, dtype=dtype, name='A') + B = te.compute( A.shape, lambda i: tvm.tir.Select( - A[i] >= 0, A[i] + tvm.const(1, dtype), - tvm.const(0, dtype)), name='B') - s = tvm.create_schedule(B.op) + A[i] >= 0, A[i] + tvm.tir.const(1, dtype), + tvm.tir.const(0, dtype)), name='B') + s = te.create_schedule(B.op) (bx, tx) = s[B].split(s[B].op.axis[0], factor=128) (tx, vx) = s[B].split(tx, factor=4) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) s[B].vectorize(vx) f = tvm.build(s, [A, B], target) @@ -55,8 +56,8 @@ def check_correct_assembly(dtype): check_correct_assembly('float16') -tx = tvm.thread_axis("threadIdx.x") -bx = tvm.thread_axis("blockIdx.x") +tx = te.thread_axis("threadIdx.x") +bx = te.thread_axis("blockIdx.x") def test_vulkan_copy(): @@ -65,7 +66,7 @@ def check_vulkan(dtype, n): if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"): print("skip because vulkan is not enabled..") return - A = tvm.placeholder((n,), name='A', dtype=dtype) + A = te.placeholder((n,), name='A', dtype=dtype) ctx = tvm.vulkan(0) a_np = np.random.uniform(size=(n,)).astype(A.dtype) a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(a_np) @@ -87,9 +88,9 @@ def check_vulkan(dtype, n, lanes): if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"): print("skip because vulkan is not enabled..") return - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i]+tvm.const(1, A.dtype), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B') + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(xo, bx) s[B].bind(xi, tx) @@ -120,21 +121,21 @@ def worker(): if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"): print("skip because vulkan is not enabled..") return - A = tvm.placeholder((n,), name='A', dtype="float32") - B = tvm.placeholder((n,), name='B', dtype="float32") + A = te.placeholder((n,), name='A', dtype="float32") + B = te.placeholder((n,), name='B', dtype="float32") functions = [ - (lambda: tvm.compute((n,), lambda i: 2 * A[i] + 3 * B[i]), + (lambda: te.compute((n,), lambda i: 2 * A[i] + 3 * B[i]), lambda a, b: 2 * a + 3 * b), - (lambda: tvm.compute((n,), lambda i: A[i]+B[i]), + (lambda: te.compute((n,), lambda i: A[i]+B[i]), lambda a, b: a + b), - (lambda: tvm.compute((n,), lambda i: A[i]+2 * B[i]), + (lambda: te.compute((n,), lambda i: A[i]+2 * B[i]), lambda a, b: a + 2 * b), ] def build_f(f_ref): (C_f, ref) = f_ref C = C_f() - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=num_thread) s[C].bind(xo, bx) s[C].bind(xi, tx) diff --git a/tests/python/unittest/test_codegen_x86.py b/tests/python/unittest/test_codegen_x86.py index e17c6cf8cbcce..cdba774ff10d0 100644 --- a/tests/python/unittest/test_codegen_x86.py +++ b/tests/python/unittest/test_codegen_x86.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import re @@ -26,10 +27,10 @@ def test_fp16_to_fp32(): def fp16_to_fp32(target, width, match=None, not_match=None): elements = 64 - n = tvm.convert(elements) - A = tvm.placeholder((n, width), dtype="float16", name='A') - B = tvm.compute(A.shape, lambda *i: A(*i).astype("float32"), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(elements) + A = te.placeholder((n, width), dtype="float16", name='A') + B = te.compute(A.shape, lambda *i: A(*i).astype("float32"), name='B') + s = te.create_schedule(B.op) s[B].vectorize(s[B].op.axis[1]) f = tvm.build(s, [A, B], target) diff --git a/tests/python/unittest/test_container.py b/tests/python/unittest/test_container.py index f7ffd0288f1b1..84b26be6cbc10 100644 --- a/tests/python/unittest/test_container.py +++ b/tests/python/unittest/test_container.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import nd, relay from tvm.runtime import container as _container diff --git a/tests/python/unittest/test_custom_datatypes_mybfloat16.py b/tests/python/unittest/test_custom_datatypes_mybfloat16.py index cae481353d6bb..32f6e1865b63e 100644 --- a/tests/python/unittest/test_custom_datatypes_mybfloat16.py +++ b/tests/python/unittest/test_custom_datatypes_mybfloat16.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te from ctypes import * import topi import tvm.tir.ir_pass as ir_pass @@ -56,14 +57,14 @@ def lower_datatypes_and_build(schedule, args): return tvm.build(flist[0], target=tgt) def test_bfloat_add_and_cast_1(): - X = tvm.placeholder((3, ), name="X") - Y = tvm.placeholder((3, ), name="Y") + X = te.placeholder((3, ), name="X") + Y = te.placeholder((3, ), name="Y") Z = topi.cast( topi.cast(X, dtype="custom[bfloat]16") + topi.cast(Y, dtype="custom[bfloat]16"), dtype="float") - s = tvm.create_schedule([Z.op]) + s = te.create_schedule([Z.op]) built_cast = lower_datatypes_and_build(s, [X,Y,Z]) ctx = tvm.context(tgt, 0) @@ -87,14 +88,14 @@ def test_bfloat_add_and_cast_1(): def test_bfloat_add_and_cast_2(): - X = tvm.placeholder((3, ), name="X") - Y = tvm.placeholder((3, ), name="Y") + X = te.placeholder((3, ), name="X") + Y = te.placeholder((3, ), name="Y") Z = topi.cast( topi.cast(X, dtype="custom[bfloat]16") + topi.cast(Y, dtype="custom[bfloat]16"), dtype="float") - s = tvm.create_schedule([Z.op]) + s = te.create_schedule([Z.op]) built_cast = lower_datatypes_and_build(s, [X,Y,Z]) ctx = tvm.context(tgt, 0) @@ -122,14 +123,14 @@ def test_bfloat_add_and_cast_2(): def test_bfloat_add_and_cast_FloatImm(): - X = tvm.placeholder((3, ), name="X") + X = te.placeholder((3, ), name="X") Z = topi.cast( topi.add( topi.cast(X, dtype="custom[bfloat]16"), tvm.tir.FloatImm("custom[bfloat]16", 1.5)), dtype="float") - s = tvm.create_schedule([Z.op]) + s = te.create_schedule([Z.op]) built_cast = lower_datatypes_and_build(s, [X,Z]) ctx = tvm.context(tgt, 0) diff --git a/tests/python/unittest/test_graph_tuner_core.py b/tests/python/unittest/test_graph_tuner_core.py index 27e077f5319ca..a7be18a5a2d30 100644 --- a/tests/python/unittest/test_graph_tuner_core.py +++ b/tests/python/unittest/test_graph_tuner_core.py @@ -24,6 +24,7 @@ import copy import numpy as np import tvm +from tvm import te import tvm.relay.testing from tvm import autotvm @@ -79,20 +80,20 @@ def _create_data(target, dshape, dtype, layout): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) ltf_records.append((ms_input, ms_output)) ltf_keys = [] - ltf_arg = [tvm.placeholder((1, 4, 8, 8, 4), dtype=dtype), "NCHW4c", "NCHW8c"] + ltf_arg = [te.placeholder((1, 4, 8, 8, 4), dtype=dtype), "NCHW4c", "NCHW8c"] ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform') ltf_keys.append(ltf_wkl) - ltf_arg = [tvm.placeholder((1, 1, 8, 8, 32), dtype=dtype), "NCHW32c", "NCHW4c"] + ltf_arg = [te.placeholder((1, 1, 8, 8, 32), dtype=dtype), "NCHW32c", "NCHW4c"] ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform') ltf_keys.append(ltf_wkl) - ltf_arg = [tvm.placeholder((1, 4, 8, 8, 8), dtype=dtype), "NCHW8c", "NCHW32c"] + ltf_arg = [te.placeholder((1, 4, 8, 8, 8), dtype=dtype), "NCHW8c", "NCHW32c"] ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform') ltf_keys.append(ltf_wkl) @@ -314,7 +315,7 @@ def test_many_sub_graphs(): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) @@ -397,7 +398,7 @@ def test_tuple(): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) @@ -497,7 +498,7 @@ def test_triangle_block(): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) diff --git a/tests/python/unittest/test_graph_tuner_utils.py b/tests/python/unittest/test_graph_tuner_utils.py index 112c5b8a70593..f620accb17195 100644 --- a/tests/python/unittest/test_graph_tuner_utils.py +++ b/tests/python/unittest/test_graph_tuner_utils.py @@ -21,6 +21,7 @@ # https://github.com/apache/incubator-tvm/issues/3240 # TODO: restore the file name after this issue is resolved. import tvm +from tvm import te from tvm import autotvm, relay from tvm.relay.testing import resnet diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 311dae803dbaa..bc8483f37a834 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. import tvm, inspect, sys, traceback, numpy, pytest, types, os + +from tvm import te from tvm.contrib import util from tvm.hybrid import script from tvm.hybrid.runtime import HYBRID_GLOBALS @@ -31,9 +33,9 @@ def tvm_val_2_py_val(val): op = None if sch is None: - outs = func(*tuple(tvm.convert(i) if isinstance(i, list) else i for i in args)) + outs = func(*tuple(tvm.runtime.convert(i) if isinstance(i, list) else i for i in args)) op = outs[0].op if isinstance(outs, list) else outs.op - sch = tvm.create_schedule(op) + sch = te.create_schedule(op) else: assert outs is not None assert isinstance(outs, list) @@ -42,7 +44,7 @@ def tvm_val_2_py_val(val): emu_args = [] nd_args = [] for i in args: - if isinstance(i, tvm.tensor.Tensor): + if isinstance(i, te.tensor.Tensor): shape = [tvm_val_2_py_val(j) for j in i.shape] emu_args.append(numpy.random.randn(*shape).astype(i.dtype)) nd_args.append(tvm.nd.array(emu_args[-1], ctx)) @@ -53,7 +55,7 @@ def tvm_val_2_py_val(val): assert isinstance(i, list) emu_args.append(numpy.array(i)) - compile_args = [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.tir.Var))] + \ + compile_args = [i for i in args if isinstance(i, (te.tensor.Tensor, tvm.tir.Var))] + \ (outs if isinstance(outs, list) else [outs]) module = tvm.build(sch, compile_args, @@ -76,8 +78,8 @@ def tvm_val_2_py_val(val): for nd, np in zip(out_tensors, ref_data): tvm.testing.assert_allclose(nd.asnumpy(), np, rtol=1e-5, atol=1e-5) - module_args = [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.tir.Var))] - module_outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + module_args = [i for i in args if isinstance(i, (te.tensor.Tensor, tvm.tir.Var))] + module_outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs h_module = tvm.hybrid.build(sch, module_args, module_outs) return h_module, module_args, module_outs @@ -98,10 +100,10 @@ def outer_product(n, m, a, b): #Test global function #Test bridge between frontend and backend def test_outer_product(): - n = tvm.size_var('n') - m = tvm.size_var('m') - a = tvm.placeholder((n, ), name='a') - b = tvm.placeholder((m, ), name='b') + n = te.size_var('n') + m = te.size_var('m') + a = te.placeholder((n, ), name='a') + b = te.placeholder((m, ), name='b') try: c = outer_product(n, m, a, b) @@ -167,8 +169,8 @@ def fanout(n, a): b[i] = sigma return b - n = tvm.size_var('n') - a = tvm.placeholder((n, ), 'float32', name='a') + n = te.size_var('n') + a = te.placeholder((n, ), 'float32', name='a') try: b = fanout(n, a) ir = b.op.body @@ -249,9 +251,9 @@ def looptype(a, b, c): f[k] = c[k] return d, e, f - a = tvm.placeholder((16, ), name='a', dtype='int32') - b = tvm.placeholder((16, ), name='b', dtype='int32') - c = tvm.placeholder((16, ), name='c', dtype='int32') + a = te.placeholder((16, ), name='a', dtype='int32') + b = te.placeholder((16, ), name='b', dtype='int32') + c = te.placeholder((16, ), name='c', dtype='int32') try: d, e, f = looptype(a, b, c) ir = d.op.body @@ -282,7 +284,7 @@ def if_then_else(a): b[i] = -1 if i % 2 == 0 else 1 return b, c - a = tvm.placeholder((10, ), dtype='int32', name='a') + a = te.placeholder((10, ), dtype='int32', name='a') func, ins, outs = run_and_check(if_then_else, [a]) run_and_check(func, ins, outs=outs) @@ -326,8 +328,8 @@ def vec_add(a, b): c[tx] = a[tx] + b[tx] return c - a = tvm.placeholder((1000, ), dtype='float32', name='a') - b = tvm.placeholder((1000, ), dtype='float32', name='b') + a = te.placeholder((1000, ), dtype='float32', name='a') + b = te.placeholder((1000, ), dtype='float32', name='b') func, ins, outs = run_and_check(vec_add, [a, b], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') @@ -339,8 +341,8 @@ def raw(a, b): return c c = raw(a, b) - sch = tvm.create_schedule(c.op) - x = tvm.thread_axis('threadIdx.x') + sch = te.create_schedule(c.op) + x = te.thread_axis('threadIdx.x') sch[c].bind(c.op.axis[0], x) func, ins, outs = run_and_check(raw, [a, b], sch=sch, outs=[c], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') @@ -360,9 +362,9 @@ def foo(a): return c - a = tvm.placeholder((8, 4), 'float32') + a = te.placeholder((8, 4), 'float32') c = foo(a) - s = tvm.create_schedule(c.op) + s = te.create_schedule(c.op) ir = tvm.lower(s, [a, c], simple_mode=True) assert not isinstance(ir, tvm.tir.AttrStmt) func, ins, outs = run_and_check(foo, [a], target='cuda') @@ -379,7 +381,7 @@ def max_threads(a): b[i * m + j] = a[i * m + j] + a[i * m + j] return b - a = tvm.placeholder((10000, ), 'float32') + a = te.placeholder((10000, ), 'float32') with tvm.target.create('cuda'): func, ins, outs = run_and_check(max_threads, [a], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') @@ -399,9 +401,9 @@ def intrin_real(a): b[7] = max(a[5], a[6]) return b - a8 = tvm.placeholder((8, ), dtype='float32', name='a') + a8 = te.placeholder((8, ), dtype='float32', name='a') b8 = intrin_real(a8) - sch = tvm.create_schedule(b8.op) + sch = te.create_schedule(b8.op) func = tvm.build(sch, [a8, b8]) assert func a = numpy.arange(2, 10).astype('float32') @@ -417,9 +419,9 @@ def intrin_int(a): b[0] = popcount(a[0]) return b - a1 = tvm.placeholder((1, ), dtype='int32') + a1 = te.placeholder((1, ), dtype='int32') b1 = intrin_int(a1) - sch = tvm.create_schedule(b1.op) + sch = te.create_schedule(b1.op) func = tvm.build(sch, [a1, b1]) assert func a = numpy.array([114514]).astype('int32') @@ -443,7 +445,7 @@ def blur(a): b[i-2, j-2] = s / 9.0 return b - a = tvm.placeholder((32, 32), 'float32', 'a') + a = te.placeholder((32, 32), 'float32', 'a') func, ins, outs = run_and_check(blur, [a]) run_and_check(func, ins, outs=outs) @@ -455,8 +457,8 @@ def triangle(a, b): c[i, j] = a[i] * b[j] return c - a = tvm.placeholder((10, ), dtype='float32', name='a') - b = tvm.placeholder((10, ), dtype='float32', name='b') + a = te.placeholder((10, ), dtype='float32', name='a') + b = te.placeholder((10, ), dtype='float32', name='b') func, ins, outs = run_and_check(triangle, [a, b]) run_and_check(func, ins, outs=outs) @@ -474,9 +476,9 @@ def blur2d(a): b[i, j] = (ha[0, j] + ha[1, j] + ha[2, j]) / 9.0 return b - a = tvm.placeholder((32, 32), 'float32', 'a') + a = te.placeholder((32, 32), 'float32', 'a') b = blur2d(a) - sch = tvm.create_schedule(b.op) + sch = te.create_schedule(b.op) func, ins, outs = run_and_check(blur2d, [a]) run_and_check(func, ins, outs=outs) @@ -494,8 +496,8 @@ def share_vec_add(a, b): c[i] = shared[i] + local[i] return c - a = tvm.placeholder((256, ), dtype='float32', name='a') - b = tvm.placeholder((256, ), dtype='float32', name='b') + a = te.placeholder((256, ), dtype='float32', name='a') + b = te.placeholder((256, ), dtype='float32', name='b') c = share_vec_add(a, b) func, ins, outs = run_and_check(share_vec_add, [a, b], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') @@ -510,11 +512,11 @@ def upstream(a): b[i] = a[i] * i return b - a = tvm.placeholder((20, ), 'float32') - b = tvm.placeholder((20, ), 'float32') - c = tvm.compute((20, ), lambda x: a[x] + b[x]) + a = te.placeholder((20, ), 'float32') + b = te.placeholder((20, ), 'float32') + c = te.compute((20, ), lambda x: a[x] + b[x]) d = upstream(c) - sch = tvm.create_schedule([c.op, d.op]) + sch = te.create_schedule([c.op, d.op]) ir = tvm.lower(sch, [a, b, d], simple_mode=True) func = tvm.build(sch, [a, b, d]) assert(func) @@ -541,11 +543,11 @@ def downstream(a): return b - a = tvm.placeholder((20, ), 'float32') + a = te.placeholder((20, ), 'float32') b = downstream(a) - c = tvm.compute((20, ), lambda x: b[x] + 1.0) + c = te.compute((20, ), lambda x: b[x] + 1.0) - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) module = tvm.build(sch, [a, c]) assert module @@ -567,10 +569,10 @@ def add_something(a, b): c[i] = a[i] + b return c - a = tvm.placeholder((11, ), dtype='int32', name='a') - b = tvm.const(11, 'int32') + a = te.placeholder((11, ), dtype='int32', name='a') + b = tvm.tir.const(11, 'int32') c = add_something(a, b) - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) module = tvm.build(sch, [a, c], 'llvm') assert(module) @@ -603,10 +605,10 @@ def kernel_b(b, a): c[i, j] = a[i * 4 + j] * b[i, j] return c - a = tvm.placeholder((16, ), 'int32') + a = te.placeholder((16, ), 'int32') b, c = kernel_a(a) d = kernel_b(c, b) - sch = tvm.create_schedule(d.op) + sch = te.create_schedule(d.op) module = tvm.build(sch, [a, d]) assert module @@ -632,8 +634,8 @@ def foo(a, b): d[i, j] = c[i, j] + i * j return d - a = tvm.placeholder((10, ), name='a') - b = tvm.placeholder((10, ), name='b') + a = te.placeholder((10, ), name='a') + b = te.placeholder((10, ), name='b') func, ins, outs = run_and_check(foo, [a, b]) run_and_check(func, ins, outs=outs) @@ -648,7 +650,7 @@ def foo(a): else: b[i] = 0.0 return b - a = tvm.placeholder((10, ), name='a') + a = te.placeholder((10, ), name='a') func, ins, outs = run_and_check(foo, [a]) run_and_check(func, ins, outs=outs) @@ -668,7 +670,7 @@ def foo(a, b): return c, d - a = tvm.placeholder((2, 5), name='a', dtype='float32') + a = te.placeholder((2, 5), name='a', dtype='float32') b = [[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]] func, ins, outs = run_and_check(foo, [a, b]) run_and_check(func, ins, outs=outs) @@ -683,10 +685,10 @@ def goo(a, b): else: c[i - len_b] = a[i - len_b] + b[i - len_b] return c - a = tvm.placeholder((5, ), name='a', dtype='int32') + a = te.placeholder((5, ), name='a', dtype='int32') b = [1, 2, 3, 4, 5] - c = goo(a, tvm.convert(b)) - sch = tvm.create_schedule(c.op) + c = goo(a, tvm.runtime.convert(b)) + sch = te.create_schedule(c.op) func, ins, outs = run_and_check(goo, [a, b]) run_and_check(func, ins, outs=outs) @@ -700,7 +702,7 @@ def hoo(a, b): d += a[i] + b[j] c[i] = d return c - a = tvm.placeholder((5, ), name='a', dtype='int32') + a = te.placeholder((5, ), name='a', dtype='int32') b = [1, 2, 3, 4, 5] func, ins, outs = run_and_check(hoo, [a, b]) run_and_check(func, ins, outs=outs) @@ -713,14 +715,14 @@ def outer_product(a, b): for j in range(64): c[i, j] = a[i] * b[j] return c - a = tvm.placeholder((64,), name='a', dtype='float32') - b = tvm.placeholder((64,), name='b', dtype='float32') + a = te.placeholder((64,), name='a', dtype='float32') + b = te.placeholder((64,), name='b', dtype='float32') c = outer_product(a, b) # Test perfect loop split # Test loop reorder # Test loop annotation - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) i, j = c.op.axis io, ii = sch[c].split(i, 4) sch[c].parallel(ii) @@ -749,7 +751,7 @@ def outer_product(a, b): run_and_check(func, ins, outs=outs) # Test fuse - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) sch[c].fuse(c.op.axis[0], c.op.axis[1]) ir = tvm.lower(sch, [a, b, c], simple_mode=True) assert isinstance(ir, tvm.tir.ProducerConsumer) @@ -762,7 +764,7 @@ def outer_product(a, b): run_and_check(func, ins, outs=outs) # Test imperfect loop split - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) sch[c].split(c.op.axis[0], 3) ir = tvm.lower(sch, [a, b, c], simple_mode=True) func, ins, outs = run_and_check(outer_product, [a, b], sch=sch, outs=[c]) @@ -784,7 +786,7 @@ def add_something(a): c[i] = a[i] + constant_list[1][const_value] return c - a = tvm.placeholder((n, ), dtype='int32', name='a') + a = te.placeholder((n, ), dtype='int32', name='a') func, ins, outs = run_and_check(add_something, [a]) run_and_check(func, ins, outs=outs) @@ -801,12 +803,12 @@ def sum_array(inputs): n = 5 inputs = [] for i in range(n): - inputs.append(tvm.placeholder((10,), name='t%s' % i, dtype='float32')) + inputs.append(te.placeholder((10,), name='t%s' % i, dtype='float32')) - out = sum_array(tvm.convert(inputs)) + out = sum_array(tvm.runtime.convert(inputs)) assert len(out.op.inputs) == n - sch = tvm.create_schedule(out.op) + sch = te.create_schedule(out.op) mod = tvm.build(sch, inputs + [out], target='llvm') assert mod diff --git a/tests/python/unittest/test_ir_builder.py b/tests/python/unittest/test_ir_builder.py index 5679625e7799e..689f6fa32839d 100644 --- a/tests/python/unittest/test_ir_builder.py +++ b/tests/python/unittest/test_ir_builder.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_for(): ib = tvm.ir_builder.create() - n = tvm.size_var("n") + n = te.size_var("n") A = ib.allocate("float32", n, name="A", scope="global") with ib.for_range(0, n, name="i") as i: A[i] = A[i] + 1 @@ -39,9 +40,9 @@ def test_for(): def test_if(): ib = tvm.ir_builder.create() - n = tvm.size_var("n") + n = te.size_var("n") A = ib.pointer("float32", name="A") - tmod = tvm.truncmod + tmod = tvm.tir.truncmod with ib.for_range(0, n, name="i") as i: with ib.if_scope(tmod(i, 2) == 0): A[i] = A[i] + 1 @@ -58,9 +59,9 @@ def test_if(): assert body.else_case.index.value == 0 def test_prefetch(): - A = tvm.placeholder((10, 20), name="A") + A = te.placeholder((10, 20), name="A") ib = tvm.ir_builder.create() - n = tvm.size_var("n") + n = te.size_var("n") with ib.for_range(0, n, name="i") as i: ib.emit( @@ -74,8 +75,8 @@ def test_prefetch(): def test_cpu(): n = 1024 dtype = "float32" - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') def test_device_ir(A, B, C): n = A.shape[0] max_threads = 8 @@ -87,9 +88,9 @@ def test_device_ir(A, B, C): Cptr[i] = Aptr[i] + Bptr[i] body = ib.get() return body - C = tvm.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), + C = te.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), name="vector_add", dtype=dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def check_target(target): if not tvm.runtime.enabled(target): return @@ -105,18 +106,18 @@ def check_target(target): check_target("llvm") def test_gpu(): - n = tvm.size_var('n') + n = te.size_var('n') dtype = "float32" - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - idxd = tvm.indexdiv + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + idxd = tvm.tir.indexdiv def test_device_ir(A, B, C): n = A.shape[0] max_threads = 32 ib = tvm.ir_builder.create() - bx = tvm.thread_axis("blockIdx.x") - tx = tvm.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") ib.scope_attr(bx, "thread_extent", idxd(n+max_threads-1, max_threads)) ib.scope_attr(tx, "thread_extent", max_threads) idx = bx.var * max_threads + tx.var @@ -127,11 +128,11 @@ def test_device_ir(A, B, C): Cptr[idx] = Aptr[idx] + Bptr[idx] body = ib.get() return body - C = tvm.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), + C = te.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), name="vector_add", dtype=dtype) - s = tvm.create_schedule(C.op) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + s = te.create_schedule(C.op) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def check_target(target): n = 1024 if not tvm.runtime.enabled(target): diff --git a/tests/python/unittest/test_lang_basic.py b/tests/python/unittest/test_lang_basic.py index 3b1431a54d36c..cd532a0db77fc 100644 --- a/tests/python/unittest/test_lang_basic.py +++ b/tests/python/unittest/test_lang_basic.py @@ -15,13 +15,14 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_const(): - x = tvm.const(1, "int32") + x = tvm.tir.const(1, "int32") print(x.dtype) - assert x.dtype == tvm.int32 + assert x.dtype == "int32" assert isinstance(x, tvm.tir.IntImm) @@ -29,28 +30,28 @@ def test_scalar_dtype_inference(): for data in [True, np.bool(1), np.uint8(1), np.uint16(1), np.uint32(1), np.uint64(1), np.int8(1), np.int16(1), np.int32(1), np.int64(1), np.float16(1), np.float32(1), np.float64(1)]: - assert tvm.const(data).dtype == str(np.array(data).dtype) - assert tvm.const(1).dtype == 'int32' - assert tvm.const(1.0).dtype == 'float32' + assert tvm.tir.const(data).dtype == str(np.array(data).dtype) + assert tvm.tir.const(1).dtype == 'int32' + assert tvm.tir.const(1.0).dtype == 'float32' for data in [True, np.bool(1), np.uint8(1), np.uint16(1), np.uint32(1), np.uint64(1), np.int8(1), np.int16(1), np.int32(1), np.int64(1), np.float16(1), np.float32(1), np.float64(1)]: - assert tvm.convert(data).dtype == str(np.array(data).dtype) - assert tvm.convert(1).dtype == 'int32' - assert tvm.convert(1.0).dtype == 'float32' + assert tvm.runtime.convert(data).dtype == str(np.array(data).dtype) + assert tvm.runtime.convert(1).dtype == 'int32' + assert tvm.runtime.convert(1.0).dtype == 'float32' def test_make(): - x = tvm.const(1, "int32") - y = tvm.var("x") + x = tvm.tir.const(1, "int32") + y = te.var("x") z = x + y - assert isinstance(tvm.max(x, y), tvm.tir.Max) - assert isinstance(tvm.min(x, y), tvm.tir.Min) + assert isinstance(tvm.te.max(x, y), tvm.tir.Max) + assert isinstance(tvm.te.min(x, y), tvm.tir.Min) def test_ir(): - x = tvm.const(1, "int32") + x = tvm.tir.const(1, "int32") y = tvm.tir.IntImm('int32', 1) z = x + y stmt = tvm.tir.Evaluate(z) @@ -58,22 +59,22 @@ def test_ir(): def test_ir2(): - x = tvm.var("n") - a = tvm.var("array", tvm.handle) + x = te.var("n") + a = te.var("array", "handle") st = tvm.tir.Store(a, x + 1, 1) assert isinstance(st, tvm.tir.Store) assert(st.buffer_var == a) def test_let(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') stmt = tvm.tir.LetStmt( x, 10, tvm.tir.Evaluate(x + 1)); def test_cast(): - x = tvm.var('x', dtype="float32") + x = te.var('x', dtype="float32") y = x.astype("int32") z = x.astype("float32x4") assert isinstance(y, tvm.tir.Cast) @@ -82,13 +83,13 @@ def test_cast(): def test_attr(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') stmt = tvm.tir.AttrStmt( y, "stride", 10, tvm.tir.Evaluate(x + 1)); assert stmt.node == y - a = tvm.convert(1) + a = tvm.runtime.convert(1) assert a.value == 1 try: a.no_field @@ -98,78 +99,78 @@ def test_attr(): def test_basic(): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') c = a + b assert str(c) == '(%s + %s)' % (a.name, b.name) def test_stmt(): x = tvm.tir.Evaluate(0) - tvm.tir.For(tvm.var('i'), 0, 1, + tvm.tir.For(te.var('i'), 0, 1, tvm.tir.For.Serial, 0, x) def test_dir(): - x = tvm.var('x') + x = te.var('x') dir(x) def test_dtype(): - x = tvm.var('x') + x = te.var('x') assert x.dtype == 'int32' - y = tvm.var('y') + y = te.var('y') assert (x > y).dtype == 'bool' def test_any(): - x = tvm.var('x') - y = tvm.var('y') - z = tvm.var('z') + x = te.var('x') + y = te.var('y') + z = te.var('z') try: t = x or x assert False except ValueError: pass try: - tvm.any() + tvm.tir.any() assert False except ValueError: pass - assert str(tvm.any(x < y)) == '(%s < %s)' % (x.name, y.name) - assert str(tvm.any(x < y, x > z)) == '((%s < %s) || (%s > %s))' % ( + assert str(tvm.tir.any(x < y)) == '(%s < %s)' % (x.name, y.name) + assert str(tvm.tir.any(x < y, x > z)) == '((%s < %s) || (%s > %s))' % ( x.name, y.name, x.name, z.name) - assert str(tvm.any(x < y, y > z + 1, x < z * 2)) == \ + assert str(tvm.tir.any(x < y, y > z + 1, x < z * 2)) == \ '(((%s < %s) || (%s > (%s + 1))) || (%s < (%s*2)))' % ( x.name, y.name, y.name, z.name, x.name, z.name) def test_all(): - x = tvm.var('x') - y = tvm.var('y') - z = tvm.var('z') + x = te.var('x') + y = te.var('y') + z = te.var('z') try: t = x and x assert False except ValueError: pass try: - tvm.all() + tvm.tir.all() assert False except ValueError: pass - assert str(tvm.all(x < y)) == '(%s < %s)' % (x.name, y.name) - assert str(tvm.all(x < y, x > z)) == '((%s < %s) && (%s > %s))' % ( + assert str(tvm.tir.all(x < y)) == '(%s < %s)' % (x.name, y.name) + assert str(tvm.tir.all(x < y, x > z)) == '((%s < %s) && (%s > %s))' % ( x.name, y.name, x.name, z.name) - assert str(tvm.all(x < y, y > z + 1, x < z * 2)) == \ + assert str(tvm.tir.all(x < y, y > z + 1, x < z * 2)) == \ '(((%s < %s) && (%s > (%s + 1))) && (%s < (%s*2)))' % ( x.name, y.name, y.name, z.name, x.name, z.name) def test_bitwise(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') assert str(x << y) == 'shift_left(x, y)' assert str(x >> y) == 'shift_right(x, y)' assert str(x & y) == 'bitwise_and(x, y)' @@ -182,12 +183,12 @@ def test_bitwise(): assert str(10 << x) == 'shift_left(10, x)' assert str(10 % x) == 'floormod(10, x)' assert str(~x) == 'bitwise_not(x)' - assert(tvm.const(1, "int8x2") >> 1).dtype == "int8x2" - assert(x >> tvm.const(1, "int32x2")).dtype == "int32x2" - assert(tvm.var("z", "int8x2") << tvm.const(1, "int8x2")).dtype == "int8x2" + assert(tvm.tir.const(1, "int8x2") >> 1).dtype == "int8x2" + assert(x >> tvm.tir.const(1, "int32x2")).dtype == "int32x2" + assert(te.var("z", "int8x2") << tvm.tir.const(1, "int8x2")).dtype == "int8x2" def test_float_bitwise(): - t = tvm.const(1.5,dtype='float32') + t = tvm.tir.const(1.5,dtype='float32') for test in [lambda lhs, rhs : lhs << rhs, lambda lhs, rhs : lhs >> rhs, lambda lhs, rhs : lhs | rhs, @@ -206,20 +207,20 @@ def test_float_bitwise(): pass def test_isnan(): - x = tvm.var('x', 'float32') - assert str(tvm.isnan(x)) == 'isnan(x)' - assert str(tvm.isnan(x).dtype) == 'bool' - y = tvm.var('y', 'float16') - assert str(tvm.isnan(y)) == 'isnan(float32(y))' - z = tvm.var('z', 'int32') - assert str(tvm.isnan(z)) == '(bool)0' - k = tvm.var('k', 'int8x2') - assert str(tvm.isnan(k).dtype) == 'uint1x2' + x = te.var('x', 'float32') + assert str(tvm.tir.isnan(x)) == 'isnan(x)' + assert str(tvm.tir.isnan(x).dtype) == 'bool' + y = te.var('y', 'float16') + assert str(tvm.tir.isnan(y)) == 'isnan(float32(y))' + z = te.var('z', 'int32') + assert str(tvm.tir.isnan(z)) == '(bool)0' + k = te.var('k', 'int8x2') + assert str(tvm.tir.isnan(k).dtype) == 'uint1x2' def test_equality(): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') c = (a == b) assert not c d = (c != c) diff --git a/tests/python/unittest/test_lang_buffer.py b/tests/python/unittest/test_lang_buffer.py index 7568814fbfe69..6fff8c828adf7 100644 --- a/tests/python/unittest/test_lang_buffer.py +++ b/tests/python/unittest/test_lang_buffer.py @@ -15,25 +15,26 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.tir import Buffer import numpy as np def test_buffer(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - Ab = tvm.decl_buffer((m, n), tvm.float32) - Bb = tvm.decl_buffer((n, l), tvm.float32) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + Ab = tvm.tir.decl_buffer((m, n), "float32") + Bb = tvm.tir.decl_buffer((n, l), "float32") assert isinstance(Ab, tvm.tir.Buffer) - assert Ab.dtype == tvm.float32 + assert Ab.dtype == "float32" assert tuple(Ab.shape) == (m, n) def test_buffer_access_ptr(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32, strides=[n + 1 , 1]) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32", strides=[n + 1 , 1]) aptr = Ab.access_ptr("rw") assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m) assert aptr.args[0].dtype == Ab.dtype @@ -43,59 +44,59 @@ def test_buffer_access_ptr(): def test_buffer_access_ptr_offset(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32") aptr = Ab.access_ptr("rw", offset=100) offset = tvm.ir_pass.Simplify(aptr.args[2]) assert tvm.ir_pass.Equal(offset, 100) assert aptr.args[4].value == Buffer.READ | Buffer.WRITE - v = tvm.size_var('int32') + v = te.size_var('int32') aptr = Ab.access_ptr("rw", offset=100 + 100 + v) offset = tvm.ir_pass.Simplify(aptr.args[2]) assert tvm.ir_pass.Equal(offset, 200 + v) assert aptr.args[4].value == Buffer.READ | Buffer.WRITE - aptr = Ab.access_ptr("rw", offset=tvm.call_extern('int32', "test_call", 100 + 100 + v)) + aptr = Ab.access_ptr("rw", offset=tvm.tir.call_extern('int32', "test_call", 100 + 100 + v)) offset = tvm.ir_pass.Simplify(aptr.args[2]) - assert tvm.ir_pass.Equal(offset, tvm.call_extern('int32', "test_call", 200 + v)) + assert tvm.ir_pass.Equal(offset, tvm.tir.call_extern('int32', "test_call", 200 + v)) assert aptr.args[4].value == Buffer.READ | Buffer.WRITE def test_buffer_access_ptr_extent(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32") aptr = Ab.access_ptr("rw") assert tvm.ir_pass.Equal(aptr.args[3], m * n) aptr = Ab.access_ptr("rw", offset=100) assert tvm.ir_pass.Equal(aptr.args[3], m * n - 100) - Ab = tvm.decl_buffer((m, n), tvm.float32, strides=[n + 1 , 1]) + Ab = tvm.tir.decl_buffer((m, n), "float32", strides=[n + 1 , 1]) aptr = Ab.access_ptr("rw", offset=100) assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m - 100) def test_buffer_vload(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32, elem_offset=100) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32", elem_offset=100) load = Ab.vload([2, 3]) offset = tvm.ir_pass.Simplify(load.index) assert tvm.ir_pass.Equal(offset, n * 2 + 103) def test_buffer_index_merge_mult_mod(): - m = tvm.size_var('m') - n = tvm.size_var('n') - s = tvm.size_var('s') - k0 = tvm.size_var('k0') - k1 = tvm.size_var('k1') - A = tvm.decl_buffer((m, n), tvm.float32) - A_stride = tvm.decl_buffer((m, n), tvm.float32, strides=(s, 1)) + m = te.size_var('m') + n = te.size_var('n') + s = te.size_var('s') + k0 = te.size_var('k0') + k1 = te.size_var('k1') + A = tvm.tir.decl_buffer((m, n), "float32") + A_stride = tvm.tir.decl_buffer((m, n), "float32", strides=(s, 1)) def assert_simplified_equal(index_simplified, index_direct): assert tvm.ir_pass.Equal(index_simplified, index_direct),\ "index_simplified=%s, index_direct=%s" %(index_simplified, index_direct) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod # Test Case1 index_simplified = A_stride.vload( (idxd(idxm(k0, k1), s), idxm(idxm(k0, k1), s) + idxd(k0, k1) * k1)) @@ -123,18 +124,18 @@ def assert_simplified_equal(index_simplified, index_direct): def test_buffer_broadcast(): - m0, m1, m2 = tvm.size_var("m0"), tvm.size_var("m1"), tvm.size_var("m2") - n0, n1, n2 = tvm.size_var("n0"), tvm.size_var("n1"), tvm.size_var("n2") - o0, o1, o2 = tvm.size_var("o0"), tvm.size_var("o1"), tvm.size_var("o2") + m0, m1, m2 = te.size_var("m0"), te.size_var("m1"), te.size_var("m2") + n0, n1, n2 = te.size_var("n0"), te.size_var("n1"), te.size_var("n2") + o0, o1, o2 = te.size_var("o0"), te.size_var("o1"), te.size_var("o2") - A = tvm.placeholder((m0, m1, m2), name='A') - B = tvm.placeholder((n0, n1, n2), name='B') + A = te.placeholder((m0, m1, m2), name='A') + B = te.placeholder((n0, n1, n2), name='B') - C = tvm.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') + C = te.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') - Ab = tvm.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") - Bb = tvm.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") - s = tvm.create_schedule(C.op) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") + s = te.create_schedule(C.op) def check(): if not tvm.runtime.enabled("llvm"): @@ -151,18 +152,18 @@ def check(): def test_buffer_broadcast_expr(): - n0, m0, x = tvm.size_var('n0'), tvm.size_var('m0'), tvm.size_var('x') - n1, m1 = tvm.size_var('n1'), tvm.size_var('m1') - o0, o1 = tvm.size_var('o0'), tvm.size_var('o1') - - A = tvm.placeholder((m0, n0), name='A') - B = tvm.placeholder((m1, n1), name='B') - C = tvm.compute((o0, o1//x), lambda i, j: A[i, j] + B[i, j], name='C') - - Ab = tvm.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") - Bb = tvm.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") - Cc = tvm.decl_buffer(C.shape, C.dtype, name="Cc", buffer_type="auto_broadcast") - s = tvm.create_schedule(C.op) + n0, m0, x = te.size_var('n0'), te.size_var('m0'), te.size_var('x') + n1, m1 = te.size_var('n1'), te.size_var('m1') + o0, o1 = te.size_var('o0'), te.size_var('o1') + + A = te.placeholder((m0, n0), name='A') + B = te.placeholder((m1, n1), name='B') + C = te.compute((o0, o1//x), lambda i, j: A[i, j] + B[i, j], name='C') + + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") + Cc = tvm.tir.decl_buffer(C.shape, C.dtype, name="Cc", buffer_type="auto_broadcast") + s = te.create_schedule(C.op) def check_stride(): if not tvm.runtime.enabled("llvm"): diff --git a/tests/python/unittest/test_lang_constructor.py b/tests/python/unittest/test_lang_constructor.py index 797a04fa45740..9edaf92d0db76 100644 --- a/tests/python/unittest/test_lang_constructor.py +++ b/tests/python/unittest/test_lang_constructor.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_expr_constructor(): x = tvm.tir.Var("xx", "float32") @@ -47,8 +48,8 @@ def test_expr_constructor(): assert x.dtype == "float32" assert x.value.value == 1 - a = tvm.const(1.0, dtype="float32") - b = tvm.var("x", dtype="float32") + a = tvm.tir.const(1.0, dtype="float32") + b = te.var("x", dtype="float32") for cls in [tvm.tir.Add, tvm.tir.Sub, @@ -67,8 +68,8 @@ def test_expr_constructor(): assert x.b.same_as(b) - a = tvm.convert(tvm.var("x") > 1) - b = tvm.convert(tvm.var("x") == 1) + a = tvm.runtime.convert(te.var("x") > 1) + b = tvm.runtime.convert(te.var("x") == 1) for cls in [tvm.tir.And, tvm.tir.Or]: @@ -87,7 +88,7 @@ def test_expr_constructor(): assert x.false_value == b assert x.condition == a - buffer_var = tvm.var("x", dtype="handle") + buffer_var = te.var("x", dtype="handle") x = tvm.tir.Load("float32", buffer_var, 1, a) assert isinstance(x, tvm.tir.Load) assert x.dtype == "float32" @@ -120,7 +121,7 @@ def test_expr_constructor(): assert x.func == None assert x.value_index == 0 - v = tvm.var("aa") + v = te.var("aa") x = tvm.tir.Let(v, 1, v) assert x.var == v assert x.value.value == 1 @@ -128,8 +129,8 @@ def test_expr_constructor(): def test_stmt_constructor(): - v = tvm.var("aa") - buffer_var = tvm.var("buf", dtype="handle") + v = te.var("aa") + buffer_var = te.var("buf", dtype="handle") nop = tvm.tir.Evaluate(1) x = tvm.tir.LetStmt(v, 1, tvm.tir.Evaluate(1)) assert isinstance(x, tvm.tir.LetStmt) @@ -141,8 +142,8 @@ def test_stmt_constructor(): assert isinstance(x, tvm.tir.AttrStmt) assert x.value.value == 1 - x = tvm.tir.AssertStmt(tvm.const(1, "uint1"), - tvm.convert("hellow"), + x = tvm.tir.AssertStmt(tvm.tir.const(1, "uint1"), + tvm.runtime.convert("hellow"), nop) assert isinstance(x, tvm.tir.AssertStmt) assert x.body == nop @@ -151,26 +152,26 @@ def test_stmt_constructor(): assert isinstance(x, tvm.tir.ProducerConsumer) assert x.body == nop - x = tvm.tir.For(tvm.var("x"), 0, 10, 0, 0, nop) + x = tvm.tir.For(te.var("x"), 0, 10, 0, 0, nop) assert isinstance(x, tvm.tir.For) assert x.min.value == 0 assert x.extent.value == 10 assert x.body == nop - x = tvm.tir.Store(buffer_var, 1, 10, tvm.const(1, "uint1")) + x = tvm.tir.Store(buffer_var, 1, 10, tvm.tir.const(1, "uint1")) assert isinstance(x, tvm.tir.Store) assert x.buffer_var == buffer_var assert x.index.value == 10 assert x.value.value == 1 - tensor = tvm.placeholder((), dtype="float32") + tensor = te.placeholder((), dtype="float32") x = tvm.tir.Provide(tensor.op, 0, 10, []) assert isinstance(x, tvm.tir.Provide) assert x.value_index == 0 assert x.value.value == 10 x = tvm.tir.Allocate(buffer_var, "float32", [10], - tvm.const(1, "uint1"), nop) + tvm.tir.const(1, "uint1"), nop) assert isinstance(x, tvm.tir.Allocate) assert x.dtype == "float32" assert x.buffer_var == buffer_var @@ -186,11 +187,11 @@ def test_stmt_constructor(): assert isinstance(x, tvm.tir.Free) assert x.buffer_var == buffer_var - x = tvm.tir.Realize(None, 0, "float", [], tvm.const(1, "uint1"), nop) + x = tvm.tir.Realize(None, 0, "float", [], tvm.tir.const(1, "uint1"), nop) assert isinstance(x, tvm.tir.Realize) assert x.body == nop - x = tvm.tir.IfThenElse(tvm.const(1, "uint1"), + x = tvm.tir.IfThenElse(tvm.tir.const(1, "uint1"), tvm.tir.Evaluate(11), nop) assert isinstance(x, tvm.tir.IfThenElse) diff --git a/tests/python/unittest/test_lang_container.py b/tests/python/unittest/test_lang_container.py index 0b9fad9a2d201..c2d3aba01ec8d 100644 --- a/tests/python/unittest/test_lang_container.py +++ b/tests/python/unittest/test_lang_container.py @@ -15,26 +15,27 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_array(): - a = tvm.convert([1,2,3]) + a = tvm.runtime.convert([1,2,3]) assert len(a) == 3 assert a[-1].value == 3 a_slice = a[-3:-1] assert (a_slice[0].value, a_slice[1].value) == (1, 2) def test_array_save_load_json(): - a = tvm.convert([1,2,3]) + a = tvm.runtime.convert([1,2,3]) json_str = tvm.ir.save_json(a) a_loaded = tvm.ir.load_json(json_str) assert(a_loaded[1].value == 2) def test_map(): - a = tvm.var('a') - b = tvm.var('b') - amap = tvm.convert({a: 2, + a = te.var('a') + b = te.var('b') + amap = tvm.runtime.convert({a: 2, b: 3}) assert a in amap assert len(amap) == 2 @@ -45,7 +46,7 @@ def test_map(): def test_str_map(): - amap = tvm.convert({'a': 2, 'b': 3}) + amap = tvm.runtime.convert({'a': 2, 'b': 3}) assert 'a' in amap assert len(amap) == 2 dd = dict(amap.items()) @@ -55,9 +56,9 @@ def test_str_map(): def test_map_save_load_json(): - a = tvm.var('a') - b = tvm.var('b') - amap = tvm.convert({a: 2, + a = te.var('a') + b = te.var('b') + amap = tvm.runtime.convert({a: 2, b: 3}) json_str = tvm.ir.save_json(amap) amap = tvm.ir.load_json(json_str) @@ -67,14 +68,14 @@ def test_map_save_load_json(): def test_in_container(): - arr = tvm.convert(['a', 'b', 'c']) + arr = tvm.runtime.convert(['a', 'b', 'c']) assert 'a' in arr assert tvm.tir.StringImm('a') in arr assert 'd' not in arr def test_ndarray_container(): x = tvm.nd.array([1,2,3]) - arr = tvm.convert([x, x]) + arr = tvm.runtime.convert([x, x]) assert arr[0].same_as(x) assert arr[1].same_as(x) assert isinstance(arr[0], tvm.nd.NDArray) diff --git a/tests/python/unittest/test_lang_data_layout.py b/tests/python/unittest/test_lang_data_layout.py index 4c1cafcf3d67d..86a71da6dbeb7 100644 --- a/tests/python/unittest/test_lang_data_layout.py +++ b/tests/python/unittest/test_lang_data_layout.py @@ -17,6 +17,7 @@ """Test layout and bijective-layout node""" import tvm +from tvm import te from topi.util import get_const_tuple def test_layout(): @@ -51,18 +52,18 @@ def test_layout(): def test_bilayout_convertible(): # not convertible - assert tvm.bijective_layout("NCHW", "ABCD") is None - assert tvm.bijective_layout("__undef__", "NCHW") is None - assert tvm.bijective_layout("NCHW", "__undef__") is None - assert tvm.bijective_layout("__undef__", "__undef__") is None - assert tvm.bijective_layout("", "NCHW") is None - assert tvm.bijective_layout("NCHW", "") is None - assert tvm.bijective_layout("", "") is None + assert tvm.tir.bijective_layout("NCHW", "ABCD") is None + assert tvm.tir.bijective_layout("__undef__", "NCHW") is None + assert tvm.tir.bijective_layout("NCHW", "__undef__") is None + assert tvm.tir.bijective_layout("__undef__", "__undef__") is None + assert tvm.tir.bijective_layout("", "NCHW") is None + assert tvm.tir.bijective_layout("NCHW", "") is None + assert tvm.tir.bijective_layout("", "") is None # convertible - assert tvm.bijective_layout("NCHW", "NCHW16c") is not None + assert tvm.tir.bijective_layout("NCHW", "NCHW16c") is not None def test_bilayout_shape(): - bilayout = tvm.bijective_layout("NCHW", "NCHW16c") + bilayout = tvm.tir.bijective_layout("NCHW", "NCHW16c") assert isinstance(bilayout, tvm.tir.BijectiveLayout) dst_shape = bilayout.forward_shape((1, 32, 7, 7)) @@ -72,7 +73,7 @@ def test_bilayout_shape(): assert get_const_tuple(src_shape) == (1, 32, 7, 7) def test_bilayout_index(): - bilayout = tvm.bijective_layout("NCHW", "NCHW16c") + bilayout = tvm.tir.bijective_layout("NCHW", "NCHW16c") dst_index = bilayout.forward_index([0, 18, 6, 6]) assert get_const_tuple(dst_index) == (0, 1, 6, 6, 2) diff --git a/tests/python/unittest/test_lang_group.py b/tests/python/unittest/test_lang_group.py index e78ffb3541d31..0f1118d4890b0 100644 --- a/tests/python/unittest/test_lang_group.py +++ b/tests/python/unittest/test_lang_group.py @@ -16,20 +16,21 @@ # under the License. """Test group effect""" import tvm +from tvm import te def test_scan_group(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: x[0, i]) + m = te.size_var("m") + n = te.size_var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: x[0, i]) - s_update1 = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) - s_update2 = tvm.compute((m, n), lambda t, i: s_update1[t, i] + 1) - s_update3 = tvm.compute((m, n), lambda t, i: s_update2[t, i] + 1) - res = tvm.scan(s_init, s_update3, s_state, inputs=x) + s_update1 = te.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) + s_update2 = te.compute((m, n), lambda t, i: s_update1[t, i] + 1) + s_update3 = te.compute((m, n), lambda t, i: s_update2[t, i] + 1) + res = tvm.te.scan(s_init, s_update3, s_state, inputs=x) - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) assert s[s_update1].group is not None assert s[s_update2].group == s[s_update1].group # Assign within group, is valid @@ -50,12 +51,12 @@ def test_scan_group(): pass def test_compute_group(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.size_var("m") + n = te.size_var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g = s.create_group(outputs=x1, inputs=x, include_inputs=True) assert s[x1].group == g assert s[x].group == g @@ -64,12 +65,12 @@ def test_compute_group(): assert g.num_child_stages == 2 def test_nest_group(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.size_var("m") + n = te.size_var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g1 = s.create_group(outputs=x1, inputs=x) g2 = s.create_group(outputs=x1, inputs=x, include_inputs=True) assert set(s.groups) == set([g1, g2]) diff --git a/tests/python/unittest/test_lang_operator.py b/tests/python/unittest/test_lang_operator.py index d32b4c51ef695..7e2ec78e8b4c7 100644 --- a/tests/python/unittest/test_lang_operator.py +++ b/tests/python/unittest/test_lang_operator.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def check_throws(f): try: @@ -27,12 +28,12 @@ def check_throws(f): def test_const_fold(): def check(f, *args): - x = f(*[tvm.const(x, "int32") for x in args]) + x = f(*[tvm.tir.const(x, "int32") for x in args]) y = f(*args) if not isinstance(x, (tvm.tir.IntImm,)) or x.value != int(y): raise ValueError("check error: %s vs %s " % (x, y)) - tmod = tvm.truncmod + tmod = tvm.tir.truncmod check(lambda x, y: x + y, 3, 4) check(lambda x, y: x * y, 3, 12) check(lambda x, y: x * y - 10, 3, 12) @@ -47,9 +48,9 @@ def check(f, *args): def test_const_fold2(): - x = tvm.var("x") - tmod = tvm.truncmod - tdiv = tvm.truncdiv + x = te.var("x") + tmod = tvm.tir.truncmod + tdiv = tvm.tir.truncdiv assert (x + 0).same_as(x) assert (0 + x).same_as(x) assert (x - 0).same_as(x) @@ -60,48 +61,48 @@ def test_const_fold2(): def test_const_fold3(): # Test that using ints with logic operations is forbidden - x = tvm.var("x") + x = te.var("x") for val in [0, 1]: - for func in [tvm.all, tvm.any]: - check_throws(lambda: func(tvm.const(val, 'uint1'), x)) - check_throws(lambda: func(x, tvm.const(val, 'uint1'))) + for func in [tvm.tir.all, tvm.tir.any]: + check_throws(lambda: func(tvm.tir.const(val, 'uint1'), x)) + check_throws(lambda: func(x, tvm.tir.const(val, 'uint1'))) # Test const folding when both arguments are const - for tvm_func, py_func in [(tvm.all, lambda a, b: a and b), (tvm.any, lambda a, b: a or b)]: + for tvm_func, py_func in [(tvm.tir.all, lambda a, b: a and b), (tvm.tir.any, lambda a, b: a or b)]: for v1 in [0, 1]: for v2 in [0, 1]: - assert tvm.ir_pass.Equal(tvm_func(tvm.const(v1, 'uint1'), tvm.const(v2, 'uint1')), - tvm.const(py_func(v1, v2), 'uint1')) + assert tvm.ir_pass.Equal(tvm_func(tvm.tir.const(v1, 'uint1'), tvm.tir.const(v2, 'uint1')), + tvm.tir.const(py_func(v1, v2), 'uint1')) - x = tvm.var("x", 'uint1') - true = tvm.const(1, 'uint1') - false = tvm.const(0, 'uint1') + x = te.var("x", 'uint1') + true = tvm.tir.const(1, 'uint1') + false = tvm.tir.const(0, 'uint1') - assert tvm.all(x, true).same_as(x) - assert tvm.all(true, x).same_as(x) - assert tvm.any(x, false).same_as(x) - assert tvm.any(false, x).same_as(x) + assert tvm.tir.all(x, true).same_as(x) + assert tvm.tir.all(true, x).same_as(x) + assert tvm.tir.any(x, false).same_as(x) + assert tvm.tir.any(false, x).same_as(x) - assert tvm.all(x, false).same_as(false) - assert tvm.all(false, x).same_as(false) - assert tvm.any(x, true).same_as(true) - assert tvm.any(true, x).same_as(true) + assert tvm.tir.all(x, false).same_as(false) + assert tvm.tir.all(false, x).same_as(false) + assert tvm.tir.any(x, true).same_as(true) + assert tvm.tir.any(true, x).same_as(true) def test_const_fold4(): - x1 = tvm.const(4, "int32") + x1 = tvm.tir.const(4, "int32") x2 = x1 + 5 - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv assert isinstance(x2, tvm.tir.IntImm) and x2.value == 9 x3 = tdiv(x2, 3) assert isinstance(x3, tvm.tir.IntImm) and x3.value == 3 x4 = x3 + 0.55 assert isinstance(x4, tvm.tir.FloatImm) and abs(x4.value - 3.55) < 1e-6 - x5 = tvm.ceil(x4) + x5 = te.ceil(x4) assert isinstance(x5, tvm.tir.FloatImm) and x5.value == 4 x6 = x5.astype('int') assert isinstance(x6, tvm.tir.IntImm) and x6.value == 4, "x6={}".format(x6) - y = (tvm.round((tvm.const(6.5, 'float32') - 1) / 1.5) + 2).astype('int') + y = (te.round((tvm.tir.const(6.5, 'float32') - 1) / 1.5) + 2).astype('int') assert isinstance(y, tvm.tir.IntImm) and y.value == 6 @@ -112,8 +113,8 @@ def verify_general_dtype_support(f, is_conditional=False): [('int32', 'int64'), 'int64'], [('uint32', 'int32'), 'int32']] for (lhs_dtype, rhs_dtype), out_dtype in rules: - lhs = tvm.var('lhs', dtype=lhs_dtype) - rhs = tvm.var('rhs', dtype=rhs_dtype) + lhs = te.var('lhs', dtype=lhs_dtype) + rhs = te.var('rhs', dtype=rhs_dtype) out = f(lhs, rhs) if not is_conditional: assert out.dtype == out_dtype @@ -132,8 +133,8 @@ def verify_general_dtype_support(f, is_conditional=False): def verify_callop_float_only(f): for lhs_dtype in ['int32', 'float32', 'float64']: for rhs_dtype in ['int32', 'float32', 'float64']: - lhs = tvm.var('lhs', dtype=lhs_dtype) - rhs = tvm.var('rhs', dtype=rhs_dtype) + lhs = te.var('lhs', dtype=lhs_dtype) + rhs = te.var('rhs', dtype=rhs_dtype) if 'float' not in lhs_dtype and 'float' not in rhs_dtype: check_throws(lambda: f(lhs, rhs)) elif 'float' in lhs_dtype and 'float' in rhs_dtype and lhs_dtype != rhs_dtype: @@ -153,22 +154,22 @@ def verify_callop_float_only(f): verify_general_dtype_support(lambda a, b: a * b) verify_general_dtype_support(lambda a, b: a >= b, is_conditional=True) verify_general_dtype_support(lambda a, b: a <= b, is_conditional=True) - verify_callop_float_only(lambda a, b: tvm.power(a, b)) + verify_callop_float_only(lambda a, b: te.power(a, b)) def test_if_then_else(): - cases = [[(tvm.var('cond', dtype='bool'), 'bool', 'int32'), 'int32'], + cases = [[(te.var('cond', dtype='bool'), 'bool', 'int32'), 'int32'], [(True, 'int32', 'float32'), 'float32'], [(False, 'int32', 'int64'), 'int64'], - [(tvm.var('cond', dtype='bool'), 'uint32', 'int32'), 'int32'], - [(tvm.var('cond', dtype='int32'), 'uint32', 'int32'), 'int32']] + [(te.var('cond', dtype='bool'), 'uint32', 'int32'), 'int32'], + [(te.var('cond', dtype='int32'), 'uint32', 'int32'), 'int32']] for (cond, lhs_dtype, rhs_dtype), out_dtype in cases: - lhs = tvm.var('lhs', dtype=lhs_dtype) - rhs = tvm.var('rhs', dtype=rhs_dtype) + lhs = te.var('lhs', dtype=lhs_dtype) + rhs = te.var('rhs', dtype=rhs_dtype) if cond is True or cond is False: - out = tvm.if_then_else(cond, lhs, rhs) - out2 = tvm.if_then_else(not cond, rhs, lhs) - out3 = tvm.if_then_else(not cond, lhs, rhs) + out = tvm.tir.if_then_else(cond, lhs, rhs) + out2 = tvm.tir.if_then_else(not cond, rhs, lhs) + out3 = tvm.tir.if_then_else(not cond, lhs, rhs) assert tvm.ir_pass.Equal(out, out2) == 1 if cond: assert tvm.ir_pass.Equal(out, lhs.astype(out_dtype)) == 1 @@ -177,12 +178,12 @@ def test_if_then_else(): assert tvm.ir_pass.Equal(out, rhs.astype(out_dtype)) == 1 assert tvm.ir_pass.Equal(out3, lhs.astype(out_dtype)) == 1 elif cond.dtype == 'bool': - out = tvm.if_then_else(cond, lhs, rhs) + out = tvm.tir.if_then_else(cond, lhs, rhs) assert out.dtype == out_dtype assert out.args[1].dtype == out_dtype assert out.args[2].dtype == out_dtype elif cond.dtype != 'bool': - check_throws(lambda: tvm.if_then_else(cond, lhs, rhs)) + check_throws(lambda: tvm.tir.if_then_else(cond, lhs, rhs)) else: raise ValueError('Unknown combinations') diff --git a/tests/python/unittest/test_lang_reflection.py b/tests/python/unittest/test_lang_reflection.py index e97e73a1d1cc9..1691d7d11a7aa 100644 --- a/tests/python/unittest/test_lang_reflection.py +++ b/tests/python/unittest/test_lang_reflection.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_const_saveload_json(): # save load json - x = tvm.const(1, "int32") - y = tvm.const(10, "int32") + x = tvm.tir.const(1, "int32") + y = tvm.tir.const(10, "int32") z = x + y z = z + z json_str = tvm.ir.save_json(z) @@ -29,11 +30,11 @@ def test_const_saveload_json(): def test_make_smap(): # save load json - x = tvm.const(1, "int32") - y = tvm.const(10, "int32") + x = tvm.tir.const(1, "int32") + y = tvm.tir.const(10, "int32") z = tvm.tir.Add(x, y) - smap = tvm.convert({"z": z, "x": x}) - json_str = tvm.ir.save_json(tvm.convert([smap])) + smap = tvm.runtime.convert({"z": z, "x": x}) + json_str = tvm.ir.save_json(tvm.runtime.convert([smap])) arr = tvm.ir.load_json(json_str) assert len(arr) == 1 assert arr[0]["z"].a == arr[0]["x"] @@ -43,7 +44,7 @@ def test_make_node(): x = tvm.ir.make_node("IntImm", dtype="int32", value=10) assert isinstance(x, tvm.tir.IntImm) assert x.value == 10 - A = tvm.placeholder((10, ), name='A') + A = te.placeholder((10, ), name='A') AA = tvm.ir.make_node("Tensor", shape=A.shape, dtype=A.dtype, @@ -81,9 +82,9 @@ def test_make_attrs(): def test_make_sum(): - A = tvm.placeholder((2, 10), name='A') - k = tvm.reduce_axis((0,10), "k") - B = tvm.compute((2,), lambda i: tvm.sum(A[i, k], axis=k), name="B") + A = te.placeholder((2, 10), name='A') + k = te.reduce_axis((0,10), "k") + B = te.compute((2,), lambda i: te.sum(A[i, k], axis=k), name="B") json_str = tvm.ir.save_json(B) BB = tvm.ir.load_json(json_str) assert B.op.body[0].combiner is not None diff --git a/tests/python/unittest/test_lang_schedule.py b/tests/python/unittest/test_lang_schedule.py index 10843f993d068..4888d5bb918f4 100644 --- a/tests/python/unittest/test_lang_schedule.py +++ b/tests/python/unittest/test_lang_schedule.py @@ -16,17 +16,18 @@ # under the License. import pytest import tvm +from tvm import te import pickle as pkl def test_schedule_create(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.placeholder((n, l), name='B') - AA = tvm.compute((m, l), lambda i, j: A[i, j]) - T = tvm.compute((m, n, l), lambda i, j, k: AA(i, k) * B(j, k)) - s = tvm.create_schedule(T.op) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.placeholder((n, l), name='B') + AA = te.compute((m, l), lambda i, j: A[i, j]) + T = te.compute((m, n, l), lambda i, j, k: AA(i, k) * B(j, k)) + s = te.create_schedule(T.op) s[AA].set_scope("shared") xo, xi = s[T].split(T.op.axis[0], factor=10) xi1, xi2 = s[T].split(xi, factor=2) @@ -38,22 +39,22 @@ def test_schedule_create(): # save load json json_str = tvm.ir.save_json(s) s_loaded = tvm.ir.load_json(json_str) - assert isinstance(s_loaded, tvm.schedule.Schedule) + assert isinstance(s_loaded, tvm.te.schedule.Schedule) assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body)) # pickle unpickle dump = pkl.dumps(s) s_loaded = pkl.loads(dump) - assert isinstance(s_loaded, tvm.schedule.Schedule) + assert isinstance(s_loaded, tvm.te.schedule.Schedule) assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body)) def test_reorder(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute(m, lambda i: A[i+1]) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute(m, lambda i: A[i+1]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=10) xi1, xi2 = s[T].split(xi, factor=2) order = (xi2, xi1, xo) @@ -69,107 +70,107 @@ def test_reorder(): pass def test_split(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i: A[i]) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i: A[i]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=10) assert tuple(s[T].leaf_iter_vars) == (xo, xi) def test_tile(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - T = tvm.compute((m, n), lambda i, j: A[i, j]) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + T = te.compute((m, n), lambda i, j: A[i, j]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5) assert tuple(s[T].leaf_iter_vars) == (xo, yo, xi, yi) def test_fuse(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - T = tvm.compute((m, n), lambda i, j: A[i, j]) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + T = te.compute((m, n), lambda i, j: A[i, j]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5) fused = s[T].fuse(xo, yo) - assert any(isinstance(x, tvm.schedule.Fuse) for x in s[T].relations) + assert any(isinstance(x, tvm.te.schedule.Fuse) for x in s[T].relations) assert tuple(s[T].leaf_iter_vars) == (fused, xi, yi) def test_singleton(): print("test singleton") - A = tvm.placeholder((), name='A') - T = tvm.compute((), lambda : A() + 1) - s = tvm.create_schedule(T.op) + A = te.placeholder((), name='A') + T = te.compute((), lambda : A() + 1) + s = te.create_schedule(T.op) print("test singleton fin1") fused = s[T].fuse() - assert any(isinstance(x, tvm.schedule.Singleton) for x in s[T].relations) + assert any(isinstance(x, tvm.te.schedule.Singleton) for x in s[T].relations) assert tuple(s[T].leaf_iter_vars) == (fused,) dump = pkl.dumps(s) print("test singleton fin3") s_loaded = pkl.loads(dump) print("test singleton fin2") - assert isinstance(s_loaded, tvm.schedule.Schedule) + assert isinstance(s_loaded, tvm.te.schedule.Schedule) print("test singleton fin") def test_vectorize(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - T = tvm.compute((m, n), lambda i, j: A[i, j]) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + T = te.compute((m, n), lambda i, j: A[i, j]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5) s[T].vectorize(yi) s[T].unroll(xi) - UNROLL = tvm.schedule.IterVar.Unrolled - VECTORIZE = tvm.schedule.IterVar.Vectorized + UNROLL = tvm.te.schedule.IterVar.Unrolled + VECTORIZE = tvm.te.schedule.IterVar.Vectorized assert s[T].iter_var_attrs[xi].iter_type == UNROLL assert s[T].iter_var_attrs[yi].iter_type == VECTORIZE @pytest.mark.xfail def test_vectorize_commreduce(): - V = tvm.placeholder((128,), name='V') - ax = tvm.reduce_axis((0, 128), name='ax') - O = tvm.compute((1,), lambda _: tvm.sum(V[ax], axis=[ax])) - s = tvm.create_schedule(O.op) + V = te.placeholder((128,), name='V') + ax = te.reduce_axis((0, 128), name='ax') + O = te.compute((1,), lambda _: te.sum(V[ax], axis=[ax])) + s = te.create_schedule(O.op) s[O].vectorize(ax) # should throw here def test_pragma(): m = 100 - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i: A[i]) + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i: A[i]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=10) s[T].pragma(xo, "pragma1") s[T].pragma(xi, "vectorize") - VECTORIZE = tvm.schedule.IterVar.Vectorized + VECTORIZE = tvm.te.schedule.IterVar.Vectorized assert s[T].iter_var_attrs[xo].pragma_keys[0].value == "pragma1" assert s[T].iter_var_attrs[xi].iter_type == VECTORIZE def test_rfactor(): - n = tvm.size_var('n') - k1 = tvm.reduce_axis((0, n), name="k1") - k2 = tvm.reduce_axis((0, n), name="k2") - A = tvm.placeholder((n, n, n), name='A') - B = tvm.compute((n, ), lambda i: tvm.sum(A[i, k1, k2], axis=[k1, k2])) + n = te.size_var('n') + k1 = te.reduce_axis((0, n), name="k1") + k2 = te.reduce_axis((0, n), name="k2") + A = te.placeholder((n, n, n), name='A') + B = te.compute((n, ), lambda i: te.sum(A[i, k1, k2], axis=[k1, k2])) # normal schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) BF = s.rfactor(B, k1) assert(tuple(BF.shape) == (n, n)) assert(set(BF.op.body[0].axis) == set([k2])) assert(s[B].op.body[0].axis[0].dom.extent == n) assert(len(s[B].all_iter_vars) == 2) # schedule with splot - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, ki = s[B].split(k1, factor=4) xo, xi = s[B].split(B.op.axis[0], factor=8) BF = s.rfactor(B, ki) @@ -179,7 +180,7 @@ def test_rfactor(): assert(BF.op.body[0].axis[1].var == ko.var) assert(s[B].op.body[0].axis[0].dom.extent.value == 4) # schedule with factor_axis - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, ki = s[B].split(k1, factor=4) xo, xi = s[B].split(B.op.axis[0], factor=8) BF = s.rfactor(B, ki, 1) @@ -191,41 +192,41 @@ def test_rfactor(): def test_tensor_intrin(): n = 16 - x = tvm.placeholder((n,), name='x') - y = tvm.placeholder((n,), name='y') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') + x = te.placeholder((n,), name='x') + y = te.placeholder((n,), name='y') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') def intrin_func(ins, outs): - assert(isinstance(ins[0], tvm.schedule.Buffer)) + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) assert(ins[0].shape[0].value == n) - return tvm.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0]) + return tvm.tir.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0]) intrin = tvm.decl_tensor_intrin(z.op, intrin_func) assert intrin.op == z.op assert intrin.reduce_init is None assert tuple(intrin.inputs) == tuple(z.op.input_tensors) assert(intrin.buffers[0].shape[0].value == n) m = 32 - x = tvm.placeholder((m,), name='x') - y = tvm.placeholder((m,), name='y') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') - s = tvm.create_schedule(z.op) + x = te.placeholder((m,), name='x') + y = te.placeholder((m,), name='y') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') + s = te.create_schedule(z.op) xo, xi = s[z].split(z.op.axis[0], factor=n) s[z].tensorize(xi, intrin) assert(s[z].iter_var_attrs[xi].tensor_intrin == intrin) - assert(s[z].iter_var_attrs[xi].iter_type == tvm.schedule.IterVar.Tensorized) + assert(s[z].iter_var_attrs[xi].iter_type == tvm.te.schedule.IterVar.Tensorized) def test_tensor_intrin_scalar_params(): - n = tvm.size_var("n") - x = tvm.placeholder((n,), name='x') - v = tvm.size_var("v") - w = tvm.size_var("w") - z = tvm.compute((n,), lambda i: x[i]*v + w, name='z') + n = te.size_var("n") + x = te.placeholder((n,), name='x') + v = te.size_var("v") + w = te.size_var("w") + z = te.compute((n,), lambda i: x[i]*v + w, name='z') def intrin_func(ins, outs, sp): - assert(isinstance(ins[0], tvm.schedule.Buffer)) + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) assert(ins[0].shape[0] == n) assert(sp[0] == v) assert(sp[1] == w) - return tvm.call_packed("hw_func", ins[0].data, outs[0].data, sp[0], sp[1]) + return tvm.tir.call_packed("hw_func", ins[0].data, outs[0].data, sp[0], sp[1]) with tvm.build_config(offset_factor=1): intrin = tvm.decl_tensor_intrin(z.op, intrin_func, scalar_params=[v, w]) @@ -235,10 +236,10 @@ def intrin_func(ins, outs, sp): assert(intrin.buffers[0].shape[0] == n) assert tuple(intrin.scalar_params) == tuple((v, w)) - A = tvm.placeholder((10,10), name='A') + A = te.placeholder((10,10), name='A') # Pass scalar inputs to the TensorIntrin, interleaved with tensor inputs - C = tvm.compute((10,10), lambda i, j: intrin(i*i, A[i, j], i+j), name="C") - s = tvm.create_schedule(C.op) + C = te.compute((10,10), lambda i, j: intrin(i*i, A[i, j], i+j), name="C") + s = te.create_schedule(C.op) stmt = tvm.lower(s, [A, C], simple_mode=True) assert isinstance(stmt.body.body.body, tvm.tir.Evaluate) assert len(stmt.body.body.body.value.args) == 5 diff --git a/tests/python/unittest/test_lang_tag.py b/tests/python/unittest/test_lang_tag.py index 201abf193eb4a..c2bdd4b91606a 100644 --- a/tests/python/unittest/test_lang_tag.py +++ b/tests/python/unittest/test_lang_tag.py @@ -16,6 +16,8 @@ # under the License. import json import tvm +from tvm import te +from tvm import te @tvm.tag_scope(tag="conv") def compute_conv(data, weight): @@ -24,24 +26,24 @@ def compute_conv(data, weight): OH = H - KH + 1 OW = W - KW + 1 - ic = tvm.reduce_axis((0, IC), name='ic') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') + ic = te.reduce_axis((0, IC), name='ic') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') - return tvm.compute((N, OC, OH, OW), lambda i, oc, h, w: \ - tvm.sum(data[i, ic, h+dh, w+dw] * weight[oc, ic, dh, dw], + return te.compute((N, OC, OH, OW), lambda i, oc, h, w: \ + te.sum(data[i, ic, h+dh, w+dw] * weight[oc, ic, dh, dw], axis=[ic, dh, dw])) def test_with(): - n = tvm.size_var('n') - m = tvm.size_var('m') - l = tvm.size_var('l') + n = te.size_var('n') + m = te.size_var('m') + l = te.size_var('l') - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') with tvm.tag_scope(tag="gemm"): - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k), + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k), attrs={"hello" : 1, "arr": [10, 12]}) assert C.op.tag == 'gemm' @@ -56,31 +58,31 @@ def test_with(): def test_decorator(): - n = tvm.size_var('n') - c = tvm.size_var('c') - h = tvm.size_var('h') - w = tvm.size_var('w') - kh = tvm.size_var('kh') - kw = tvm.size_var('kw') + n = te.size_var('n') + c = te.size_var('c') + h = te.size_var('h') + w = te.size_var('w') + kh = te.size_var('kh') + kw = te.size_var('kw') - A = tvm.placeholder((n, c, h, w), name='A') - B = tvm.placeholder((c, c, kh, kw), name='B') + A = te.placeholder((n, c, h, w), name='A') + B = te.placeholder((c, c, kh, kw), name='B') C = compute_conv(A, B) assert C.op.tag == 'conv' assert len(C.op.attrs) == 0 def test_nested(): - n = tvm.size_var('n') - c = tvm.size_var('c') - h = tvm.size_var('h') - w = tvm.size_var('w') - kh = tvm.size_var('kh') - kw = tvm.size_var('kw') + n = te.size_var('n') + c = te.size_var('c') + h = te.size_var('h') + w = te.size_var('w') + kh = te.size_var('kh') + kw = te.size_var('kw') - A = tvm.placeholder((n, c, h, w), name='A') - B = tvm.placeholder((c, c, kh, kw), name='B') + A = te.placeholder((n, c, h, w), name='A') + B = te.placeholder((c, c, kh, kw), name='B') try: - with tvm.tag_scope(tag='conv'): + with te.tag_scope(tag='conv'): C = compute_conv(A, B) assert False except ValueError: diff --git a/tests/python/unittest/test_lang_target.py b/tests/python/unittest/test_lang_target.py index 6da99f8270473..da7bcee016e4d 100644 --- a/tests/python/unittest/test_lang_target.py +++ b/tests/python/unittest/test_lang_target.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te @tvm.target.generic_func def mygeneric(data): diff --git a/tests/python/unittest/test_lang_tensor.py b/tests/python/unittest/test_lang_tensor.py index 2de5e19c9e36d..92a42fcfdee87 100644 --- a/tests/python/unittest/test_lang_tensor.py +++ b/tests/python/unittest/test_lang_tensor.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from topi.nn.pooling import pool def test_tensor(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.placeholder((n, l), name='B') - T = tvm.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.placeholder((n, l), name='B') + T = te.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) print(T) print(T.op.body) assert(tuple(T.shape) == (m, n, l)) - assert(isinstance(A.op, tvm.tensor.PlaceholderOp)) + assert(isinstance(A.op, tvm.te.PlaceholderOp)) assert(A == A) assert(T.op.output(0) == T) assert(T.op.output(0).__hash__() == T.__hash__()) @@ -37,68 +38,68 @@ def test_tensor(): def test_rank_zero(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - scale = tvm.placeholder((), name='s') - k = tvm.reduce_axis((0, m), name="k") - T = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k)) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + scale = te.placeholder((), name='s') + k = te.reduce_axis((0, m), name="k") + T = te.compute((), lambda : te.sum(A[k] * scale(), axis=k)) print(T) print(T.op.body) assert(tuple(T.shape) == ()) def test_conv1d(): - n = tvm.size_var('n') - A = tvm.placeholder((n+2), name='A') + n = te.size_var('n') + A = te.placeholder((n+2), name='A') def computeB(ii): i = ii + 1 return A[i-1] + A[i] + A[i+1] - B = tvm.compute(n, computeB) + B = te.compute(n, computeB) def test_tensor_slice(): - n = tvm.size_var('n') - A = tvm.compute((n, n), lambda i, j: 1) - B = tvm.compute((n,), lambda i: A[0][i] + A[0][i]) + n = te.size_var('n') + A = te.compute((n, n), lambda i, j: 1) + B = te.compute((n,), lambda i: A[0][i] + A[0][i]) def test_tensor_reduce_multi_axis(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - k1 = tvm.reduce_axis((0, n), "k") - k2 = tvm.reduce_axis((0, m), "k") - C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=(k1, k2))) - C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=[k1, k2])) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + k1 = te.reduce_axis((0, n), "k") + k2 = te.reduce_axis((0, m), "k") + C = te.compute((1,), lambda _: te.sum(A[k1, k2], axis=(k1, k2))) + C = te.compute((1,), lambda _: te.sum(A[k1, k2], axis=[k1, k2])) def test_tensor_comm_reducer(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - k = tvm.reduce_axis((0, n), "k") - mysum = tvm.comm_reducer(lambda x, y: x+y, lambda t: tvm.const(0, dtype=t)) - C = tvm.compute((m,), lambda i: mysum(A[i, k], axis=k)) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + k = te.reduce_axis((0, n), "k") + mysum = te.comm_reducer(lambda x, y: x+y, lambda t: tvm.tir.const(0, dtype=t)) + C = te.compute((m,), lambda i: mysum(A[i, k], axis=k)) def test_tensor_comm_reducer_overload(): - m = tvm.size_var('m') - n = tvm.size_var('n') - mysum = tvm.comm_reducer(lambda x, y: x+y, lambda t: tvm.const(0, dtype=t)) + m = te.size_var('m') + n = te.size_var('n') + mysum = te.comm_reducer(lambda x, y: x+y, lambda t: tvm.tir.const(0, dtype=t)) sum_res = mysum(m, n) def test_tensor_reduce(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.placeholder((n, l), name='B') - T = tvm.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) - rv = tvm.reduce_axis((0, A.shape[1]), "k") - C = tvm.compute((m, n), lambda i, j: tvm.sum(T(i, j, rv+1), axis=rv)) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.placeholder((n, l), name='B') + T = te.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) + rv = te.reduce_axis((0, A.shape[1]), "k") + C = te.compute((m, n), lambda i, j: te.sum(T(i, j, rv+1), axis=rv)) # json load save C_json = tvm.ir.save_json(C) C_loaded = tvm.ir.load_json(C_json) - assert(isinstance(C_loaded, tvm.tensor.Tensor)) + assert(isinstance(C_loaded, te.tensor.Tensor)) assert(str(C_loaded) == str(C)) def test_tensor_compute1(): @@ -107,13 +108,13 @@ def test_tensor_compute1(): dtype = 'float32' def intrin_vadd(n): - x = tvm.placeholder((n,)) - y = tvm.placeholder((n,)) - z = tvm.compute(x.shape, lambda i: x[i] + y[i]) + x = te.placeholder((n,)) + y = te.placeholder((n,)) + z = te.compute(x.shape, lambda i: x[i] + y[i]) def intrin_func(ins, outs): ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) + ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) return ib.get() with tvm.build_config(offset_factor=n): @@ -121,12 +122,12 @@ def intrin_func(ins, outs): vadd = intrin_vadd(factor) - A = tvm.placeholder((m//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((m//factor, factor), name="B", dtype=dtype) - C = tvm.compute((m//factor, factor), + A = te.placeholder((m//factor, factor), name="A", dtype=dtype) + B = te.placeholder((m//factor, factor), name="B", dtype=dtype) + C = te.compute((m//factor, factor), lambda i: vadd(A[i, 0:factor], B[i, 0:factor])) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) stmt = tvm.lower(s, [A, B, C], simple_mode=True) assert isinstance(stmt.body.body, tvm.tir.Evaluate) @@ -140,21 +141,21 @@ def test_tensor_compute2(): dtype = 'float32' def intrin_gemm(m, n, l): - k = tvm.reduce_axis((0, l)) - x = tvm.placeholder((m, l)) - y = tvm.placeholder((n, l)) + k = te.reduce_axis((0, l)) + x = te.placeholder((m, l)) + y = te.placeholder((n, l)) # in theory, no relation - z = tvm.compute((m, n), lambda i, j: tvm.sum(x[i][k] * y[j][k], axis=k)) + z = te.compute((m, n), lambda i, j: te.sum(x[i][k] * y[j][k], axis=k)) def intrin_func(ins, outs): x_ptr = ins[0].access_ptr("r") y_ptr = ins[1].access_ptr("r") z_ptr = outs[0].access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemv", x_ptr, y_ptr, z_ptr, m, n, l) - reset = tvm.call_packed( + reset = tvm.tir.call_packed( "fill_zero", z_ptr, m, n) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", x_ptr, y_ptr, z_ptr, m, n, l) return body, reset, update @@ -163,79 +164,79 @@ def intrin_func(ins, outs): vgemm = intrin_gemm(factor1, factor2, factor) - A = tvm.placeholder((M//factor1, L//factor, factor1, factor), name="A", dtype=dtype) - B = tvm.placeholder((N//factor2, L//factor, factor2, factor), name="B", dtype=dtype) - k = tvm.reduce_axis((0, L//factor), name='k') - C = tvm.compute((M//factor1, N//factor2, factor1, factor2), + A = te.placeholder((M//factor1, L//factor, factor1, factor), name="A", dtype=dtype) + B = te.placeholder((N//factor2, L//factor, factor2, factor), name="B", dtype=dtype) + k = te.reduce_axis((0, L//factor), name='k') + C = te.compute((M//factor1, N//factor2, factor1, factor2), lambda i, j: vgemm(A[i, k, 0:factor1, 0:factor], B[j, k, 0:factor2, 0:factor], reduce_axis=k)) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) stmt = tvm.lower(s, [A, B, C], simple_mode=True) assert isinstance(stmt.body.body.body[0], tvm.tir.Evaluate) assert isinstance(stmt.body.body.body[1].body, tvm.tir.Evaluate) def test_tensor_scan(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.placeholder((m, n)) - s = tvm.placeholder((m, n)) - res = tvm.scan(tvm.compute((1, n), lambda _, i: x[0, i]), - tvm.compute((m, n), lambda t, i: s[t-1, i] + x[t, i]), + m = te.size_var("m") + n = te.size_var("n") + x = te.placeholder((m, n)) + s = te.placeholder((m, n)) + res = tvm.te.scan(te.compute((1, n), lambda _, i: x[0, i]), + te.compute((m, n), lambda t, i: s[t-1, i] + x[t, i]), s) assert tuple(res.shape) == (m, n) def test_scan_multi_out(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x1 = tvm.placeholder((m, n)) - s1 = tvm.placeholder((m, n)) - x2 = tvm.placeholder((m, n)) - s2 = tvm.placeholder((m, n)) - s1_init = tvm.compute((1, n), lambda _, i: x1[0, i]) - s2_init = tvm.compute((1, n), lambda _, i: x2[0, i]) - s1_update = tvm.compute((m, n), lambda t, i: s1[t-1, i] + s2[t-1, i] + x1[t, i]) - s2_update = tvm.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) - - r0, r1 = tvm.scan([s1_init, s2_init], + m = te.size_var("m") + n = te.size_var("n") + x1 = te.placeholder((m, n)) + s1 = te.placeholder((m, n)) + x2 = te.placeholder((m, n)) + s2 = te.placeholder((m, n)) + s1_init = te.compute((1, n), lambda _, i: x1[0, i]) + s2_init = te.compute((1, n), lambda _, i: x2[0, i]) + s1_update = te.compute((m, n), lambda t, i: s1[t-1, i] + s2[t-1, i] + x1[t, i]) + s2_update = te.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) + + r0, r1 = tvm.te.scan([s1_init, s2_init], [s1_update, s2_update], [s1, s2]) assert(r0.value_index == 0) assert(r1.value_index == 1) json_str = tvm.ir.save_json(r0.op) zz = tvm.ir.load_json(json_str) - assert isinstance(zz, tvm.tensor.ScanOp) + assert isinstance(zz, tvm.te.ScanOp) def test_extern(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') + m = te.size_var('m') + A = te.placeholder((m,), name='A') def extern_func(ins, outs): - assert(isinstance(ins[0], tvm.schedule.Buffer)) - return tvm.call_packed("myadd", ins[0].data, outs[0].data, m) - B = tvm.extern((m,), [A], extern_func) + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) + return tvm.tir.call_packed("myadd", ins[0].data, outs[0].data, m) + B = te.extern((m,), [A], extern_func) assert(tuple(B.shape) == (m,)) def test_extern_multi_out(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - B = tvm.compute((m,), lambda i: A[i] * 10) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + B = te.compute((m,), lambda i: A[i] * 10) def extern_func(ins, outs): - assert(isinstance(ins[0], tvm.schedule.Buffer)) - return tvm.call_packed( + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) + return tvm.tir.call_packed( "myadd", ins[0].data, outs[0].data, outs[1].data, m) - res = tvm.extern([A.shape, A.shape], [A, B], extern_func) + res = te.extern([A.shape, A.shape], [A, B], extern_func) assert(len(res) == 2) assert(res[1].value_index == 1) def test_tuple_inputs(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A0 = tvm.placeholder((m, n), name='A0') - A1 = tvm.placeholder((m, n), name='A1') - T0, T1 = tvm.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='T') - s = tvm.create_schedule(T0.op) + m = te.size_var('m') + n = te.size_var('n') + A0 = te.placeholder((m, n), name='A0') + A1 = te.placeholder((m, n), name='A1') + T0, T1 = te.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='T') + s = te.create_schedule(T0.op) for i in range(len(T0.shape)): assert(T0.shape[i] == T1.shape[i]) @@ -244,19 +245,19 @@ def test_tuple_inputs(): assert(T1.value_index == 1) def test_tuple_with_different_deps(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A0 = tvm.placeholder((m, n), name='A1') - A1 = tvm.placeholder((m, n), name='A2') - B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='B') - C = tvm.compute((m, n), lambda i, j: B0[i, j] + 4, name='C') - - s = tvm.create_schedule(C.op) + m = te.size_var('m') + n = te.size_var('n') + A0 = te.placeholder((m, n), name='A1') + A1 = te.placeholder((m, n), name='A2') + B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='B') + C = te.compute((m, n), lambda i, j: B0[i, j] + 4, name='C') + + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=10) s[B0.op].compute_at(s[C], xo) sch = s.normalize() - bounds = tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) def get_B1_realize(x): if isinstance(x, tvm.tir.Realize) and \ @@ -269,33 +270,33 @@ def get_B1_realize(x): def test_tensor_inputs(): - x = tvm.placeholder((1,), name='x') - y = tvm.compute(x.shape, lambda i: x[i] + x[i]) + x = te.placeholder((1,), name='x') + y = te.compute(x.shape, lambda i: x[i] + x[i]) assert tuple(y.op.input_tensors) == (x,) def test_tensor_pool(): def intrin_pool(): - A = tvm.placeholder((64, 16, 16), name='A') - kh = tvm.reduce_axis((0, 3), name='kh') - kw = tvm.reduce_axis((0, 3), name='kw') - P = tvm.compute((64, 14, 14), - lambda c, oh, ow: tvm.max(A[c, oh + kh, ow + kw], + A = te.placeholder((64, 16, 16), name='A') + kh = te.reduce_axis((0, 3), name='kh') + kw = te.reduce_axis((0, 3), name='kw') + P = te.compute((64, 14, 14), + lambda c, oh, ow: tvm.te.max(A[c, oh + kh, ow + kw], axis=[kh, kw]), name='p') def intrin_func(ins, outs): dinp = ins[0] dout = outs[0] - return tvm.call_packed("op", dinp, dout) + return tvm.tir.call_packed("op", dinp, dout) with tvm.build_config(offset_factor=1): return tvm.decl_tensor_intrin(P.op, intrin_func) - A = tvm.placeholder((1, 64, 16, 16), name='A') + A = te.placeholder((1, 64, 16, 16), name='A') P = pool(data=A, kernel=(3, 3), stride=(1, 1), padding=(0, 0, 0, 0), pool_type='max') - s = tvm.create_schedule(P.op) + s = te.create_schedule(P.op) _, oh, _, _ = P.op.axis intrin = intrin_pool() s[P].tensorize(oh, intrin) diff --git a/tests/python/unittest/test_lang_tensor_overload_op.py b/tests/python/unittest/test_lang_tensor_overload_op.py index 01c0d26dfc9b5..2e4696298919b 100644 --- a/tests/python/unittest/test_lang_tensor_overload_op.py +++ b/tests/python/unittest/test_lang_tensor_overload_op.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -23,27 +24,27 @@ def test_operator_type_and_tags(): k = 1 - n = tvm.var('n') - A = tvm.placeholder((), name='A') - B = tvm.placeholder((10, 5), name='B') + n = te.var('n') + A = te.placeholder((), name='A') + B = te.placeholder((10, 5), name='B') B1 = B[0] B2 = B[0,0] assert isinstance(k + n, tvm.tir.PrimExpr) assert isinstance(n + n, tvm.tir.PrimExpr) - assert isinstance(k + A, tvm.tensor.Tensor) - assert isinstance(A + k, tvm.tensor.Tensor) - assert isinstance(n + A, tvm.tensor.Tensor) - assert isinstance(A + n, tvm.tensor.Tensor) - assert isinstance(A + A, tvm.tensor.Tensor) - - assert isinstance(k + B, tvm.tensor.Tensor) - assert isinstance(B + k, tvm.tensor.Tensor) - assert isinstance(n + B, tvm.tensor.Tensor) - assert isinstance(B + n, tvm.tensor.Tensor) - assert isinstance(A + B, tvm.tensor.Tensor) - assert isinstance(B + A, tvm.tensor.Tensor) - assert isinstance(B + B, tvm.tensor.Tensor) + assert isinstance(k + A, te.tensor.Tensor) + assert isinstance(A + k, te.tensor.Tensor) + assert isinstance(n + A, te.tensor.Tensor) + assert isinstance(A + n, te.tensor.Tensor) + assert isinstance(A + A, te.tensor.Tensor) + + assert isinstance(k + B, te.tensor.Tensor) + assert isinstance(B + k, te.tensor.Tensor) + assert isinstance(n + B, te.tensor.Tensor) + assert isinstance(B + n, te.tensor.Tensor) + assert isinstance(A + B, te.tensor.Tensor) + assert isinstance(B + A, te.tensor.Tensor) + assert isinstance(B + B, te.tensor.Tensor) assert (k + B).op.tag == topi.tag.ELEMWISE assert (B + k).op.tag == topi.tag.ELEMWISE @@ -58,22 +59,22 @@ def test_operator_type_and_tags(): assert isinstance(n + B2, tvm.tir.PrimExpr) assert isinstance(B2 + n, tvm.tir.PrimExpr) assert isinstance(B2 + B2, tvm.tir.PrimExpr) - assert isinstance(B2 + A, tvm.tensor.Tensor) - assert isinstance(A + B2, tvm.tensor.Tensor) - assert isinstance(B2 + B, tvm.tensor.Tensor) - assert isinstance(B + B2, tvm.tensor.Tensor) + assert isinstance(B2 + A, te.tensor.Tensor) + assert isinstance(A + B2, te.tensor.Tensor) + assert isinstance(B2 + B, te.tensor.Tensor) + assert isinstance(B + B2, te.tensor.Tensor) def test_combination(): k = 3 n = 5 m = 10 - x = tvm.var('x') - A = tvm.placeholder((n, m), name='A') - B = tvm.placeholder((n, m), name='B') - C = tvm.placeholder((n, m), name='C') + x = te.var('x') + A = te.placeholder((n, m), name='A') + B = te.placeholder((n, m), name='B') + C = te.placeholder((n, m), name='C') D = k + A - B * C + x - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) foo = tvm.build(s, [x, A, B, C, D], "llvm") ctx = tvm.cpu(0) x = 2 @@ -87,9 +88,9 @@ def test_combination(): def verify_tensor_scalar_bop(shape, typ="add"): """Verify non-constant Tensor and scalar binary operations.""" - sh = [tvm.size_var('n%d' % i) for i in range(0, len(shape))] - k = tvm.var('k') - A = tvm.placeholder(sh, name='A') + sh = [te.size_var('n%d' % i) for i in range(0, len(shape))] + k = te.var('k') + A = te.placeholder(sh, name='A') if typ == "add": B = A + k elif typ == "sub": @@ -134,8 +135,8 @@ def check_device(device): def verify_broadcast_bop(lhs_shape, rhs_shape, typ="add"): - A = tvm.placeholder(shape=lhs_shape, name="A") - B = tvm.placeholder(shape=rhs_shape, name="B") + A = te.placeholder(shape=lhs_shape, name="A") + B = te.placeholder(shape=rhs_shape, name="B") if typ == "add": C = A + B elif typ == "sub": @@ -195,8 +196,8 @@ def check_device(device): k = 10.0 dilation = (1, 1) with tvm.target.create(device): - A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') + A = te.placeholder((batch, in_channel, in_size, in_size), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') B = conv2d_nchw(A, W, stride, padding, dilation, A.dtype) if typ == "add": C = B + k diff --git a/tests/python/unittest/test_lang_verify_compute.py b/tests/python/unittest/test_lang_verify_compute.py index 6d17a0ce23722..4231f481d88d0 100644 --- a/tests/python/unittest/test_lang_verify_compute.py +++ b/tests/python/unittest/test_lang_verify_compute.py @@ -15,38 +15,39 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_verify_compute(): - n = tvm.size_var("n") - m = tvm.size_var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - k_ = tvm.reduce_axis((0, m-1), "k_") - f1 = lambda i: tvm.sum(A[i, k], axis=k) + n = te.size_var("n") + m = te.size_var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + k_ = te.reduce_axis((0, m-1), "k_") + f1 = lambda i: te.sum(A[i, k], axis=k) f2 = lambda i: A[i,0] + 1 - f3 = lambda i: tvm.sum(A[i, k], axis=k) + 1 - f4 = lambda i: A[i,0] * (tvm.sum(A[i, k], axis=k) + 1) - f5 = lambda i: (tvm.sum(A[i, k], axis=k), A[i,0] + 1) - f6 = lambda i: (tvm.sum(A[i, k], axis=k), tvm.sum(A[i, k_], axis=k_)) + f3 = lambda i: te.sum(A[i, k], axis=k) + 1 + f4 = lambda i: A[i,0] * (te.sum(A[i, k], axis=k) + 1) + f5 = lambda i: (te.sum(A[i, k], axis=k), A[i,0] + 1) + f6 = lambda i: (te.sum(A[i, k], axis=k), te.sum(A[i, k_], axis=k_)) # # Valid compute try: - B = tvm.compute((n,), f1, name="B") + B = te.compute((n,), f1, name="B") except tvm._ffi.base.TVMError as ex: assert False # # Valid compute try: - B = tvm.compute((n,), f2, name="B") + B = te.compute((n,), f2, name="B") except tvm._ffi.base.TVMError as ex: assert False # # Invalid compute with non top level reduction try: - B = tvm.compute((n,), f3, name="B") + B = te.compute((n,), f3, name="B") assert False except tvm._ffi.base.TVMError as ex: pass @@ -54,7 +55,7 @@ def test_verify_compute(): # # Invalid compute with non top level reduction try: - B = tvm.compute((n,), f4, name="B") + B = te.compute((n,), f4, name="B") assert False except tvm._ffi.base.TVMError as ex: pass @@ -62,7 +63,7 @@ def test_verify_compute(): # # Invalid compute with reduction and non-reduction batch ops try: - B0, B1 = tvm.compute((n,), f5, name="B") + B0, B1 = te.compute((n,), f5, name="B") assert False except tvm._ffi.base.TVMError as ex: pass @@ -70,7 +71,7 @@ def test_verify_compute(): # # Invalid compute with unequal batch reduction ops try: - B0, B1 = tvm.compute((n,), f6, name="B") + B0, B1 = te.compute((n,), f6, name="B") assert False except tvm._ffi.base.TVMError as ex: pass diff --git a/tests/python/unittest/test_pass_attrs_hash_equal.py b/tests/python/unittest/test_pass_attrs_hash_equal.py index 2bd94e0d5cabd..19cd72cc50a73 100644 --- a/tests/python/unittest/test_pass_attrs_hash_equal.py +++ b/tests/python/unittest/test_pass_attrs_hash_equal.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_attrs_equal(): x = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4)) @@ -33,7 +34,7 @@ def test_attrs_equal(): assert tvm.ir_pass.AttrsEqual({"x": [x, x]}, {"x": [y, x]}) assert not tvm.ir_pass.AttrsEqual({"x": [x, 1]}, {"x": [y, 2]}) - n = tvm.var("n") + n = te.var("n") assert tvm.ir_pass.AttrsEqual({"x": n+1}, {"x": n+1}) diff --git a/tests/python/unittest/test_pass_basic.py b/tests/python/unittest/test_pass_basic.py index 93c815a4a21b6..57d37f73825a2 100644 --- a/tests/python/unittest/test_pass_basic.py +++ b/tests/python/unittest/test_pass_basic.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_simplify(): - tdiv = tvm.truncdiv - tmod = tvm.truncmod - x = tvm.var('x') + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod + x = te.var('x') e1 = tvm.ir_pass.Simplify(x + 2 + 1) assert(tvm.ir_pass.Equal(e1, x + 3)) e2 = tvm.ir_pass.Simplify(x * 3 + 5 * x) @@ -29,15 +30,15 @@ def test_simplify(): def test_verify_ssa(): - x = tvm.var('x') - y = tvm.var() + x = te.var('x') + y = te.var() z = tvm.tir.Evaluate(x + y) assert(tvm.ir_pass.VerifySSA(z)) def test_convert_ssa(): - x = tvm.var('x') - y = tvm.var() + x = te.var('x') + y = te.var() let1 = tvm.tir.Let(x, 1, x + 1) let2 = tvm.tir.Let(x, 1, x + y) z = tvm.tir.Evaluate(let1 + let2) @@ -47,7 +48,7 @@ def test_convert_ssa(): def test_expr_use_var(): - x = tvm.var('x') + x = te.var('x') assert(tvm.ir_pass.ExprUseVar(x+1, x)) assert(not tvm.ir_pass.ExprUseVar(1+10, x)) diff --git a/tests/python/unittest/test_pass_bound_checkers.py b/tests/python/unittest/test_pass_bound_checkers.py index 6b959e0d8da70..7abccc137c432 100644 --- a/tests/python/unittest/test_pass_bound_checkers.py +++ b/tests/python/unittest/test_pass_bound_checkers.py @@ -16,6 +16,7 @@ # under the License. import pytest import tvm +from tvm import te import numpy as np def collect_visit(stmt, f): ret = [] @@ -26,16 +27,16 @@ def lower(sch, args): binds = {} arg_list = [] for x in args: - if isinstance(x, tvm.tensor.Tensor): - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name) + if isinstance(x, te.tensor.Tensor): + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name) assert x not in binds binds[x] = buf arg_list.append(buf) else: raise ValueError("args must be Tensor, Buffer or Var") sch = sch.normalize() - bounds = tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) stmt = tvm.ir_pass.LoopPartition(stmt, True) stmt = tvm.ir_pass.RemoveNoOp(stmt) stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64, True) @@ -46,11 +47,11 @@ def lower(sch, args): @pytest.mark.xfail def test_out_of_bounds_llvm(index_a, index_b): - n = tvm.size_var("n") - A = tvm.placeholder ((n,), name='A') - B = tvm.placeholder ((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i + index_a] + B[i + index_b], name='C') - s = tvm.create_schedule (C.op) + n = te.size_var("n") + A = te.placeholder ((n,), name='A') + B = te.placeholder ((n,), name='B') + C = te.compute(A.shape, lambda i: A[i + index_a] + B[i + index_b], name='C') + s = te.create_schedule (C.op) tgt = "llvm" tgt_host = "llvm" stmt = tvm.lower (s, [A, B, C], simple_mode=True) @@ -63,11 +64,11 @@ def test_out_of_bounds_llvm(index_a, index_b): fadd (a, b, c) def test_in_bounds_llvm(): - n = tvm.size_var("n") - A = tvm.placeholder ((n,), name='A') - B = tvm.placeholder ((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C') - s = tvm.create_schedule (C.op) + n = te.size_var("n") + A = te.placeholder ((n,), name='A') + B = te.placeholder ((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name='C') + s = te.create_schedule (C.op) tgt = "llvm" tgt_host = "llvm" stmt = tvm.lower (s, [A, B, C], simple_mode=True) @@ -81,11 +82,11 @@ def test_in_bounds_llvm(): @pytest.mark.xfail def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b): - n = tvm.convert(nn) - a = tvm.placeholder((n), name='a') - b = tvm.placeholder((n), name='b') - c = tvm.compute((n,), lambda i: a[i + index_a] + b[i + index_b], name='c') - s = tvm.create_schedule(c.op) + n = tvm.runtime.convert(nn) + a = te.placeholder((n), name='a') + b = te.placeholder((n), name='b') + c = te.compute((n,), lambda i: a[i + index_a] + b[i + index_b], name='c') + s = te.create_schedule(c.op) xo, xi = s[c].split(c.op.axis[0], factor=8) s[c].parallel(xo) s[c].vectorize(xi) @@ -104,10 +105,10 @@ def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b): def test_in_bounds_vectorize_llvm(): n = 512 lanes = 2 - A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes) - B = tvm.compute((n,), lambda i: A[i], name='B') - C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes) + B = te.compute((n,), lambda i: A[i], name='B') + C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], nparts=2) _, xi = s[C].split(xi, factor=2) s[C].parallel(xo) @@ -128,12 +129,12 @@ def test_in_bounds_vectorize_llvm(): tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1) def test_in_bounds_loop_partition_basic_llvm(): - n = tvm.size_var('n') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + n = te.size_var('n') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -147,12 +148,12 @@ def test_in_bounds_loop_partition_basic_llvm(): @pytest.mark.xfail def test_out_of_bounds_loop_partition_basic_llvm(index_a, index_b): - n = tvm.size_var('n') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + n = te.size_var('n') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -187,14 +188,14 @@ def collect_branch_stmt (x): branch_collector.append(x) n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) stmt = lower (s, [A, B, T]) # num_attributes = num_buffers * num_splits = 2 * 3 # before instrumentation @@ -214,11 +215,11 @@ def collect_branch_stmt (x): def test_in_bounds_const_loop_partition_llvm(): with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True): n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -234,11 +235,11 @@ def test_in_bounds_const_loop_partition_llvm(): def test_out_of_bounds_const_loop_partition_llvm(index_a, index_b): with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True): n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -258,18 +259,18 @@ def test_in_bounds_conv_llvm(loop_tiling=False): batch_size = 1 in_height = in_width = 64 out_height = out_width = in_height - kernel_height + 1 - data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data') - kernel = tvm.placeholder((kernel_height, kernel_width, in_channel, + data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data') + kernel = te.placeholder((kernel_height, kernel_width, in_channel, out_channel), name='kernel') - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - conv = tvm.compute((batch_size, out_channel, out_height, out_width), - lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + conv = te.compute((batch_size, out_channel, out_height, out_width), + lambda n, oc, oh, ow: te.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * kernel[kh, kw, ic, oc], axis=[ic, kh, kw]), name="conv2d") - s = tvm.create_schedule(conv.op) + s = te.create_schedule(conv.op) n, oc, oh, ow = conv.op.axis if loop_tiling: @@ -280,10 +281,10 @@ def test_in_bounds_conv_llvm(loop_tiling=False): f = tvm.build(s, [data, kernel, conv], "llvm") data_input = tvm.nd.array(np.random.uniform( - size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx) + size=(batch_size, in_channel, in_height, in_width)).astype("float32"), ctx) kernel_input = tvm.nd.array(np.random.uniform( - size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx) - conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), tvm.float32, ctx) + size=(kernel_height, kernel_width, in_channel, out_channel)).astype("float32"), ctx) + conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx) f(data_input, kernel_input, conv_out) @pytest.mark.xfail @@ -295,14 +296,14 @@ def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False batch_size = 1 in_height = in_width = 64 out_height = out_width = in_height - kernel_height + 1 - data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data') - kernel = tvm.placeholder((kernel_height, kernel_width, in_channel, + data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data') + kernel = te.placeholder((kernel_height, kernel_width, in_channel, out_channel), name='kernel') - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - conv = tvm.compute((batch_size, out_channel, out_height, out_width), - lambda n, oc, oh, ow: tvm.sum(data[n + data_offsets[0], + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + conv = te.compute((batch_size, out_channel, out_height, out_width), + lambda n, oc, oh, ow: te.sum(data[n + data_offsets[0], ic + data_offsets[1], oh*HSTR + kh + data_offsets[2], ow*WSTR + kw + data_offsets[3]] @@ -313,7 +314,7 @@ def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False oc + kernel_offsets[3]], axis=[ic, kh, kw]), name="conv2d") - s = tvm.create_schedule(conv.op) + s = te.create_schedule(conv.op) n, oc, oh, ow = conv.op.axis if loop_tiling: @@ -324,21 +325,21 @@ def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False f = tvm.build(s, [data, kernel, conv], "llvm") data_input = tvm.nd.array(np.random.uniform( - size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx) + size=(batch_size, in_channel, in_height, in_width)).astype("float32"), ctx) kernel_input = tvm.nd.array(np.random.uniform( - size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx) - conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), tvm.float32, ctx) + size=(kernel_height, kernel_width, in_channel, out_channel)).astype("float32"), ctx) + conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx) f(data_input, kernel_input, conv_out) def test_in_bounds_tensors_with_same_shapes1D_llvm(): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((k, ), name='B') - - T = tvm.compute((m, ), lambda i: A[i]*B[i]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, ), name='A') + B = te.placeholder((k, ), name='B') + + T = te.compute((m, ), lambda i: A[i]*B[i]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -351,14 +352,14 @@ def test_in_bounds_tensors_with_same_shapes1D_llvm(): @pytest.mark.xfail def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((k, ), name='B') - - T = tvm.compute((m, ), lambda i: A[i]*B[i]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, ), name='A') + B = te.placeholder((k, ), name='B') + + T = te.compute((m, ), lambda i: A[i]*B[i]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -370,14 +371,14 @@ def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape f(a, b, t) def test_in_bounds_tensors_with_same_shapes2D_llvm(): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((k, k), name='B') - - T = tvm.compute((m, m), lambda i, j: A[i][j]*B[i][j]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n), name='A') + B = te.placeholder((k, k), name='B') + + T = te.compute((m, m), lambda i, j: A[i][j]*B[i][j]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -390,14 +391,14 @@ def test_in_bounds_tensors_with_same_shapes2D_llvm(): @pytest.mark.xfail def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((k, k), name='B') - - T = tvm.compute((m, m), lambda i, j: A[i][j]*B[i][j]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n), name='A') + B = te.placeholder((k, k), name='B') + + T = te.compute((m, m), lambda i, j: A[i][j]*B[i][j]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -409,14 +410,14 @@ def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape f(a, b, t) def test_in_bounds_tensors_with_same_shapes3D_llvm(): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n, n), name='A') - B = tvm.placeholder((k, k, k), name='B') - - T = tvm.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n, n), name='A') + B = te.placeholder((k, k, k), name='B') + + T = te.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -429,14 +430,14 @@ def test_in_bounds_tensors_with_same_shapes3D_llvm(): @pytest.mark.xfail def test_out_of_bounds_tensors_with_diff_shapes3D_llvm(a_shape, b_shape, c_shape): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n, n), name='A') - B = tvm.placeholder((k, k, k), name='B') - - T = tvm.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n, n), name='A') + B = te.placeholder((k, k, k), name='B') + + T = te.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -452,12 +453,12 @@ def test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm(): if not tvm.runtime.enabled("llvm"): return n = 64 - A = tvm.placeholder((n, ), name='A') - scale = tvm.placeholder((), name='scale') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((), lambda : tvm.sum(A[k + k + k] * scale, axis=k), name="C") - D = tvm.compute((), lambda : C + 1) - s = tvm.create_schedule(D.op) + A = te.placeholder((n, ), name='A') + scale = te.placeholder((), name='scale') + k = te.reduce_axis((0, n), name="k") + C = te.compute((), lambda : te.sum(A[k + k + k] * scale, axis=k), name="C") + D = te.compute((), lambda : C + 1) + s = te.create_schedule(D.op) stmt = tvm.lower (s, [A, scale, D], simple_mode=True) print (stmt) # build and invoke the kernel. diff --git a/tests/python/unittest/test_pass_combine_context_call.py b/tests/python/unittest/test_pass_combine_context_call.py index ef741a4bff7b9..189dab70be0aa 100644 --- a/tests/python/unittest/test_pass_combine_context_call.py +++ b/tests/python/unittest/test_pass_combine_context_call.py @@ -15,24 +15,25 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_for(): - dev_type = tvm.var("dev_type") + dev_type = te.var("dev_type") def device_context(dev_id): - ctx = tvm.call_extern("handle", "device_context", dev_type, dev_id) + ctx = tvm.tir.call_extern("handle", "device_context", dev_type, dev_id) return tvm.tir.Call( "handle", "tvm_thread_context", [ctx], tvm.tir.Call.Intrinsic, None, 0) ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") A = ib.allocate("float32", n, name="A", scope="global") with ib.for_range(0, n, name="i") as i: - ib.emit(tvm.call_extern + ib.emit(tvm.tir.call_extern ("int32", "fadd", device_context(0), A)) with ib.for_range(0, 10, name="j") as j: - ib.emit(tvm.call_extern + ib.emit(tvm.tir.call_extern ("int32", "fadd", device_context(1), A)) - ib.emit(tvm.call_extern + ib.emit(tvm.tir.call_extern ("int32", "fadd", device_context(0), A)) body = ib.get() f = tvm.ir_pass.MakeAPI(body, "func", [dev_type, n], 2, True) diff --git a/tests/python/unittest/test_pass_decorate_device_scope.py b/tests/python/unittest/test_pass_decorate_device_scope.py index b464354e008ab..1cda4d9b0c3ff 100644 --- a/tests/python/unittest/test_pass_decorate_device_scope.py +++ b/tests/python/unittest/test_pass_decorate_device_scope.py @@ -15,22 +15,23 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_decorate_device(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], factor=8) s[A1].compute_at(s[A2], xo) s[A1].set_scope("shared") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) stmt1 = tvm.ir_pass.Simplify(stmt) stmt2 = tvm.ir_pass.DecorateDeviceScope(stmt1) assert isinstance(stmt2, tvm.tir.AttrStmt) diff --git a/tests/python/unittest/test_pass_equal.py b/tests/python/unittest/test_pass_equal.py index 1f5bb9cba9a9d..cc62bb1863fc9 100644 --- a/tests/python/unittest/test_pass_equal.py +++ b/tests/python/unittest/test_pass_equal.py @@ -15,16 +15,17 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_equal_expr(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') def func1(): return x + y + 1 def func2(): - return tvm.exp(tvm.truncdiv((x + y + 1) * y, 4)) + return te.exp(tvm.tir.truncdiv((x + y + 1) * y, 4)) assert tvm.ir_pass.Equal(func1(), func1()) assert tvm.ir_pass.Equal(func2(), func2()) @@ -32,20 +33,20 @@ def func2(): def test_equal_compute(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') n = 128 - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((n, n), name='B') - ii = tvm.var('i') - jj = tvm.var('j') + A = te.placeholder((n, n), name='A') + B = te.placeholder((n, n), name='B') + ii = te.var('i') + jj = te.var('j') def func1(): - k = tvm.reduce_axis((0, n), name='k') - return tvm.sum(A[ii, k] * B[jj, k], axis=k) + k = te.reduce_axis((0, n), name='k') + return te.sum(A[ii, k] * B[jj, k], axis=k) - Ab = tvm.decl_buffer((n,), name='A') - n = tvm.var("n") + Ab = tvm.tir.decl_buffer((n,), name='A') + n = te.var("n") def func2(): ib = tvm.ir_builder.create() A = ib.buffer_ptr(Ab) diff --git a/tests/python/unittest/test_pass_hoist_if.py b/tests/python/unittest/test_pass_hoist_if.py index 2eb641b0cd907..d93ebbc91a7a1 100644 --- a/tests/python/unittest/test_pass_hoist_if.py +++ b/tests/python/unittest/test_pass_hoist_if.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te var_list = [] @@ -53,9 +54,9 @@ def _visit(op): def test_basic(): ib = tvm.ir_builder.create() - l = tvm.var('l') - m = tvm.var('m') - n = tvm.var('n') + l = te.var('l') + m = te.var('m') + n = te.var('n') with ib.for_range(0, l, "i") as i: with ib.for_range(0, m, "j") as j: @@ -74,9 +75,9 @@ def test_basic(): def test_no_else(): ib = tvm.ir_builder.create() - l = tvm.var('l') - m = tvm.var('m') - n = tvm.var('n') + l = te.var('l') + m = te.var('m') + n = te.var('n') with ib.for_range(0, l, "i") as i: with ib.for_range(0, m, "j") as j: @@ -95,18 +96,18 @@ def test_attr_stmt(): ib = tvm.ir_builder.create() dshape = (32, 64) data = ib.pointer("float32", name="data") - l = tvm.var('l') - m = tvm.var('m') - n = tvm.var('n') + l = te.var('l') + m = te.var('m') + n = te.var('n') - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", dshape[0]) ib.scope_attr(bx, "thread_extent", dshape[1]) with ib.for_range(0, l, "i") as i: with ib.for_range(0, m, "j") as j: with ib.for_range(0, n, "k") as k: - with ib.if_scope(tvm.any(i < 4, j >= 8)): + with ib.if_scope(tvm.tir.any(i < 4, j >= 8)): data[bx * j + tx * j * k] = data[bx * j + tx * j * k] + 0.5 with ib.else_scope(): data[bx * j + tx * j * k] = data[bx * j + tx * j * k] + 1.0 @@ -130,7 +131,7 @@ def test_nested_for(): data[i * 3 + j] = data[i * 3 + j] + 0.5 with ib.for_range(0, 15, "k") as k: with ib.for_range(0, 20, "l") as l: - with ib.if_scope(tvm.any(i < 4, j >= 8)): + with ib.if_scope(tvm.tir.any(i < 4, j >= 8)): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 2 with ib.else_scope(): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 1.5 @@ -145,7 +146,7 @@ def test_nested_for(): def test_if_block(): ib = tvm.ir_builder.create() data = ib.pointer("float32", name="data") - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, 5, "i") as i: @@ -154,7 +155,7 @@ def test_if_block(): data[i * 3 + j] = data[i * 3 + j] + 0.5 with ib.for_range(0, 15, "k") as k: with ib.for_range(0, 20, "l") as l: - with ib.if_scope(tvm.any(i < 4, j >= 8)): + with ib.if_scope(tvm.tir.any(i < 4, j >= 8)): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 2 with ib.else_scope(): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 1.5 diff --git a/tests/python/unittest/test_pass_inject_copy_intrin.py b/tests/python/unittest/test_pass_inject_copy_intrin.py index f49388db3eb22..5920ed780b9d2 100644 --- a/tests/python/unittest/test_pass_inject_copy_intrin.py +++ b/tests/python/unittest/test_pass_inject_copy_intrin.py @@ -15,18 +15,19 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_copy2d(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.compute((m, l), lambda i, j: A[i, j], name='B') - s = tvm.create_schedule(B.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + B = te.compute((m, l), lambda i, j: A[i, j], name='B') + s = te.create_schedule(B.op) s[B].pragma(B.op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) def cb(src, dst, pad_before, pad_after, pad_value): assert dst.strides[0] == l @@ -37,18 +38,18 @@ def cb(src, dst, pad_before, pad_after, pad_value): stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) def test_copy_pad(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.compute((m + 2, l), lambda i, j: - tvm.if_then_else(tvm.all(i >= 1, i < m + 1), + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + B = te.compute((m + 2, l), lambda i, j: + tvm.tir.if_then_else(tvm.tir.all(i >= 1, i < m + 1), A[i - 1, j], 1.0), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].pragma(B.op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) def cb(src, dst, pad_before, pad_after, pad_value): assert tvm.ir_pass.Simplify(src.elem_offset).value == 0 @@ -61,15 +62,15 @@ def cb(src, dst, pad_before, pad_after, pad_value): stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) def test_single_point_test(): - A = tvm.placeholder((1,), name='A') - B = tvm.compute((1,), lambda i: + A = te.placeholder((1,), name='A') + B = te.compute((1,), lambda i: A[i], name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].pragma(B.op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) def cb(src, dst, pad_before, pad_after, pad_value): assert tvm.ir_pass.Simplify(src.elem_offset).value == 0 @@ -84,28 +85,28 @@ def assert_expr_equal(a, b): def test_copy_pad_split(): m = 4 * 3 - A = tvm.placeholder((m, ), name="A") - Apad = tvm.compute((m + 2,), lambda i: - tvm.if_then_else(tvm.all(i >= 1, i <= m), + A = te.placeholder((m, ), name="A") + Apad = te.compute((m + 2,), lambda i: + tvm.tir.if_then_else(tvm.tir.all(i >= 1, i <= m), A[i - 1], 0.0), "Apad") - B = tvm.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2]) - s = tvm.create_schedule(B.op) + B = te.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2]) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=4) s[Apad].compute_at(s[B], xo) s[Apad].pragma(s[Apad].op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) stmt = tvm.ir_pass.Simplify(stmt) stmt = tvm.ir_pass.CanonicalSimplify(stmt) def cb(src, dst, pad_before, pad_after, pad_value): assert(dst.elem_offset.value == 0) - assert_expr_equal(src.elem_offset, tvm.max(xo * 4, 1) - 1) + assert_expr_equal(src.elem_offset, tvm.te.max(xo * 4, 1) - 1) - rpad_before = tvm.max(1 - xo * 4, 0) - rpad_after = tvm.max(xo * 4 - 7, 0) + rpad_before = tvm.te.max(1 - xo * 4, 0) + rpad_after = tvm.te.max(xo * 4 - 7, 0) assert_expr_equal(pad_before[0], rpad_before) assert_expr_equal(pad_after[0], rpad_after) assert_expr_equal(src.shape[0], 6 - rpad_before - rpad_after) diff --git a/tests/python/unittest/test_pass_inject_double_buffer.py b/tests/python/unittest/test_pass_inject_double_buffer.py index cf8f78c8090d6..314902b3bdf35 100644 --- a/tests/python/unittest/test_pass_inject_double_buffer.py +++ b/tests/python/unittest/test_pass_inject_double_buffer.py @@ -15,12 +15,13 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_double_buffer(): dtype = 'int64' n = 100 m = 4 - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") diff --git a/tests/python/unittest/test_pass_inject_vthread.py b/tests/python/unittest/test_pass_inject_vthread.py index 08e261b68f6d3..89285912f7729 100644 --- a/tests/python/unittest/test_pass_inject_vthread.py +++ b/tests/python/unittest/test_pass_inject_vthread.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_vthread(): dtype = 'int64' @@ -22,8 +23,8 @@ def test_vthread(): m = 4 nthread = 2 def get_vthread(name): - tx = tvm.thread_axis(name) - ty = tvm.thread_axis(name) + tx = te.thread_axis(name) + ty = te.thread_axis(name) ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") @@ -32,10 +33,10 @@ def get_vthread(name): ib.scope_attr(ty, "virtual_thread", nthread) B = ib.allocate("float32", m, name="B", scope="shared") B[i] = A[i * nthread + tx] - bbuffer = tvm.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) - ib.emit(tvm.call_extern("int32", "Run", + bbuffer = tvm.tir.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) + ib.emit(tvm.tir.call_extern("int32", "Run", bbuffer.access_ptr("r"), - tvm.call_pure_intrin("int32", "tvm_context_id"))) + tvm.tir.call_pure_intrin("int32", "tvm_context_id"))) C[i * nthread + tx] = B[i] + 1 return ib.get() @@ -51,8 +52,8 @@ def test_vthread_extern(): m = 4 nthread = 2 def get_vthread(name): - tx = tvm.thread_axis(name) - ty = tvm.thread_axis(name) + tx = te.thread_axis(name) + ty = te.thread_axis(name) ib = tvm.ir_builder.create() with ib.for_range(0, n) as i: ib.scope_attr(tx, "virtual_thread", nthread) @@ -60,12 +61,12 @@ def get_vthread(name): A = ib.allocate("float32", m, name="A", scope="shared") B = ib.allocate("float32", m, name="B", scope="shared") C = ib.allocate("float32", m, name="C", scope="shared") - cbuffer = tvm.decl_buffer((m,), dtype=C.dtype, data=C.asobject()) - abuffer = tvm.decl_buffer((m,), dtype=A.dtype, data=A.asobject()) - bbuffer = tvm.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) + cbuffer = tvm.tir.decl_buffer((m,), dtype=C.dtype, data=C.asobject()) + abuffer = tvm.tir.decl_buffer((m,), dtype=A.dtype, data=A.asobject()) + bbuffer = tvm.tir.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) A[tx] = tx + 1.0 B[ty] = ty + 1.0 - ib.emit(tvm.call_extern("int32", "Run", + ib.emit(tvm.tir.call_extern("int32", "Run", abuffer.access_ptr("r"), bbuffer.access_ptr("r"), cbuffer.access_ptr("rw"))) @@ -78,7 +79,7 @@ def get_vthread(name): def test_vthread_if_then_else(): nthread = 2 - tx = tvm.thread_axis("vthread") + tx = te.thread_axis("vthread") ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 100) as i: diff --git a/tests/python/unittest/test_pass_inline.py b/tests/python/unittest/test_pass_inline.py index 521a6f99e026c..d332add3d92dc 100644 --- a/tests/python/unittest/test_pass_inline.py +++ b/tests/python/unittest/test_pass_inline.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_inline(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i,: A[i] + 10, name='T') + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i,: A[i] + 10, name='T') stmt = tvm.tir.Evaluate(T[10] + 11 * T[100]) stmt = tvm.ir_pass.Inline( stmt, T.op, [x.var for x in T.op.axis], T.op.body[0]) @@ -36,10 +37,10 @@ def test_inline(): pass def test_inline2(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i,: A[i] + 10, name='T') - stmt = tvm.tir.Evaluate(tvm.exp(T[10]) + 11 * T[100]) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i,: A[i] + 10, name='T') + stmt = tvm.tir.Evaluate(te.exp(T[10]) + 11 * T[100]) stmt = tvm.ir_pass.Inline( stmt, T.op, [x.var for x in T.op.axis], T.op.body[0]) def check(op): diff --git a/tests/python/unittest/test_pass_ir_transform.py b/tests/python/unittest/test_pass_ir_transform.py index b024a3c8d5b9f..564831f52fcf8 100644 --- a/tests/python/unittest/test_pass_ir_transform.py +++ b/tests/python/unittest/test_pass_ir_transform.py @@ -15,26 +15,27 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_ir_transform(): ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: - x = tvm.call_extern("int32", "TestA", i * 3 + j * 1) - ib.emit(tvm.call_extern("int32", "TestB", x)) - ib.emit(tvm.call_extern("int32", "TestC", x)) + x = tvm.tir.call_extern("int32", "TestA", i * 3 + j * 1) + ib.emit(tvm.tir.call_extern("int32", "TestB", x)) + ib.emit(tvm.tir.call_extern("int32", "TestC", x)) body = ib.get() def preorder(op): if op.name == "TestC": - return tvm.const(0, "int32") + return tvm.tir.const(0, "int32") return None def postorder(op): assert isinstance(op, tvm.tir.Call) if op.name == "TestA": - return tvm.call_extern("int32", "TestB", op.args[0] + 1) + return tvm.tir.call_extern("int32", "TestB", op.args[0] + 1) return op body = tvm.ir_pass.IRTransform(body, preorder, postorder, ["Call"]) stmt_list = tvm.tir.stmt_list(body.body.body) diff --git a/tests/python/unittest/test_pass_lift_attr_scope.py b/tests/python/unittest/test_pass_lift_attr_scope.py index 181f4ef57a4f7..e774dc427e1ad 100644 --- a/tests/python/unittest/test_pass_lift_attr_scope.py +++ b/tests/python/unittest/test_pass_lift_attr_scope.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_coproc_lift(): ib = tvm.ir_builder.create() - n = tvm.var("n") - cp = tvm.thread_axis((0, 1), "cop") + n = te.var("n") + cp = te.thread_axis((0, 1), "cop") value = tvm.tir.StringImm("xxx") A = ib.allocate("float32", n, name="A", scope="global") diff --git a/tests/python/unittest/test_pass_loop_partition.py b/tests/python/unittest/test_pass_loop_partition.py index e9df98e43d794..739f01fb68e46 100644 --- a/tests/python/unittest/test_pass_loop_partition.py +++ b/tests/python/unittest/test_pass_loop_partition.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy def collect_visit(stmt, f): @@ -34,16 +35,16 @@ def lower(sch, args): binds = {} arg_list = [] for x in args: - if isinstance(x, tvm.tensor.Tensor): - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name) + if isinstance(x, te.tensor.Tensor): + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name) assert x not in binds binds[x] = buf arg_list.append(buf) else: raise ValueError("args must be Tensor, Buffer or Var") sch = sch.normalize() - bounds = tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) stmt = tvm.ir_pass.LoopPartition(stmt, False) stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64) stmt = tvm.ir_pass.CanonicalSimplify(stmt) @@ -52,16 +53,16 @@ def lower(sch, args): return stmt def test_basic(): - n = tvm.size_var('n') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + n = te.size_var('n') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) stmt = tvm.ir_pass.LoopPartition(stmt, False) stmt = tvm.ir_pass.Simplify(stmt) assert('if' not in str(stmt.body.body.body[0])) @@ -69,23 +70,23 @@ def test_basic(): def test_const_loop(): n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) stmt = tvm.ir_pass.LoopPartition(stmt, True) stmt = tvm.ir_pass.Simplify(stmt) assert('if' not in str(stmt.body.body.body[0])) def test_multi_loop(): ib = tvm.ir_builder.create() - m = tvm.size_var('m') - n = tvm.size_var('n') + m = te.size_var('m') + n = te.size_var('n') with ib.for_range(0, 4, "i") as i: with ib.for_range(0, n, "j") as j: with ib.for_range(0, m, "k") as k: @@ -100,8 +101,8 @@ def test_multi_loop(): def test_multi_if(): ib = tvm.ir_builder.create() - m = tvm.size_var('m') - n = tvm.size_var('n') + m = te.size_var('m') + n = te.size_var('n') with ib.for_range(0, 4, 'i') as i: with ib.for_range(0, n, 'j') as j: with ib.for_range(0, m, 'k') as k: @@ -119,40 +120,40 @@ def test_multi_if(): assert('if' not in str(stmt.body[0])) def test_thread_axis(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.compute((m, l), lambda i, j: A[i, j] + 3, name='B') - s = tvm.create_schedule(B.op) + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.compute((m, l), lambda i, j: A[i, j] + 3, name='B') + s = te.create_schedule(B.op) s[B].set_scope("shared") num_thread = 16 xo, xi = s[B].split(B.op.axis[0], 32) xi0, xi1 = s[B].split(xi, nparts=num_thread) - s[B].bind(xi0, tvm.thread_axis("threadIdx.x")) + s[B].bind(xi0, te.thread_axis("threadIdx.x")) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) stmt = tvm.ir_pass.LoopPartition(stmt, False) stmt = tvm.ir_pass.Simplify(stmt) assert('if' not in str(stmt.body.body.body[0])) def test_vectorize(): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - bias = tvm.size_var("bias", dtype="float32") - scale = tvm.size_var("scale", dtype="float32") - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C') + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + bias = te.size_var("bias", dtype="float32") + scale = te.size_var("scale", dtype="float32") + C = te.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) # create iter var and assign them tags. num_thread = 32 bx, x = s[C].split(C.op.axis[0], factor=num_thread*4) tx, x = s[C].split(x, nparts=num_thread) _, x = s[C].split(x, factor=4) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) s[C].vectorize(x) stmt = lower(s, [A, B]) body = stmt.body.body.body.body.body @@ -161,9 +162,9 @@ def test_vectorize(): def test_condition(): ib = tvm.ir_builder.create() - m = tvm.size_var('m') - n = tvm.size_var('n') - with ib.for_range(0, tvm.truncdiv(n+3,4), 'i') as i: + m = te.size_var('m') + n = te.size_var('n') + with ib.for_range(0, tvm.tir.truncdiv(n+3,4), 'i') as i: with ib.for_range(0, 4, 'j') as j: ib.emit(tvm.tir.Evaluate( tvm.tir.Select(ib.likely(i*4+j 0)): with ib.if_scope(ib.likely(ow < 15)): - out[ow] = tvm.max(out[ow], data[ow + kw - 1]) + out[ow] = tvm.te.max(out[ow], data[ow + kw - 1]) with ib.for_range(0, 16, 'ow') as ow: with ib.for_range(0, 3, 'kw') as kw: with ib.if_scope(ib.likely(ow < 1)): with ib.if_scope(ib.likely(kw > 0)): - out[ow] = tvm.max(out[ow], data[ow + kw - 1]) + out[ow] = tvm.te.max(out[ow], data[ow + kw - 1]) with ib.for_range(0, 16, 'ow') as ow: with ib.for_range(0, 3, 'kw') as kw: with ib.if_scope(ib.likely(ow > 14)): with ib.if_scope(ib.likely(kw < 2)): - out[ow] = tvm.max(out[ow], data[ow + kw - 1]) + out[ow] = tvm.te.max(out[ow], data[ow + kw - 1]) stmt = ib.get() stmt = tvm.ir_pass.LoopPartition(stmt, True) @@ -284,11 +285,11 @@ def test_cce_loop_1(): dtype = 'float16' n = 514 m = 514 - _A = tvm.placeholder((n*m,), name = 'A') - Ab = tvm.decl_buffer((n*m,), dtype, name="A") + _A = te.placeholder((n*m,), name = 'A') + Ab = tvm.tir.decl_buffer((n*m,), dtype, name="A") A = ib.buffer_ptr(Ab) - _B = tvm.placeholder((n*m,), name = 'B') - Bb = tvm.decl_buffer((n*m,), dtype, name="B") + _B = te.placeholder((n*m,), name = 'B') + Bb = tvm.tir.decl_buffer((n*m,), dtype, name="B") B = ib.buffer_ptr(Bb) #for i in 0 to n-1: with ib.for_range(0, 11, name="i") as i: @@ -309,10 +310,10 @@ def test_cce_loop_2(): head = i * tile with ib.if_scope(ib.likely(head + tile > len)): tail = len - ib.emit(tvm.call_extern('float32', "cce_intrisic", head, tail)) + ib.emit(tvm.tir.call_extern('float32', "cce_intrisic", head, tail)) with ib.else_scope(): tail = head + tile - ib.emit(tvm.call_extern('float32', "cce_intrisic", head, tail)) + ib.emit(tvm.tir.call_extern('float32', "cce_intrisic", head, tail)) stmt = ib.get() stmt = tvm.ir_pass.LoopPartition(stmt, True) @@ -330,7 +331,7 @@ def test_cce_loop_3(): head1 = i head2 = j with ib.if_scope(ib.likely(head1*loop1 + head2 < tile)): - ib.emit(tvm.call_extern('float16',"cce_intrisic",head1)) + ib.emit(tvm.tir.call_extern('float16',"cce_intrisic",head1)) stmt = ib.get() stmt = tvm.ir_pass.LoopPartition(stmt,True) @@ -345,23 +346,23 @@ def test_conv_tiling(): batch_size = 1 in_height = in_width = 64 out_height = out_width = in_height - kernel_height + 1 - data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data') - kernel = tvm.placeholder((kernel_height, kernel_width, in_channel, + data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data') + kernel = te.placeholder((kernel_height, kernel_width, in_channel, out_channel), name='kernel') - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - conv = tvm.compute((batch_size, out_channel, out_height, out_width), - lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + conv = te.compute((batch_size, out_channel, out_height, out_width), + lambda n, oc, oh, ow: te.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * kernel[kh, kw, ic, oc], axis=[ic, kh, kw]), name="conv2d") - s = tvm.create_schedule(conv.op) + s = te.create_schedule(conv.op) n, oc, oh, ow = conv.op.axis oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) stmt = tvm.ir_pass.LoopPartition(stmt, True) stmt = tvm.ir_pass.Simplify(stmt) assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse)))) @@ -369,9 +370,9 @@ def test_conv_tiling(): def test_multilevel_splitting_with_indivisble_factors(): import topi - A = tvm.placeholder((130,), dtype="float32") + A = te.placeholder((130,), dtype="float32") B = topi.nn.relu(A) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) (y,) = s[B].op.axis (yo, yi) = s[B].split(y, factor=8) (yoo, yoi) = s[B].split(yo, factor=16) @@ -390,11 +391,11 @@ def visit_stmt(op): def test_double_splitting_with_indivisible_factors(): m = 48 dtype="float32" - A = tvm.placeholder((m,), name='A', dtype=dtype) - C = tvm.compute((m,), lambda i: A[i], name='C') - D = tvm.compute((m,), lambda i: C[i], name='D') + A = te.placeholder((m,), name='A', dtype=dtype) + C = te.compute((m,), lambda i: A[i], name='C') + D = te.compute((m,), lambda i: C[i], name='D') - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) co, ci = s[C].split(C.op.axis[0], factor=10) do, di = s[D].split(D.op.axis[0], 32) s[C].compute_at(s[D], do) @@ -420,23 +421,23 @@ def test_double_splitting_with_indivisible_factors(): def test_simple_rfactor(): K = 16*4+4 - k = tvm.reduce_axis((0, K), 'k') + k = te.reduce_axis((0, K), 'k') - A = tvm.placeholder((1, K), name='A') + A = te.placeholder((1, K), name='A') - B = tvm.compute( (1,), lambda b: - tvm.sum(A[b, k], axis=k), + B = te.compute( (1,), lambda b: + te.sum(A[b, k], axis=k), name='B' ) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, _ = s[B].split(s[B].op.reduce_axis[0], 16) BF = s.rfactor(B, ko, 0) s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) - stmt1 = tvm.schedule.ScheduleOps(s, bounds) + stmt1 = tvm.te.schedule.ScheduleOps(s, bounds) stmt1 = tvm.ir_pass.Simplify(stmt1) stmt2 = tvm.ir_pass.LoopPartition(stmt1, True) diff --git a/tests/python/unittest/test_pass_lower_intrin.py b/tests/python/unittest/test_pass_lower_intrin.py index 1e54f38b87d0e..40252053013e4 100644 --- a/tests/python/unittest/test_pass_lower_intrin.py +++ b/tests/python/unittest/test_pass_lower_intrin.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def lower_intrin(stmt): @@ -28,8 +29,8 @@ def lower_intrin(stmt): def check_value(expr, vx, vy, data, fref): n = len(data) - A = tvm.placeholder((n,), name="A", dtype=expr.dtype) - B = tvm.placeholder((n,), name="B", dtype=expr.dtype) + A = te.placeholder((n,), name="A", dtype=expr.dtype) + B = te.placeholder((n,), name="B", dtype=expr.dtype) def make_binds(i): x = expr @@ -37,8 +38,8 @@ def make_binds(i): x = tvm.tir.Let(vy, B[i], x) return x - C = tvm.compute((n,), make_binds) - s = tvm.create_schedule([C.op]) + C = te.compute((n,), make_binds) + s = te.create_schedule([C.op]) if not tvm.runtime.enabled("llvm"): return @@ -65,43 +66,43 @@ def get_ref_data(): def test_lower_floordiv(): data = get_ref_data() for dtype in ["int32", "int64", "int16"]: - x = tvm.var("x", dtype=dtype) - y = tvm.var("y", dtype=dtype) - zero = tvm.const(0, dtype) + x = te.var("x", dtype=dtype) + y = te.var("y", dtype=dtype) + zero = tvm.tir.const(0, dtype) # no constraints - res = lower_intrin(tvm.floordiv(x, y)) + res = lower_intrin(tvm.te.floordiv(x, y)) check_value(res, x, y, data, lambda a, b: a // b) # rhs >= 0 - res = lower_intrin(tvm.tir.Select(y >= 0, tvm.floordiv(x, y), zero)) + res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.floordiv(x, y), zero)) check_value(res, x, y, data, lambda a, b: a // b if b > 0 else 0) # involves max - res = lower_intrin(tvm.tir.Select(y >= 0, tvm.max(tvm.floordiv(x, y), zero), zero)) + res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.max(tvm.te.floordiv(x, y), zero), zero)) check_value(res, x, y, data, lambda a, b: max(a // b, 0) if b > 0 else 0) # lhs >= 0 - res = lower_intrin(tvm.tir.Select(tvm.all(y >= 0, x >= 0), tvm.floordiv(x, y), zero)) + res = lower_intrin(tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floordiv(x, y), zero)) check_value(res, x, y, data, lambda a, b: a // b if b > 0 and a >= 0 else 0) # const power of two - res = lower_intrin(tvm.floordiv(x, tvm.const(8, dtype=dtype))) + res = lower_intrin(tvm.te.floordiv(x, tvm.tir.const(8, dtype=dtype))) check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a // b) def test_lower_floormod(): data = get_ref_data() for dtype in ["int32", "int64", "int16"]: - x = tvm.var("x", dtype=dtype) - y = tvm.var("y", dtype=dtype) - zero = tvm.const(0, dtype) + x = te.var("x", dtype=dtype) + y = te.var("y", dtype=dtype) + zero = tvm.tir.const(0, dtype) # no constraints - res = lower_intrin(tvm.floormod(x, y)) + res = lower_intrin(tvm.te.floormod(x, y)) check_value(res, x, y, data, lambda a, b: a % b) # rhs >= 0 - res = lower_intrin(tvm.tir.Select(y >= 0, tvm.floormod(x, y), zero)) + res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.floormod(x, y), zero)) check_value(res, x, y, data, lambda a, b: a % b if b > 0 else 0) # lhs >= 0 - res = lower_intrin(tvm.tir.Select(tvm.all(y >= 0, x >= 0), tvm.floormod(x, y), zero)) + res = lower_intrin(tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floormod(x, y), zero)) check_value(res, x, y, data, lambda a, b: a % b if b > 0 and a >= 0 else 0) # const power of two - res = lower_intrin(tvm.floormod(x, tvm.const(8, dtype=dtype))) + res = lower_intrin(tvm.te.floormod(x, tvm.tir.const(8, dtype=dtype))) check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a % b) diff --git a/tests/python/unittest/test_pass_lower_warp_memory.py b/tests/python/unittest/test_pass_lower_warp_memory.py index 4f09271374445..191d80128a8ef 100644 --- a/tests/python/unittest/test_pass_lower_warp_memory.py +++ b/tests/python/unittest/test_pass_lower_warp_memory.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_lower_warp_mem(): m = 128 - A = tvm.placeholder((m,), name='A') - B = tvm.compute((m,), lambda i: A[i] + 3, name='B') + A = te.placeholder((m,), name='A') + B = te.compute((m,), lambda i: A[i] + 3, name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) AA = s.cache_read(A, "warp", [B]) xo, xi = s[B].split(B.op.axis[0], 32) xi0, xi1 = s[B].split(xi, factor=16) - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") s[B].bind(xi1, tx) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) s[AA].compute_at(s[B], xo) xo, xi = s[AA].split(s[AA].op.axis[0], 16) s[AA].bind(xi, tx) diff --git a/tests/python/unittest/test_pass_makeapi.py b/tests/python/unittest/test_pass_makeapi.py index 34f32ef01c7c2..d11acb5ccb9c5 100644 --- a/tests/python/unittest/test_pass_makeapi.py +++ b/tests/python/unittest/test_pass_makeapi.py @@ -15,22 +15,23 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy def test_makeapi(): """Not yet working, mock design""" - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - Cb = tvm.decl_buffer(C.shape, C.dtype, name='C') + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + Cb = tvm.tir.decl_buffer(C.shape, C.dtype, name='C') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, C:Cb}, 64) num_unpacked_args = 2 diff --git a/tests/python/unittest/test_pass_remove_no_op.py b/tests/python/unittest/test_pass_remove_no_op.py index a3927f7db49d9..b2339417dc70e 100644 --- a/tests/python/unittest/test_pass_remove_no_op.py +++ b/tests/python/unittest/test_pass_remove_no_op.py @@ -15,18 +15,19 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def nop(): return tvm.tir.Evaluate(0) def test_remove_no_op(): - i = tvm.var('i') - j = tvm.var('j') - k = tvm.var('k') - m = tvm.var('m') - n = tvm.var('n') + i = te.var('i') + j = te.var('j') + k = te.var('k') + m = te.var('m') + n = te.var('n') dtype = 'int64' - Ab = tvm.decl_buffer((n, ), dtype) + Ab = tvm.tir.decl_buffer((n, ), dtype) stmt = tvm.tir.For( i, 0, 4, 0, 0, tvm.tir.For( diff --git a/tests/python/unittest/test_pass_rewrite_for_tensor_core.py b/tests/python/unittest/test_pass_rewrite_for_tensor_core.py index cc99a25d81e9a..977dfc3d6b261 100644 --- a/tests/python/unittest/test_pass_rewrite_for_tensor_core.py +++ b/tests/python/unittest/test_pass_rewrite_for_tensor_core.py @@ -15,16 +15,17 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi import numpy as np from tvm.contrib import nvcc def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): - A = tvm.placeholder((n, l), name='A', dtype='float16') - B = tvm.placeholder((l, m), name='B', dtype='float16') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k].astype('float32') * B[k, j].astype('float32'), axis=k)) - s = tvm.create_schedule(C.op) + A = te.placeholder((n, l), name='A', dtype='float16') + B = te.placeholder((l, m), name='B', dtype='float16') + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: te.sum(A[i, k].astype('float32') * B[k, j].astype('float32'), axis=k)) + s = te.create_schedule(C.op) y, x = s[C].op.axis k = s[C].op.reduce_axis[0] @@ -57,12 +58,12 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): kl, ki = s[CL].split(ki, tile_k) s[C].reorder(yo, xo, tz, ty, tx, yi, xi) - s[C].bind(yo, tvm.thread_axis("blockIdx.y")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) - s[C].bind(vy, tvm.thread_axis((0, vthread), "vthread", name="vy")) + s[C].bind(yo, te.thread_axis("blockIdx.y")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) + s[C].bind(vy, te.thread_axis((0, vthread), "vthread", name="vy")) s[CL].compute_at(s[C], tx) yo, xo = CL.op.axis s[CL].reorder(ko, kl, ki, yo, xo) @@ -73,9 +74,9 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): tx, vec = s[AA].split(tx, factor=v) fused = s[AA].fuse(s[AA].op.axis[0], xo) _, ty = s[AA].split(fused, factor=by) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tz, tvm.thread_axis("threadIdx.z")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, te.thread_axis("threadIdx.y")) + s[AA].bind(tz, te.thread_axis("threadIdx.z")) + s[AA].bind(tx, te.thread_axis("threadIdx.x")) s[AA].vectorize(vec) s[BB].compute_at(s[CL], ko) @@ -84,9 +85,9 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): tx, vec = s[BB].split(tx, factor=v) fused = s[BB].fuse(s[BB].op.axis[0], xo) _, ty = s[BB].split(fused, factor=by) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tz, tvm.thread_axis("threadIdx.z")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tz, te.thread_axis("threadIdx.z")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].vectorize(vec) s[AL].compute_at(s[CL], kl) @@ -111,11 +112,11 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): np.testing.assert_allclose(c_np, c.asnumpy(), rtol=1e-3) def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): - A = tvm.placeholder((batch, n, l), name='A', dtype='float16') - B = tvm.placeholder((batch, l, m), name='B', dtype='float16') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((batch, n, m), lambda b, i, j: tvm.sum((A[b, i, k] * B[b, k, j]).astype('float32'), axis=k)) - s = tvm.create_schedule(C.op) + A = te.placeholder((batch, n, l), name='A', dtype='float16') + B = te.placeholder((batch, l, m), name='B', dtype='float16') + k = te.reduce_axis((0, l), name='k') + C = te.compute((batch, n, m), lambda b, i, j: te.sum((A[b, i, k] * B[b, k, j]).astype('float32'), axis=k)) + s = te.create_schedule(C.op) z, y, x = s[C].op.axis k = s[C].op.reduce_axis[0] @@ -148,13 +149,13 @@ def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): kl, ki = s[CL].split(ki, tile_k) s[C].reorder(z, yo, xo, tz, ty, tx, yi, xi) - s[C].bind(z, tvm.thread_axis("blockIdx.z")) - s[C].bind(yo, tvm.thread_axis("blockIdx.y")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) - s[C].bind(vy, tvm.thread_axis((0, vthread), "vthread", name="vy")) + s[C].bind(z, te.thread_axis("blockIdx.z")) + s[C].bind(yo, te.thread_axis("blockIdx.y")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) + s[C].bind(vy, te.thread_axis((0, vthread), "vthread", name="vy")) s[CL].compute_at(s[C], tx) zo, yo, xo = CL.op.axis s[CL].reorder(ko, kl, ki, zo, yo, xo) @@ -165,9 +166,9 @@ def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): tx, vec = s[AA].split(tx, factor=v) fused = s[AA].fuse(s[AA].op.axis[1], xo) _, ty = s[AA].split(fused, factor=by) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tz, tvm.thread_axis("threadIdx.z")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, te.thread_axis("threadIdx.y")) + s[AA].bind(tz, te.thread_axis("threadIdx.z")) + s[AA].bind(tx, te.thread_axis("threadIdx.x")) s[AA].vectorize(vec) s[BB].compute_at(s[CL], ko) @@ -176,9 +177,9 @@ def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): tx, vec = s[BB].split(tx, factor=v) fused = s[BB].fuse(s[BB].op.axis[1], xo) _, ty = s[BB].split(fused, factor=by) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tz, tvm.thread_axis("threadIdx.z")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tz, te.thread_axis("threadIdx.z")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].vectorize(vec) s[AL].compute_at(s[CL], kl) diff --git a/tests/python/unittest/test_pass_rewrite_unsafe_select.py b/tests/python/unittest/test_pass_rewrite_unsafe_select.py index dc6ae8286213b..c6203d90d2aff 100644 --- a/tests/python/unittest/test_pass_rewrite_unsafe_select.py +++ b/tests/python/unittest/test_pass_rewrite_unsafe_select.py @@ -15,12 +15,13 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_rewrite_Select(): ib = tvm.ir_builder.create() A = ib.allocate("float32", 100, name="A", scope="global") - i = tvm.var("i") + i = te.var("i") y = tvm.tir.Select(i > 1, A[i-1], 1.0) yy = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(y)).value @@ -28,7 +29,7 @@ def test_rewrite_Select(): tvm.tir.Select(i > 1, A[i-1], 1.0) > 0.0, A[i], 0.1) zz = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(z)).value - a = tvm.tir.Select(tvm.floordiv(i, 4) > 10, y, z) + a = tvm.tir.Select(tvm.te.floordiv(i, 4) > 10, y, z) aa = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(a)).value assert yy.name == "tvm_if_then_else" assert zz.name == "tvm_if_then_else" diff --git a/tests/python/unittest/test_pass_split_host_device.py b/tests/python/unittest/test_pass_split_host_device.py index e8858b8aa41e0..09f7740df9c95 100644 --- a/tests/python/unittest/test_pass_split_host_device.py +++ b/tests/python/unittest/test_pass_split_host_device.py @@ -16,13 +16,14 @@ # under the License. import pytest import tvm +from tvm import te @pytest.mark.xfail def test_loop_dependent_allocate(): - N = tvm.size_var("N") - A = tvm.placeholder((2*N,), "float32", "A") - C = tvm.compute((N, ), lambda i: A[2*i] + A[i+1], name='C') - s = tvm.create_schedule(C.op) + N = te.size_var("N") + A = te.placeholder((2*N,), "float32", "A") + C = te.compute((N, ), lambda i: A[2*i] + A[i+1], name='C') + s = te.create_schedule(C.op) AA = s.cache_read(A, "local", [C]) s[AA].compute_at(s[C], s[C].op.axis[0]) # this line should fail due to IRUseDefAnalysis sees an allocate statement diff --git a/tests/python/unittest/test_pass_storage_flatten.py b/tests/python/unittest/test_pass_storage_flatten.py index 47a43c7ac2a0f..e246c994239b9 100644 --- a/tests/python/unittest/test_pass_storage_flatten.py +++ b/tests/python/unittest/test_pass_storage_flatten.py @@ -15,31 +15,32 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_flatten2(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) stmt = tvm.ir_pass.Simplify(stmt) def test_flatten_prefetch(): - A = tvm.placeholder((25, 100, 4), name = 'A') - _A= tvm.decl_buffer(A.shape, A.dtype, name = 'A'); - i = tvm.size_var('i') - j = tvm.size_var('j') + A = te.placeholder((25, 100, 4), name = 'A') + _A= tvm.tir.decl_buffer(A.shape, A.dtype, name = 'A'); + i = te.size_var('i') + j = te.size_var('j') region = [tvm.ir.Range.make_by_min_extent(i[0], i[1]) for i in [(i, 2), (j, 8), (0, 4)]] stmt = tvm.tir.Prefetch(A.op, 0, A.dtype, region) stmt = tvm.ir_pass.StorageFlatten(stmt, {A: _A}, 64) @@ -52,17 +53,17 @@ def test_flatten_prefetch(): def test_flatten_storage_align(): m = 8 l = 16 - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) s[A1].storage_align(A1.op.axis[0], 2, 1) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) stmt = tvm.ir_pass.Simplify(stmt) assert(stmt.body.extents[0].value == 17 * 8) @@ -71,7 +72,7 @@ def test_flatten_double_buffer(): dtype = 'int64' n = 100 m = 4 - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") diff --git a/tests/python/unittest/test_pass_storage_rewrite.py b/tests/python/unittest/test_pass_storage_rewrite.py index d4125d0931985..562df4e43d713 100644 --- a/tests/python/unittest/test_pass_storage_rewrite.py +++ b/tests/python/unittest/test_pass_storage_rewrite.py @@ -15,22 +15,23 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_storage_share(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') num_stage = 5 B = A for t in range(num_stage): - B = tvm.compute((m, l), lambda i, j: B[i, j] + (t+1), name='A%d' % t) + B = te.compute((m, l), lambda i, j: B[i, j] + (t+1), name='A%d' % t) - s = tvm.create_schedule(B.op) - bounds = tvm.schedule.InferBound(s) + s = te.create_schedule(B.op) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) stmt = tvm.ir_pass.CanonicalSimplify(stmt) stmt = tvm.ir_pass.Simplify(stmt) @@ -61,7 +62,7 @@ def test_alloc_seq(): register_mem(scope_tb, max_bits) ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", 200, name="A", scope=scope_tb) @@ -84,24 +85,24 @@ def test_alloc_different_dtypes(): def stmt_generater(dtype_list, length): ib = tvm.ir_builder.create() base_dtype = dtype_list[0] - global_a = tvm.placeholder((length,), name = "global_a", dtype = base_dtype) + global_a = te.placeholder((length,), name = "global_a", dtype = base_dtype) assert len(dtype_list) == 4 with ib.for_range(0, length, name="j") as j: dtype = dtype_list[0] A = ib.allocate(dtype, length, name="A", scope="local.L0A") - A[j] = tvm.const(1, dtype = dtype) + A[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = dtype_list[1] B = ib.allocate(dtype, length, name="B", scope="local.L0A") - B[j] = tvm.const(1, dtype = dtype) + B[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = dtype_list[2] C = ib.allocate(dtype, length, name="C", scope="local.L0A") - C[j] = tvm.const(1, dtype = dtype) + C[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = dtype_list[3] D = ib.allocate(dtype, length, name="D", scope="local.L0A") - D[j] = tvm.const(1, dtype = dtype) + D[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = "int8" E = ib.allocate(dtype, length, name="E", scope="local.L0A") @@ -147,17 +148,17 @@ def verify(n): def test_inplace_rule(): m = 10 - A = tvm.placeholder((m,), name='A') - A0 = tvm.compute((m,), lambda i: A[i], name='A0') - A1 = tvm.compute((m,), lambda i: A[i] + 1, name='A1') - AA = tvm.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA') - B = tvm.compute((m,), lambda i: AA[i] + 1, name='B') - s = tvm.create_schedule(B.op) - bounds = tvm.schedule.InferBound(s) + A = te.placeholder((m,), name='A') + A0 = te.compute((m,), lambda i: A[i], name='A0') + A1 = te.compute((m,), lambda i: A[i] + 1, name='A1') + AA = te.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA') + B = te.compute((m,), lambda i: AA[i] + 1, name='B') + s = te.create_schedule(B.op) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) stmt = tvm.ir_pass.CanonicalSimplify(stmt) stmt = tvm.ir_pass.Simplify(stmt) @@ -174,22 +175,22 @@ def verify(n): def test_storage_combine(): n = 8 - A = tvm.placeholder((4,), name='A') + A = te.placeholder((4,), name='A') num_stage = 5 B = A stages = [] for t in range(num_stage): - B = tvm.compute((n, ), lambda i: B[i] + B[0] + (t+1), name='A%d' % t) + B = te.compute((n, ), lambda i: B[i] + B[0] + (t+1), name='A%d' % t) stages.append(B) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) for S in stages[:-1]: s[S].set_scope("global:tag") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) stmt = tvm.ir_pass.CanonicalSimplify(stmt) stmt = tvm.ir_pass.Simplify(stmt) @@ -204,26 +205,26 @@ def verify(n): def test_storage_share_gpu(): - m = tvm.var('m') - A = [tvm.placeholder((m), name='A')] + m = te.var('m') + A = [te.placeholder((m), name='A')] num_stage = 5 for t in range(num_stage): - A.append(tvm.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t)) - A.append(tvm.compute((m,), lambda i: A[-1][i], name='A%d' % t)) - s = tvm.create_schedule(A[-1].op) + A.append(te.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t)) + A.append(te.compute((m,), lambda i: A[-1][i], name='A%d' % t)) + s = te.create_schedule(A[-1].op) for t in range(num_stage): x = A[2*t+2].op.axis[0] bx, tx = s[A[2*t+2]].split(x, factor=32) - s[A[2*t+2]].bind(bx, tvm.thread_axis("blockIdx.x")) - s[A[2*t+2]].bind(tx, tvm.thread_axis("threadIdx.x")) + s[A[2*t+2]].bind(bx, te.thread_axis("blockIdx.x")) + s[A[2*t+2]].bind(tx, te.thread_axis("threadIdx.x")) s[A[2*t+1]].compute_at(s[A[2*t+2]], tx) s[A[2*t+1]].set_scope("shared") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A[0].shape, A[0].dtype, name='A') - Bb = tvm.decl_buffer(A[0].shape, A[0].dtype, name='B') + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='A') + Bb = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64) stmt = tvm.ir_pass.CanonicalSimplify(stmt) stmt = tvm.ir_pass.Simplify(stmt) @@ -240,7 +241,7 @@ def verify(n): def test_parallel_alloc(): ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, n, name="i", for_type="parallel") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", n, name="A", scope="global") @@ -251,10 +252,10 @@ def test_parallel_alloc(): assert (isinstance(body.body.body, tvm.tir.Allocate)) ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, n, name="t") as i: ib.scope_attr( - tvm.const(1, "int32") , "pragma_scope", + tvm.tir.const(1, "int32") , "pragma_scope", tvm.tir.StringImm("parallel_launch_point")) with ib.for_range(0, n, name="i", for_type="parallel") as i: with ib.for_range(0, 10, name="j") as j: @@ -269,24 +270,24 @@ def test_inplace_rule2(scope_tb = "local_TB2", max_bits = 1024 * 1024 * 1024): #Test Buffer register_mem(scope_tb, max_bits) m = 10 - A = tvm.placeholder((m,), name='A') - C = tvm.placeholder((m,), name='C') - D = tvm.placeholder((m,), name='D') - A0 = tvm.compute((m,), lambda i: A[i] + C[i], name='A0') - A1 = tvm.compute((m,), lambda i: D[i] * D[i], name='A1') - A2 = tvm.compute((m,), lambda i: A0[i] + A1[i], name='A2') - B = tvm.compute((m,), lambda i: A2[i], name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((m,), name='A') + C = te.placeholder((m,), name='C') + D = te.placeholder((m,), name='D') + A0 = te.compute((m,), lambda i: A[i] + C[i], name='A0') + A1 = te.compute((m,), lambda i: D[i] * D[i], name='A1') + A2 = te.compute((m,), lambda i: A0[i] + A1[i], name='A2') + B = te.compute((m,), lambda i: A2[i], name='B') + s = te.create_schedule(B.op) A0L = s.cache_read(A0, scope_tb, [A2]) A1L = s.cache_read(A1, scope_tb, [A2]) A2L = s.cache_read(A2, scope_tb, [B]) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - Cc = tvm.decl_buffer(C.shape, B.dtype, name='C') - Dd = tvm.decl_buffer(D.shape, B.dtype, name='D') + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + Cc = tvm.tir.decl_buffer(C.shape, B.dtype, name='C') + Dd = tvm.tir.decl_buffer(D.shape, B.dtype, name='D') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64) stmt = tvm.ir_pass.CanonicalSimplify(stmt) stmt = tvm.ir_pass.Simplify(stmt) @@ -318,27 +319,27 @@ def test_inplace_rule3(): register_mem(scope_tb, max_bits) m = 10 - B0 = tvm.placeholder((m,), name='B0') - B1 = tvm.placeholder((m,), name='B1') - B2 = tvm.placeholder((m,), name='B2') - B3 = tvm.placeholder((m,), name='B3') - B4 = tvm.placeholder((m,), name='B4') - B5 = tvm.placeholder((m,), name='B5') + B0 = te.placeholder((m,), name='B0') + B1 = te.placeholder((m,), name='B1') + B2 = te.placeholder((m,), name='B2') + B3 = te.placeholder((m,), name='B3') + B4 = te.placeholder((m,), name='B4') + B5 = te.placeholder((m,), name='B5') - B6 = tvm.compute((m,), lambda i: B1[i] * B5[i], name='B6') - B7 = tvm.compute((m,), lambda i: B2[i] * B4[i], name='B7') - B8 = tvm.compute((m,), lambda i: B6[i] - B7[i], name='B8') + B6 = te.compute((m,), lambda i: B1[i] * B5[i], name='B6') + B7 = te.compute((m,), lambda i: B2[i] * B4[i], name='B7') + B8 = te.compute((m,), lambda i: B6[i] - B7[i], name='B8') - B9 = tvm.compute((m,), lambda i: B2[i] * B3[i], name='B9') - B10 = tvm.compute((m,), lambda i: B0[i] * B5[i], name='B10') - B11 = tvm.compute((m,), lambda i: B9[i] - B10[i], name='B11') + B9 = te.compute((m,), lambda i: B2[i] * B3[i], name='B9') + B10 = te.compute((m,), lambda i: B0[i] * B5[i], name='B10') + B11 = te.compute((m,), lambda i: B9[i] - B10[i], name='B11') - B12 = tvm.compute((m,), lambda i: B0[i] * B4[i], name='B12') - B13 = tvm.compute((m,), lambda i: B1[i] * B3[i], name='B13') - B14 = tvm.compute((m,), lambda i: B12[i] - B13[i], name='B14') + B12 = te.compute((m,), lambda i: B0[i] * B4[i], name='B12') + B13 = te.compute((m,), lambda i: B1[i] * B3[i], name='B13') + B14 = te.compute((m,), lambda i: B12[i] - B13[i], name='B14') - B = tvm.compute((m,), lambda i: B8[i] * B11[i] + B14[i], name='B') - s = tvm.create_schedule(B.op) + B = te.compute((m,), lambda i: B8[i] * B11[i] + B14[i], name='B') + s = te.create_schedule(B.op) B1L = s.cache_read(B1, scope_tb, [B6, B13]) B5L = s.cache_read(B5, scope_tb, [B6, B10]) @@ -368,18 +369,18 @@ def test_inplace_rule3(): s[B10].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) - B0a = tvm.decl_buffer(B0.shape, B0.dtype, name='B0') - B1a = tvm.decl_buffer(B1.shape, B1.dtype, name='B1') - B2a = tvm.decl_buffer(B2.shape, B2.dtype, name='B2') - B3a = tvm.decl_buffer(B3.shape, B3.dtype, name='B3') - B4a = tvm.decl_buffer(B4.shape, B4.dtype, name='B4') - B5a = tvm.decl_buffer(B5.shape, B5.dtype, name='B5') + B0a = tvm.tir.decl_buffer(B0.shape, B0.dtype, name='B0') + B1a = tvm.tir.decl_buffer(B1.shape, B1.dtype, name='B1') + B2a = tvm.tir.decl_buffer(B2.shape, B2.dtype, name='B2') + B3a = tvm.tir.decl_buffer(B3.shape, B3.dtype, name='B3') + B4a = tvm.tir.decl_buffer(B4.shape, B4.dtype, name='B4') + B5a = tvm.tir.decl_buffer(B5.shape, B5.dtype, name='B5') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') stmt = tvm.ir_pass.StorageFlatten(stmt, {B0: B0a, B1: B1a, B2: B2a, B3: B2a, B4: B4a, B5: B5a, B: Bb}, 64) stmt = tvm.ir_pass.CanonicalSimplify(stmt) stmt = tvm.ir_pass.Simplify(stmt) @@ -393,7 +394,7 @@ def verify(n): def test_alloc_seq_type(): ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", 200, name="A", scope="local.L0A") @@ -401,9 +402,9 @@ def test_alloc_seq_type(): A[j] = 1.2 A1[j] = 1.3 B = ib.allocate("int16", 200, name="B", scope="local.L0A") - B[j] = tvm.const(1, "int16") + B[j] = tvm.tir.const(1, "int16") C = ib.allocate("int16", 200, name="C", scope="local.L0A") - C[j] = tvm.const(1, "int16") + C[j] = tvm.tir.const(1, "int16") D = ib.allocate("int16", 200, name="D", scope="local.L0A") D[j] = B[j] + C[j] A2 = ib.allocate("float32", 200, name="A2", scope="local.L0A") @@ -426,14 +427,14 @@ def test_alloc_seq_type2(): register_mem(scope_tb, max_bits) ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", 200, name="A", scope=scope_tb) A[j] = 1.2 with ib.for_range(0, 20, name="j") as j: B = ib.allocate("int16", 400, name="B", scope=scope_tb) - B[j] = tvm.const(1, "int16") + B[j] = tvm.tir.const(1, "int16") with ib.for_range(0, 10, name="j") as j: C = ib.allocate("float32", 200, name="C", scope=scope_tb) C[j] = 1.2 @@ -451,19 +452,19 @@ def verify(n): def test_reuse_small_buffer(): ib = tvm.ir_builder.create() - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("int16", 200, name="A", scope="local.L0A") - A[j] = tvm.const(1, "int16") + A[j] = tvm.tir.const(1, "int16") B = ib.allocate("int16", 200, name="B", scope="local.L0A") - B[j] = tvm.const(1, "int16") + B[j] = tvm.tir.const(1, "int16") B1 = ib.allocate("int16", 200, name="B1", scope="local.L0A") B1[j] = A[j] + B[j] C = ib.allocate("int16", 400, name="C", scope="local.L0A") - C[j] = tvm.const(1, "int16") + C[j] = tvm.tir.const(1, "int16") D = ib.allocate("int16", 400, name="D", scope="local.L0A") - D[j] = tvm.const(1, "int16") + D[j] = tvm.tir.const(1, "int16") E = ib.allocate("int16", 400, name="E", scope="local.L0A") E[j] = C[j] @@ -481,15 +482,15 @@ def verify(n): def test_replace_dataflow(): shape = (255,) - A = tvm.placeholder(shape, name = "A") - B = tvm.compute(shape, lambda i: A[i] + A[i], name = "B") - C = tvm.compute(shape, lambda i: A[i] + B[i], name = "C") - D = tvm.compute(shape, lambda i: A[i] + C[i], name = "D") - E = tvm.compute(shape, lambda i: A[i] + D[i], name = "E") + A = te.placeholder(shape, name = "A") + B = te.compute(shape, lambda i: A[i] + A[i], name = "B") + C = te.compute(shape, lambda i: A[i] + B[i], name = "C") + D = te.compute(shape, lambda i: A[i] + C[i], name = "D") + E = te.compute(shape, lambda i: A[i] + D[i], name = "E") - s = tvm.create_schedule(E.op) + s = te.create_schedule(E.op) s.cache_read(A, "local", [B, C, D, E]) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) @@ -505,11 +506,11 @@ def compute(a, b): n = 16384 shape = (n, n) - a = tvm.placeholder(shape, name='a', dtype='int32') - b = tvm.placeholder(shape, name='b', dtype='int32') - c = tvm.compute(shape, lambda i, j: compute(a, b)[i, j]) - c = tvm.compute(shape, lambda i, j: 1 + c[i, j]) - s = tvm.create_schedule(c.op) + a = te.placeholder(shape, name='a', dtype='int32') + b = te.placeholder(shape, name='b', dtype='int32') + c = te.compute(shape, lambda i, j: compute(a, b)[i, j]) + c = te.compute(shape, lambda i, j: 1 + c[i, j]) + s = te.create_schedule(c.op) stmt = tvm.lower(s, [a, b, c], simple_mode=True) def verify(n): if isinstance(n, tvm.tir.Allocate): diff --git a/tests/python/unittest/test_pass_storage_sync.py b/tests/python/unittest/test_pass_storage_sync.py index 0ed0c993ac559..c6c6b0fbdb391 100644 --- a/tests/python/unittest/test_pass_storage_sync.py +++ b/tests/python/unittest/test_pass_storage_sync.py @@ -15,26 +15,27 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_storage_sync(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], factor=8) - s[A2].bind(xo, tvm.thread_axis("blockIdx.x")) + s[A2].bind(xo, te.thread_axis("blockIdx.x")) s[A1].compute_at(s[A2], xo) s[A1].set_scope("shared") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True) flist = tvm.ir_pass.SplitHostDevice(f) @@ -52,10 +53,10 @@ def meminfo_cache(): unit_bits=8, max_simd_bits=32, max_num_bits=128, - head_address=tvm.call_extern("handle", "global_cache")) + head_address=tvm.tir.call_extern("handle", "global_cache")) ib = tvm.ir_builder.create() - n = tvm.size_var("n") - cp = tvm.thread_axis((0, 1), "cop") + n = te.size_var("n") + cp = te.thread_axis((0, 1), "cop") A = ib.allocate("float32", 128, name="A", scope="global.cache") with ib.for_range(0, n, name="i") as i: A[i] = A[i] + 1 @@ -76,9 +77,9 @@ def meminfo_cache(): def test_coproc_sync2(): ib = tvm.ir_builder.create() - n = tvm.size_var("n") - cp = tvm.thread_axis((0, 1), "cop") - ty = tvm.thread_axis("cthread") + n = te.size_var("n") + cp = te.thread_axis((0, 1), "cop") + ty = te.thread_axis("cthread") A = ib.allocate("float32", 128, name="A") ib.scope_attr(ty, "virtual_thread", 2) with ib.new_scope(): @@ -102,8 +103,8 @@ def __check_list(tvm_array, py_list): return True ib = tvm.ir_builder.create() - n = tvm.size_var("n") - cp = tvm.thread_axis((0, 1), "cop") + n = te.size_var("n") + cp = te.thread_axis((0, 1), "cop") A = ib.allocate("float32", 128, name="A", scope="global.cache") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, n, name="i") as j: diff --git a/tests/python/unittest/test_pass_unroll.py b/tests/python/unittest/test_pass_unroll.py index c6b536bf970e1..8995395b64d24 100644 --- a/tests/python/unittest/test_pass_unroll.py +++ b/tests/python/unittest/test_pass_unroll.py @@ -15,14 +15,15 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import os def test_unroll_loop(): ib = tvm.ir_builder.create() dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) Aptr = ib.buffer_ptr(Ab) # for i in 0 to n-1: with ib.for_range(n, n + 2, name="i") as i: @@ -40,7 +41,7 @@ def test_unroll_loop(): assert ret.for_type == tvm.tir.For.Unrolled ib = tvm.ir_builder.create() - ib.scope_attr(tvm.const(0, "int32"), "pragma_auto_unroll_max_step", 16) + ib.scope_attr(tvm.tir.const(0, "int32"), "pragma_auto_unroll_max_step", 16) ib.emit(stmt) wrapped = ib.get() wrapped = tvm.tir.SeqStmt([wrapped, stmt]) @@ -54,8 +55,8 @@ def test_unroll_loop(): def test_unroll_fake_loop(): ib = tvm.ir_builder.create() dtype = 'int32' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) Aptr = ib.buffer_ptr(Ab) # for i in 0 to n-1: with ib.for_range(0, 1, name="i") as i: @@ -68,13 +69,13 @@ def test_unroll_fake_loop(): assert isinstance(ret[0], tvm.tir.Store) def test_unroll_single_count_loops(): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: A(*i), name='B') - s = tvm.create_schedule(B.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: A(*i), name='B') + s = te.create_schedule(B.op) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + dom_map = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) # all parameters to UnrolLoops are default values except for # auto_unroll_max_extent which has been set to 1 (default:0) after_unroll_stmt = tvm.ir_pass.UnrollLoop(stmt, 0, 8, 1, True) diff --git a/tests/python/unittest/test_pass_vectorize.py b/tests/python/unittest/test_pass_vectorize.py index d1cd2d46074ae..af545aab8648a 100644 --- a/tests/python/unittest/test_pass_vectorize.py +++ b/tests/python/unittest/test_pass_vectorize.py @@ -15,15 +15,16 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_vectorize_loop(): dtype = 'int64' - n = tvm.var('n') + n = te.var('n') ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, n) as i: with ib.for_range(0, 4, for_type="vectorize") as j: - A[j] = tvm.const(1, A.dtype) + A[j] = tvm.tir.const(1, A.dtype) stmt = ib.get() assert isinstance(stmt.body, tvm.tir.For) @@ -35,12 +36,12 @@ def test_vectorize_loop(): def test_vectorize_vector(): dtype = 'int64' - n = tvm.var('n') + n = te.var('n') ib = tvm.ir_builder.create() A = ib.pointer("float32x4", name="A") with ib.for_range(0, n) as i: with ib.for_range(0, 4, for_type="vectorize") as j: - A[j] = tvm.const(1, A.dtype) + A[j] = tvm.tir.const(1, A.dtype) stmt = ib.get() assert isinstance(stmt.body, tvm.tir.For) stmt = tvm.ir_pass.VectorizeLoop(stmt) @@ -51,8 +52,8 @@ def test_vectorize_vector(): def test_vectorize_with_if(): - n = tvm.var('n') - x = tvm.var('x') + n = te.var('n') + x = te.var('x') ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: @@ -70,7 +71,7 @@ def test_vectorize_with_if(): assert isinstance(stmt.else_case, tvm.tir.For) def test_vectorize_with_le_cond(): - n = tvm.var('n') + n = te.var('n') ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: @@ -81,7 +82,7 @@ def test_vectorize_with_le_cond(): assert isinstance(stmt, tvm.tir.For) def test_vectorize_with_ge_cond(): - n = tvm.var('n') + n = te.var('n') ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: @@ -92,12 +93,12 @@ def test_vectorize_with_ge_cond(): assert isinstance(stmt, tvm.tir.For) def test_vectorize_if_then_else(): - n = tvm.var('n') - x = tvm.var('x') + n = te.var('n') + x = te.var('x') ib = tvm.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: - A[i] = tvm.call_intrin("float32", "tvm_if_then_else", + A[i] = tvm.tir.call_intrin("float32", "tvm_if_then_else", i > 0, A[i] + 1, A[i]) stmt = ib.get() @@ -109,7 +110,7 @@ def test_vectorize_if_then_else(): A = ib.pointer("float32", name="A") with ib.for_range(0, n) as k: with ib.for_range(0, 4, for_type="vectorize") as i: - A[k * 4 + i] = tvm.call_intrin("float32", "tvm_if_then_else", + A[k * 4 + i] = tvm.tir.call_intrin("float32", "tvm_if_then_else", k > 0, A[k * 4 + i], 0) stmt = ib.get() diff --git a/tests/python/unittest/test_pass_verify_gpu_code.py b/tests/python/unittest/test_pass_verify_gpu_code.py index 76e5f0d38c3c9..724165385d809 100644 --- a/tests/python/unittest/test_pass_verify_gpu_code.py +++ b/tests/python/unittest/test_pass_verify_gpu_code.py @@ -16,6 +16,7 @@ # under the License. """Test gpu code verifier""" import tvm +from tvm import te def get_verify_pass(valid, **kwargs): def verify_pass(stmt): @@ -31,15 +32,15 @@ def check_shared_memory(dtype): tvm_type = tvm.runtime.DataType(dtype) type_size = tvm_type.bits // 8 * tvm_type.lanes - A = tvm.placeholder((N,), name='A', dtype=dtype) - B = tvm.compute((N, ), lambda i: A[i], name='B') + A = te.placeholder((N,), name='A', dtype=dtype) + B = te.compute((N, ), lambda i: A[i], name='B') - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) AA = s.cache_read(A, "shared", [B]) o, i = s[B].split(s[B].op.axis[0], M) s[AA].compute_at(s[B], o) - s[B].bind(o, tvm.thread_axis("blockIdx.x")) - s[B].bind(i, tvm.thread_axis("threadIdx.x")) + s[B].bind(o, te.thread_axis("blockIdx.x")) + s[B].bind(i, te.thread_axis("threadIdx.x")) # shared memory usage: M * sizeof(dtype) Bytes # thread usage: M @@ -68,14 +69,14 @@ def test_local_memory(): N = 1024 M = 128 - A = tvm.placeholder((N,), name='A', dtype='float32') - B = tvm.compute((N, ), lambda i: A[i], name='B') + A = te.placeholder((N,), name='A', dtype='float32') + B = te.compute((N, ), lambda i: A[i], name='B') - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) AA = s.cache_read(A, "local", [B]) o, i = s[B].split(s[B].op.axis[0], M) s[AA].compute_at(s[B], o) - s[B].bind(o, tvm.thread_axis("blockIdx.x")) + s[B].bind(o, te.thread_axis("blockIdx.x")) # local memory usage: M * 4B # thread usage: M @@ -103,14 +104,14 @@ def test_num_thread(): N = 1024 M = 128 - A = tvm.placeholder((N,), name='A', dtype='float32') - B = tvm.compute((N, ), lambda i: A[i], name='B') + A = te.placeholder((N,), name='A', dtype='float32') + B = te.compute((N, ), lambda i: A[i], name='B') - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) o, i = s[B].split(s[B].op.axis[0], M) - s[B].bind(o, tvm.thread_axis('threadIdx.x')) - s[B].bind(i, tvm.thread_axis("threadIdx.y")) + s[B].bind(o, te.thread_axis('threadIdx.x')) + s[B].bind(i, te.thread_axis("threadIdx.y")) # shared memory usage: 0 # thread usage: N @@ -153,14 +154,14 @@ def test_num_thread(): def test_multiple_kernels(): N = 1024 - A = tvm.placeholder((N, N), name='A') - B = tvm.compute((N, N), lambda i, j: A[i, j]) - C = tvm.compute((N, N), lambda i, j: B[i, j]) + A = te.placeholder((N, N), name='A') + B = te.compute((N, N), lambda i, j: A[i, j]) + C = te.compute((N, N), lambda i, j: B[i, j]) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) - s[C].bind(s[C].op.axis[1], tvm.thread_axis("threadIdx.x")) - s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x")) + s[C].bind(s[C].op.axis[1], te.thread_axis("threadIdx.x")) + s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x")) # shared memory usage: 0 # thread usage: N @@ -187,14 +188,14 @@ def test_multiple_kernels(): def test_wrong_bind(): N = 1024 - A = tvm.placeholder((N, N-1), name='A') - B = tvm.compute((N, N-1), lambda i, j: A[i, j]) + A = te.placeholder((N, N-1), name='A') + B = te.compute((N, N-1), lambda i, j: A[i, j]) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) # bind a thread axis to two loop axes with different lengths - s[B].bind(s[B].op.axis[0], tvm.thread_axis("threadIdx.x")) - s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x")) + s[B].bind(s[B].op.axis[0], te.thread_axis("threadIdx.x")) + s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x")) for target in ['opencl', 'cuda']: if not tvm.context(target).exist: diff --git a/tests/python/unittest/test_pass_verify_memory.py b/tests/python/unittest/test_pass_verify_memory.py index e76b6e55144f3..336f341d61cfd 100644 --- a/tests/python/unittest/test_pass_verify_memory.py +++ b/tests/python/unittest/test_pass_verify_memory.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te # The following DLDeviceType/TVMDeviceExtType values # are originally defined in dlpack.h and c_runtime_api.h. @@ -26,16 +27,16 @@ def lower(sch, args): binds = {} arg_list = [] for x in args: - if isinstance(x, tvm.tensor.Tensor): - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name) + if isinstance(x, te.tensor.Tensor): + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name) assert x not in binds binds[x] = buf arg_list.append(buf) else: raise ValueError("args must be Tensor, Buffer or Var") sch = sch.normalize() - bounds = tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) stmt = tvm.ir_pass.LoopPartition(stmt, False) stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64) func = tvm.ir_pass.MakeAPI(stmt, "myadd", arg_list, 0, True) @@ -46,15 +47,15 @@ def lower(sch, args): # So VerifyMemory pass is expected to succeed. # def test_verify_memory_all_bind(): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B") # B is bound to threads. - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=64) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) func = lower(s, [A, B]) @@ -66,12 +67,12 @@ def test_verify_memory_all_bind(): # So VerifyMemory pass fails when device type is GPU. # def test_verify_memory_not_bind(): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B") # B is not bound to threads. - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) func = lower(s, [A, B]) @@ -85,17 +86,17 @@ def test_verify_memory_not_bind(): # So VerifyMemory pass fails when device type is GPU. # def test_verify_memory_partially_bind(): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B") - C = tvm.compute(B.shape, lambda i: B[i] + 2.0, name="C") - D = tvm.compute(C.shape, lambda i: C[i] + 2.0, name="D") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B") + C = te.compute(B.shape, lambda i: B[i] + 2.0, name="C") + D = te.compute(C.shape, lambda i: C[i] + 2.0, name="D") # C is bound to threads, but B and D are not. - s = tvm.create_schedule([B.op, C.op, D.op]) + s = te.create_schedule([B.op, C.op, D.op]) bx, tx = s[C].split(C.op.axis[0], factor=64) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) func = lower(s, [A, B, C, D]) diff --git a/tests/python/unittest/test_pass_virtual_thread.py b/tests/python/unittest/test_pass_virtual_thread.py index 48a769faed31e..a6675ffe9ba11 100644 --- a/tests/python/unittest/test_pass_virtual_thread.py +++ b/tests/python/unittest/test_pass_virtual_thread.py @@ -15,26 +15,27 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_virtual_thread(): - m = tvm.var('m') - A = tvm.placeholder((m, ), name='A') - A1 = tvm.compute((m,), lambda i: A[i], name='A1') - A2 = tvm.compute((m,), lambda i: A1[i] + 3, name='A2') + m = te.var('m') + A = te.placeholder((m, ), name='A') + A1 = te.compute((m,), lambda i: A[i], name='A1') + A2 = te.compute((m,), lambda i: A1[i] + 3, name='A2') - s = tvm.create_schedule(A2.op) - vx = tvm.thread_axis("vthread", name="vx") + s = te.create_schedule(A2.op) + vx = te.thread_axis("vthread", name="vx") xo, xi = s[A2].split(A2.op.axis[0], nparts=2) s[A2].bind(xo, vx) xo, xi = s[A2].split(xi, 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2') stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) stmt = tvm.ir_pass.Simplify(stmt) stmt = tvm.ir_pass.InjectVirtualThread(stmt) diff --git a/tests/python/unittest/test_runtime_error.py b/tests/python/unittest/test_runtime_error.py index ac019a0aab40c..70166b327cb69 100644 --- a/tests/python/unittest/test_runtime_error.py +++ b/tests/python/unittest/test_runtime_error.py @@ -16,6 +16,7 @@ # under the License. """Test runtime error handling""" import tvm +from tvm import te import tvm.testing def test_op_translation(): diff --git a/tests/python/unittest/test_runtime_extension.py b/tests/python/unittest/test_runtime_extension.py index 5207b0956941d..1dd9bc8a19f2a 100644 --- a/tests/python/unittest/test_runtime_extension.py +++ b/tests/python/unittest/test_runtime_extension.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np @tvm.register_extension @@ -29,9 +30,9 @@ def _tvm_handle(self): def test_dltensor_compatible(): dtype = 'int64' - n = tvm.var('n') - Ab = tvm.decl_buffer((n,), dtype) - i = tvm.var('i') + n = te.var('n') + Ab = tvm.tir.decl_buffer((n,), dtype) + i = te.var('i') ib = tvm.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n - 1, "i") as i: diff --git a/tests/python/unittest/test_runtime_graph.py b/tests/python/unittest/test_runtime_graph.py index da5bea1f19ff1..ee2cd718e45f6 100644 --- a/tests/python/unittest/test_runtime_graph.py +++ b/tests/python/unittest/test_runtime_graph.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import json from tvm import rpc @@ -22,9 +23,9 @@ def test_graph_simple(): n = 4 - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) node0 = {"op": "null", "name": "x", "inputs": []} node1 = {"op": "tvm_op", "name": "add", diff --git a/tests/python/unittest/test_runtime_graph_debug.py b/tests/python/unittest/test_runtime_graph_debug.py index aeb4809e6c866..658d9eb95ef94 100644 --- a/tests/python/unittest/test_runtime_graph_debug.py +++ b/tests/python/unittest/test_runtime_graph_debug.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te import numpy as np import json from tvm import rpc @@ -24,9 +25,9 @@ def test_graph_simple(): n = 4 - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) node0 = {"op": "null", "name": "x", "inputs": []} node1 = {"op": "tvm_op", "name": "add", diff --git a/tests/python/unittest/test_runtime_heterogeneous.py b/tests/python/unittest/test_runtime_heterogeneous.py index a718ed8342e63..8ca61c1920bae 100644 --- a/tests/python/unittest/test_runtime_heterogeneous.py +++ b/tests/python/unittest/test_runtime_heterogeneous.py @@ -20,6 +20,7 @@ import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime, util import topi @@ -132,9 +133,9 @@ def check_device(device, target_device): shape = (4,) # Create module for add whose target is the device. - tensor_a = tvm.placeholder(shape, name="A") - tensor_b = tvm.placeholder(shape, name="B") - elemwise_add = tvm.compute(shape, lambda *i: tensor_a(*i) + tensor_a = te.placeholder(shape, name="A") + tensor_b = te.placeholder(shape, name="B") + elemwise_add = te.compute(shape, lambda *i: tensor_a(*i) + tensor_b(*i), name="elemwise_add") target = topi.cpp.TEST_create_target(device) schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add]) @@ -144,13 +145,13 @@ def check_device(device, target_device): # Insert copy. Neither compute nor schedule is required for the copy # node. The compute will be performed at runtime which is just data # copy from the input to the output. - tensor_copy = tvm.placeholder(shape, name="__copy") + tensor_copy = te.placeholder(shape, name="__copy") # Create module for sub whose target is the host. - tensor_c = tvm.placeholder(shape, name="C") - elemwise_sub = tvm.compute(shape, lambda *i: tensor_copy(*i) + tensor_c = te.placeholder(shape, name="C") + elemwise_sub = te.compute(shape, lambda *i: tensor_copy(*i) - tensor_c(*i), name="elemwise_sub") - schedule_sub = tvm.create_schedule(elemwise_sub.op) + schedule_sub = te.create_schedule(elemwise_sub.op) lower_sub = tvm.lower(schedule_sub, [tensor_copy, tensor_c, elemwise_sub], name="elemwise_sub") @@ -321,17 +322,17 @@ def check_device(device, target_device): # Insert copy nodes for data transferring between add and sub nodes. # Transfers data from gpu to cpu. - copy_add_sub = tvm.placeholder(shape, name="__copy0") + copy_add_sub = te.placeholder(shape, name="__copy0") # Transfers data from cpu to gpu. - copy_sub_add = tvm.placeholder(shape, name="__copy1") + copy_sub_add = te.placeholder(shape, name="__copy1") # Create a module containing adds on the device. - tensor_a = tvm.placeholder(shape, name="A") - tensor_b = tvm.placeholder(shape, name="B") - tensor_d = tvm.placeholder(shape, name="D") - elemwise_add0 = tvm.compute(shape, lambda *i: tensor_a(*i) + tensor_a = te.placeholder(shape, name="A") + tensor_b = te.placeholder(shape, name="B") + tensor_d = te.placeholder(shape, name="D") + elemwise_add0 = te.compute(shape, lambda *i: tensor_a(*i) + tensor_b(*i), name="elemwise_add0") - elemwise_add1 = tvm.compute(shape, lambda *i: copy_sub_add(*i) + elemwise_add1 = te.compute(shape, lambda *i: copy_sub_add(*i) + tensor_d(*i), name="elemwise_add1") target = topi.cpp.TEST_create_target(device) add_schedule0 = topi.cpp.cuda.schedule_injective( @@ -345,10 +346,10 @@ def check_device(device, target_device): add_schedule1, [tensor_d, copy_sub_add, elemwise_add1], name="elemwise_add1") # Create module for sub whose target is the host. - tensor_c = tvm.placeholder(shape, name="C") - elemwise_sub = tvm.compute(shape, lambda *i: copy_add_sub(*i) + tensor_c = te.placeholder(shape, name="C") + elemwise_sub = te.compute(shape, lambda *i: copy_add_sub(*i) - tensor_c(*i), name="elemwise_sub") - sub_schedule = tvm.create_schedule(elemwise_sub.op) + sub_schedule = te.create_schedule(elemwise_sub.op) lower_sub = tvm.lower(sub_schedule, [copy_add_sub, tensor_c, elemwise_sub], name="elemwise_sub") diff --git a/tests/python/unittest/test_runtime_measure.py b/tests/python/unittest/test_runtime_measure.py index 7413a3732086e..25361a1912636 100644 --- a/tests/python/unittest/test_runtime_measure.py +++ b/tests/python/unittest/test_runtime_measure.py @@ -18,6 +18,7 @@ import ctypes import tvm +from tvm import te from tvm.contrib.util import tempdir @@ -32,8 +33,8 @@ def my_debug(filename): with open(filename, "a") as fout: fout.write("c") - X = tvm.compute((), lambda : tvm.call_packed("my_debug", filename)) - s = tvm.create_schedule(X.op) + X = te.compute((), lambda : tvm.tir.call_packed("my_debug", filename)) + s = te.create_schedule(X.op) func = tvm.build(s, [X]) x = tvm.nd.empty((), dtype="int32") diff --git a/tests/python/unittest/test_runtime_micro.py b/tests/python/unittest/test_runtime_micro.py index f6114dae6f1b0..9e39898fa084e 100644 --- a/tests/python/unittest/test_runtime_micro.py +++ b/tests/python/unittest/test_runtime_micro.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime, util from tvm import relay import tvm.micro as micro @@ -76,11 +77,11 @@ def test_add(): dtype = "float32" # Construct TVM expression. - tvm_shape = tvm.convert(shape) - A = tvm.placeholder(tvm_shape, name="A", dtype=dtype) - B = tvm.placeholder(tvm_shape, name="B", dtype=dtype) - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name="C") - s = tvm.create_schedule(C.op) + tvm_shape = tvm.runtime.convert(shape) + A = te.placeholder(tvm_shape, name="A", dtype=dtype) + B = te.placeholder(tvm_shape, name="B", dtype=dtype) + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C") + s = te.create_schedule(C.op) func_name = "fadd" c_mod = tvm.build(s, [A, B, C], target="c", name=func_name) @@ -105,12 +106,12 @@ def test_workspace_add(): dtype = "float32" # Construct TVM expression. - tvm_shape = tvm.convert(shape) - A = tvm.placeholder(tvm_shape, name="A", dtype=dtype) - B = tvm.placeholder(tvm_shape, name="B", dtype=dtype) - B = tvm.compute(A.shape, lambda *i: A(*i) + 1, name="B") - C = tvm.compute(A.shape, lambda *i: B(*i) + 1, name="C") - s = tvm.create_schedule(C.op) + tvm_shape = tvm.runtime.convert(shape) + A = te.placeholder(tvm_shape, name="A", dtype=dtype) + B = te.placeholder(tvm_shape, name="B", dtype=dtype) + B = te.compute(A.shape, lambda *i: A(*i) + 1, name="B") + C = te.compute(A.shape, lambda *i: B(*i) + 1, name="C") + s = te.create_schedule(C.op) func_name = "fadd_two_workspace" c_mod = tvm.build(s, [A, C], target="c", name=func_name) diff --git a/tests/python/unittest/test_runtime_module_export.py b/tests/python/unittest/test_runtime_module_export.py index ee82da65c8f45..35bafb4ba3c7b 100644 --- a/tests/python/unittest/test_runtime_module_export.py +++ b/tests/python/unittest/test_runtime_module_export.py @@ -17,6 +17,7 @@ from tvm import relay from tvm.relay import testing import tvm +from tvm import te from tvm.contrib import util header_file_dir_path = util.tempdir() @@ -95,9 +96,9 @@ def verify_multi_dso_mod_export(obj_format): with relay.build_config(opt_level=3): _, resnet18_cpu_lib, _ = relay.build_module.build(resnet18_mod, "llvm", params=resnet18_params) - A = tvm.placeholder((1024,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((1024,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm", name="myadd") from tvm.contrib import util temp = util.tempdir() @@ -144,9 +145,9 @@ def verify_json_import_dso(obj_format): f.write(subgraph_json) # Get Json and module. - A = tvm.placeholder((1024,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((1024,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm", name="myadd") try: ext_lib = tvm.runtime.load_module(subgraph_path, "examplejson") @@ -179,9 +180,9 @@ def verify_multi_c_mod_export(): with relay.build_config(opt_level=3): _, resnet18_cpu_lib, _ = relay.build_module.build(resnet18_mod, "llvm", params=resnet18_params) - A = tvm.placeholder((1024,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((1024,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "c", name="myadd") engine_module = generate_engine_module() from tvm.contrib import util diff --git a/tests/python/unittest/test_runtime_module_load.py b/tests/python/unittest/test_runtime_module_load.py index 1cbc157a154cc..dfe03dbbec388 100644 --- a/tests/python/unittest/test_runtime_module_load.py +++ b/tests/python/unittest/test_runtime_module_load.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.contrib import cc, util import ctypes import os @@ -29,6 +30,7 @@ os.environ["TVM_USE_RUNTIME_LIB"] = "1" os.environ["TVM_FFI"] = "ctypes" import tvm +from tvm import te import numpy as np path_dso = sys.argv[1] dtype = sys.argv[2] @@ -46,9 +48,9 @@ def test_dso_module_load(): temp = util.tempdir() def save_object(names): - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.var('i') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.var('i') # for i in 0 to n-1: stmt = tvm.tir.For( i, 0, n - 1, 0, 0, @@ -88,15 +90,15 @@ def save_object(names): def test_device_module_dump(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. num_thread = 8 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) def check_device(device): ctx = tvm.context(device, 0) @@ -150,10 +152,10 @@ def test_combine_module_llvm(): """Test combine multiple module into one shared lib.""" # graph nn = 12 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) def check_llvm(): ctx = tvm.cpu(0) diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py index ed23a0bc9d9d7..e3143794cc34e 100644 --- a/tests/python/unittest/test_runtime_ndarray.py +++ b/tests/python/unittest/test_runtime_ndarray.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def enabled_ctx_list(): @@ -55,10 +56,10 @@ def test_fp16_conversion(): n = 100 for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]: - A = tvm.placeholder((n,), dtype=src) - B = tvm.compute((n,), lambda i: A[i].astype(dst)) + A = te.placeholder((n,), dtype=src) + B = te.compute((n,), lambda i: A[i].astype(dst)) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) func = tvm.build(s, [A, B], 'llvm') x_tvm = tvm.nd.array(100 * np.random.randn(n).astype(src) - 50) diff --git a/tests/python/unittest/test_runtime_packed_func.py b/tests/python/unittest/test_runtime_packed_func.py index 4f7377008c763..3570fe1496080 100644 --- a/tests/python/unittest/test_runtime_packed_func.py +++ b/tests/python/unittest/test_runtime_packed_func.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import tvm.testing import numpy as np @@ -32,12 +33,12 @@ def my_packed_func(*args): assert y == 10 def test_get_callback_with_node(): - x = tvm.convert(10) + x = tvm.runtime.convert(10) def test(y): assert y.handle != x.handle return y - f2 = tvm.convert(test) + f2 = tvm.runtime.convert(test) # register into global function table @tvm.register_func def my_callback_with_node(y, f): @@ -54,9 +55,9 @@ def my_callback_with_node(y, f): def test_return_func(): def addy(y): def add(x): - return tvm.convert(x + y) + return tvm.runtime.convert(x + y) return add - myf = tvm.convert(addy) + myf = tvm.runtime.convert(addy) f = myf(10) assert f(11).value == 21 @@ -67,7 +68,7 @@ def test_convert(): def myfunc(*args): assert(tuple(args) == targs) - f = tvm.convert(myfunc) + f = tvm.runtime.convert(myfunc) assert isinstance(f, tvm.runtime.PackedFunc) def test_byte_array(): @@ -76,15 +77,15 @@ def test_byte_array(): def myfunc(ss): assert ss == a - f = tvm.convert(myfunc) + f = tvm.runtime.convert(myfunc) f(a) def test_empty_array(): def myfunc(ss): assert tuple(ss) == () - x = tvm.convert(()) - tvm.convert(myfunc)(x) + x = tvm.runtime.convert(()) + tvm.runtime.convert(myfunc)(x) def test_ctx(): @@ -99,25 +100,25 @@ def test_ctx_func(ctx): def test_trace_default_action(): n = 2 - x = tvm.placeholder((n,n,n), name="X", dtype="float32") - y = tvm.compute(x.shape, lambda i, j, k: tvm.trace([i, j, k, x[i][j][k]])) - s = tvm.create_schedule(y.op) + x = te.placeholder((n,n,n), name="X", dtype="float32") + y = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([i, j, k, x[i][j][k]])) + s = te.create_schedule(y.op) f = tvm.build(s, [x, y], target="llvm") xnd = tvm.nd.array(np.ones((n,n,n), dtype=x.dtype)) ynd = tvm.nd.array(np.zeros((n,n,n), dtype=y.dtype)) f(xnd, ynd) def test_trace_expr_assign(): - @tvm.register_func("tvm.trace_callback2") + @tvm.register_func("tvm.tir.trace_callback2") def trace_buffer(x): return def check_assign(dtype): n = 4 - x = tvm.placeholder((n,n,n), name="X", dtype=dtype) - y = tvm.compute(x.shape, lambda i, j, k: tvm.trace([x[i][j][k]], "tvm.trace_callback2")) - z = tvm.compute(x.shape, lambda i, j, k: tvm.trace([y[i][j][k]], "tvm.trace_callback2")) - s = tvm.create_schedule(z.op) + x = te.placeholder((n,n,n), name="X", dtype=dtype) + y = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([x[i][j][k]], "tvm.tir.trace_callback2")) + z = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([y[i][j][k]], "tvm.tir.trace_callback2")) + s = te.create_schedule(z.op) f = tvm.build(s, [x, y, z], "llvm") xnd = tvm.nd.array(np.ones((n,n,n), dtype=x.dtype)) @@ -133,17 +134,17 @@ def check_assign(dtype): check_assign(t) def test_trace_expr_sum_generated(): - @tvm.register_func("tvm.trace_callback3") + @tvm.register_func("tvm.tir.trace_callback3") def trace_buffer(x): return def check_expr_sum(dtype): n = 4 - a = tvm.placeholder((n,n,n), name="a", dtype=dtype) - b = tvm.placeholder((n,n,n), name="b", dtype=dtype) - c = tvm.compute(a.shape, lambda i, j, k: tvm.trace([a[i][j][k]],"tvm.trace_callback3") - + tvm.trace([b[i][j][k]],"tvm.trace_callback3")) - s = tvm.create_schedule(c.op) + a = te.placeholder((n,n,n), name="a", dtype=dtype) + b = te.placeholder((n,n,n), name="b", dtype=dtype) + c = te.compute(a.shape, lambda i, j, k: tvm.tir.trace([a[i][j][k]],"tvm.tir.trace_callback3") + + tvm.tir.trace([b[i][j][k]],"tvm.tir.trace_callback3")) + s = te.create_schedule(c.op) f = tvm.build(s, [a, b, c]) xnd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=a.dtype))) ynd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=b.dtype))) @@ -155,22 +156,22 @@ def check_expr_sum(dtype): check_expr_sum(t) def test_trace_expr_sum_args(): - @tvm.register_func("tvm.trace_silent") + @tvm.register_func("tvm.tir.trace_silent") def silent(*args): return def check_expr_sum(dtype): n = 4 - a = tvm.placeholder((n,n,n), name="a", dtype=dtype) - b = tvm.placeholder((n,n,n), name="b", dtype=dtype) - e = tvm.placeholder((n,n,n), name="e", dtype=dtype) - d = tvm.placeholder((n,n,n), name="d", dtype=dtype) - - c = tvm.compute(a.shape, lambda i, j, k: tvm.trace([i, j, k, a[i][j][k]], "tvm.trace_silent") - + tvm.trace([i, j, k, b[i][j][k]], "tvm.trace_silent") - + tvm.trace([i, j, k, d[i][j][k]], "tvm.trace_silent") - + tvm.trace([i, j, k, e[i][j][k]], "tvm.trace_silent")) - s = tvm.create_schedule(c.op) + a = te.placeholder((n,n,n), name="a", dtype=dtype) + b = te.placeholder((n,n,n), name="b", dtype=dtype) + e = te.placeholder((n,n,n), name="e", dtype=dtype) + d = te.placeholder((n,n,n), name="d", dtype=dtype) + + c = te.compute(a.shape, lambda i, j, k: tvm.tir.trace([i, j, k, a[i][j][k]], "tvm.tir.trace_silent") + + tvm.tir.trace([i, j, k, b[i][j][k]], "tvm.tir.trace_silent") + + tvm.tir.trace([i, j, k, d[i][j][k]], "tvm.tir.trace_silent") + + tvm.tir.trace([i, j, k, e[i][j][k]], "tvm.tir.trace_silent")) + s = te.create_schedule(c.op) f = tvm.build(s, [a, b, d, e, c]) a_nd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=a.dtype))) b_nd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=b.dtype))) @@ -187,17 +188,17 @@ def check_expr_sum(dtype): check_expr_sum(t) def test_trace_expr_sum_custom(): - @tvm.register_func("tvm.trace_callback4") + @tvm.register_func("tvm.tir.trace_callback4") def trace_buffer(x): return def check_expr_sum_custom(dtype): n = 4 - a = tvm.placeholder((n,n), name="a", dtype=dtype) - b = tvm.placeholder((n,n), name="b", dtype=dtype) - c = tvm.compute(a.shape, lambda i,j: tvm.trace([a[i][j]], "tvm.trace_callback4") - + tvm.trace([b[i][j]], "tvm.trace_callback4")) - s = tvm.create_schedule(c.op) + a = te.placeholder((n,n), name="a", dtype=dtype) + b = te.placeholder((n,n), name="b", dtype=dtype) + c = te.compute(a.shape, lambda i,j: tvm.tir.trace([a[i][j]], "tvm.tir.trace_callback4") + + tvm.tir.trace([b[i][j]], "tvm.tir.trace_callback4")) + s = te.create_schedule(c.op) f = tvm.build(s, [a, b, c]) npa = np.array([[1,0,0,0], [0,1,0,0],[0,0,1,0],[0,0,0,1]], dtype=a.dtype) npb = np.array([[1,0,0,0], [0,1,0,0],[0,0,1,0],[0,0,0,1]], dtype=a.dtype) @@ -211,20 +212,20 @@ def check_expr_sum_custom(dtype): check_expr_sum_custom(t) def test_trace_can_change_traced_value_int(): - @tvm.register_func("tvm.trace_change_int_first") + @tvm.register_func("tvm.tir.trace_change_int_first") def trace_buffer(x): return 13 - @tvm.register_func("tvm.trace_change_int_second") + @tvm.register_func("tvm.tir.trace_change_int_second") def trace_buffer(x): return 14 def check_assign(dtype): n = 4 - x = tvm.placeholder((n,), name="X", dtype=dtype) - y = tvm.compute(x.shape, lambda i: tvm.trace([x[i]], "tvm.trace_change_int_first")) - z = tvm.compute(x.shape, lambda i: tvm.trace([y[i]], "tvm.trace_change_int_second")) - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), name="X", dtype=dtype) + y = te.compute(x.shape, lambda i: tvm.tir.trace([x[i]], "tvm.tir.trace_change_int_first")) + z = te.compute(x.shape, lambda i: tvm.tir.trace([y[i]], "tvm.tir.trace_change_int_second")) + s = te.create_schedule(z.op) f = tvm.build(s, [x, y, z], "llvm") xnd = tvm.nd.array(np.ones((n,), dtype=x.dtype)) @@ -240,20 +241,20 @@ def check_assign(dtype): check_assign(t) def test_trace_can_change_traced_value_float(): - @tvm.register_func("tvm.trace_change_float_first") + @tvm.register_func("tvm.tir.trace_change_float_first") def trace_buffer(x): return 13.0 - @tvm.register_func("tvm.trace_change_float_second") + @tvm.register_func("tvm.tir.trace_change_float_second") def trace_buffer(x): return 14.0 def check_assign(dtype): n = 4 - x = tvm.placeholder((n,), name="X", dtype=dtype) - y = tvm.compute(x.shape, lambda i: tvm.trace([x[i]], "tvm.trace_change_float_first")) - z = tvm.compute(x.shape, lambda i: tvm.trace([y[i]], "tvm.trace_change_float_second")) - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), name="X", dtype=dtype) + y = te.compute(x.shape, lambda i: tvm.tir.trace([x[i]], "tvm.tir.trace_change_float_first")) + z = te.compute(x.shape, lambda i: tvm.tir.trace([y[i]], "tvm.tir.trace_change_float_second")) + s = te.create_schedule(z.op) f = tvm.build(s, [x, y, z], "llvm") xnd = tvm.nd.array(np.ones((n,), dtype=x.dtype)) diff --git a/tests/python/unittest/test_runtime_rpc.py b/tests/python/unittest/test_runtime_rpc.py index 75169da9a2ceb..1d9b79eca8751 100644 --- a/tests/python/unittest/test_runtime_rpc.py +++ b/tests/python/unittest/test_runtime_rpc.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import tvm.testing import os import logging @@ -34,9 +35,9 @@ def test_bigendian_rpc(): if host is None: return def verify_rpc(remote, target, shape, dtype): - A = tvm.placeholder(shape, dtype=dtype) - B = tvm.compute(A.shape, lambda i: A[i]+tvm.const(1, A.dtype)) - s = tvm.create_schedule(B.op) + A = te.placeholder(shape, dtype=dtype) + B = te.compute(A.shape, lambda i: A[i]+tvm.tir.const(1, A.dtype)) + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], target, name="myadd") ctx = remote.cpu(0) @@ -116,10 +117,10 @@ def test_rpc_remote_module(): server = rpc.Server("localhost") client = rpc.connect(server.host, server.port) # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) def check_remote(remote): if not tvm.runtime.enabled("llvm"): @@ -155,10 +156,10 @@ def check_remote_link_cl(remote): return temp = util.tempdir() ctx = remote.cl(0) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=32) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd") # Option 1: save modules separately and rely on remote compiler path_o = temp.relpath("myadd.o") diff --git a/tests/python/unittest/test_runtime_vm_profiler.py b/tests/python/unittest/test_runtime_vm_profiler.py index 849a9ef3f8237..064b733de7bd7 100644 --- a/tests/python/unittest/test_runtime_vm_profiler.py +++ b/tests/python/unittest/test_runtime_vm_profiler.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm.runtime import profiler_vm from tvm import relay from tvm.relay.testing import resnet diff --git a/tests/python/unittest/test_schedule_bound_inference.py b/tests/python/unittest/test_schedule_bound_inference.py index 9c3d1df17f2ba..abb8d7e146515 100644 --- a/tests/python/unittest/test_schedule_bound_inference.py +++ b/tests/python/unittest/test_schedule_bound_inference.py @@ -15,81 +15,82 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_bound1(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule([A2.op]) + s = te.create_schedule([A2.op]) xo, xi = s[A2].split(s[A2].op.axis[0], 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value == 8) def test_bound2(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + s = te.create_schedule(A2.op) xo, yo, xi, yi = s[A2].tile(A2.op.axis[0], A2.op.axis[1], 8, 8) # test normalize not affecting schedule _ = s.normalize() s[A1].compute_at(s[A2], yo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value == 8) assert(bounds[A1.op.axis[1]].extent.value == 8) def test_bound3(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) s[A1].set_scope("shared") xo, xi = s[A2].split(A2.op.axis[0], 32) xi0, xi1 = s[A2].split(xi, nparts=16) - s[A2].bind(xi0, tvm.thread_axis("threadIdx.x")) + s[A2].bind(xi0, te.thread_axis("threadIdx.x")) yo, yi = s[A2].split(A2.op.axis[1], 16) # test normalize not affecting schedule _ = s.normalize() s[A2].reorder(xo, xi0, yo, xi1, yi) s[A1].compute_at(s[A2], yo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value==32) assert(bounds[A1.op.axis[1]].extent.value==16) def test_bound_split_divisible(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((8 * m, l), name='A') - B = tvm.compute((8 * m, l), lambda i, j: A[i, j], name='B') - s = tvm.create_schedule(B.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((8 * m, l), name='A') + B = te.compute((8 * m, l), lambda i, j: A[i, j], name='B') + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], 8) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert bounds[xo].extent == m assert bounds[xi].extent.value == 8 def test_bound_tile_divisible(): - m = tvm.var('m') - l = tvm.var('l') + m = te.var('m') + l = te.var('l') shape = (8 * m, 32 * l) - A = tvm.placeholder(shape, name='A') - B = tvm.compute(shape, lambda i, j: A[i, j], name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder(shape, name='A') + B = te.compute(shape, lambda i, j: A[i, j], name='B') + s = te.create_schedule(B.op) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], 8, 32) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert bounds[xo].extent == m assert bounds[xi].extent.value == 8 @@ -97,21 +98,21 @@ def test_bound_tile_divisible(): assert bounds[yi].extent.value == 32 def test_bound_fusesplit1(): - m = tvm.var('m') - l = tvm.var('l') - split1 = tvm.var('s') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - - s = tvm.create_schedule(A2.op) + m = te.var('m') + l = te.var('l') + split1 = te.var('s') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + + s = te.create_schedule(A2.op) fused_axes = s[A2].fuse(A2.op.axis[0], A2.op.axis[1]) xo, xi = s[A2].split(fused_axes, split1) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - idxdiv = tvm.indexdiv + idxdiv = tvm.tir.indexdiv assert(tvm.ir_pass.Simplify( bounds[A1.op.axis[0]].min - idxdiv(xo * split1, l)).value == 0) @@ -119,7 +120,7 @@ def test_bound_fusesplit1(): for i in range(1, 6): for j in range(1, 6): for k in range(1, 6): - vars = tvm.convert({split1: tvm.const(i, "int32"), l: tvm.const(j, "int32"), xo.var: tvm.const(k, "int32")}) + vars = tvm.runtime.convert({split1: tvm.tir.const(i, "int32"), l: tvm.tir.const(j, "int32"), xo.var: tvm.tir.const(k, "int32")}) comp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value exp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(expected_extent, vars)).value assert(comp_ext == exp_ext) @@ -127,21 +128,21 @@ def test_bound_fusesplit1(): assert(tvm.ir_pass.Simplify(bounds[A1.op.axis[1]].extent - l).value == 0) def test_bound_fusesplit2(): - m = tvm.var("m") - l = tvm.convert(6) - split = tvm.convert(3) - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - - s = tvm.create_schedule(A2.op) + m = te.var("m") + l = tvm.runtime.convert(6) + split = tvm.runtime.convert(3) + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + + s = te.create_schedule(A2.op) fused_axes = s[A2].fuse(A2.op.axis[0], A2.op.axis[1]) xo, xi = s[A2].split(fused_axes, split) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - vars = tvm.convert({xo.var: tvm.const(5, "int32")}) + vars = tvm.runtime.convert({xo.var: tvm.tir.const(5, "int32")}) assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].min, vars)).value == 2) assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[1]].min, vars)).value == 3) assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value == 1) @@ -149,113 +150,113 @@ def test_bound_fusesplit2(): def test_bound_warp(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) s[A1].set_scope("warp") xo, xi = s[A2].split(A2.op.axis[0], 32) xi0, xi1 = s[A2].split(xi, factor=16) - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") s[A2].bind(xi1, tx) - s[A2].bind(xi0, tvm.thread_axis("threadIdx.y")) + s[A2].bind(xi0, te.thread_axis("threadIdx.y")) y = s[A2].op.axis[1] s[A1].compute_at(s[A2], y) xo, xi = s[A1].split(s[A1].op.axis[0], factor=16) s[A1].bind(xi, tx) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value==16) def test_bound_scan(): - m = tvm.var("m") - n = tvm.var("n") - X = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: X[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) - s_scan = tvm.scan(s_init, s_update, s_state) + m = te.var("m") + n = te.var("n") + X = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: X[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) + s_scan = tvm.te.scan(s_init, s_update, s_state) assert tuple(s_scan.shape) == (m, n) - s = tvm.create_schedule(s_scan.op) + s = te.create_schedule(s_scan.op) XX = s.cache_read(X, "local", s_update) xo, xi = s[s_update].split(s_update.op.axis[1], factor=4) s[XX].compute_at(s[s_update], xo) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) assert bounds[XX.op.axis[1]].extent.value == 4 def test_bound_conv1d(): - n = tvm.var('n') - A = tvm.compute((n+2), lambda i: 1, name='A') + n = te.var('n') + A = te.compute((n+2), lambda i: 1, name='A') def computeB(ii): i = ii + 1 return A[i-1] + A[i] + A[i+1] - B = tvm.compute(n, computeB, name='B') - s = tvm.create_schedule(B.op) + B = te.compute(n, computeB, name='B') + s = te.create_schedule(B.op) s[A].compute_at(s[B], B.op.axis[0]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[A.op.axis[0]].extent.value == 3) def test_bound_blur(): - n = tvm.convert(12) - A = tvm.compute((n, n), lambda i, j: 1, name='A') + n = tvm.runtime.convert(12) + A = te.compute((n, n), lambda i, j: 1, name='A') def computeB(ii, jj): # set the correct center i = ii + 1 j = jj + 1 return A[i][j] + A[i-1][j] + A[i+1][j] + A[i][j+1] + A[i][j-1] - B = tvm.compute((n-2, n-2), computeB, name='B') - s = tvm.create_schedule(B.op) + B = te.compute((n-2, n-2), computeB, name='B') + s = te.create_schedule(B.op) s[A].compute_at(s[B], B.op.axis[1]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[A.op.axis[0]].extent.value == 3) assert(bounds[A.op.axis[1]].extent.value == 3) def test_bound_rfactor(): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - k = tvm.reduce_axis((0, n)) - B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k, where=(i>1)), name='B') + n = te.var('n') + A = te.placeholder((n,), name='A') + k = te.reduce_axis((0, n)) + B = te.compute((1,), lambda i: te.sum(A[k], axis=k, where=(i>1)), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) kf, ki = s[B].split(k, nparts=4) BF = s.rfactor(B, kf) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[BF.op.axis[0]].extent.value == 4) assert(bounds[BF.op.axis[1]].extent.value == 1) def test_bound_group_schedule(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g = s.create_group(outputs=x1, inputs=x, include_inputs=True) g.compute_at(s[x2], x2.op.axis[0]) assert s[x1].group == g assert s[x].group == g s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert bounds[x.op.axis[0]].extent.value == 1 assert bounds[x.op.axis[1]].extent == n def test_bound_nest_group(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g1 = s.create_group(outputs=x, inputs=x, include_inputs=True) g2 = s.create_group(outputs=x1, inputs=x, include_inputs=True) assert s[x].group == g1 @@ -263,7 +264,7 @@ def test_bound_nest_group(): g2.compute_at(s[x2], x2.op.axis[0]) g1.compute_at(s[x1], s[x1].op.axis[1]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert bounds[x.op.axis[0]].extent.value == 1 assert bounds[x.op.axis[1]].extent.value == 1 assert bounds[x1.op.axis[0]].extent.value == 1 @@ -271,18 +272,18 @@ def test_bound_nest_group(): def test_bound_nest_thread(): - m = tvm.var('m') - A = tvm.placeholder((m), name='A') - A1 = tvm.compute((m,), lambda i: A[i], name='A1') - A2 = tvm.compute((m,), lambda i: A1[i] + 2, name='A2') - A3 = tvm.compute((m,), lambda i: A2[i] + 3, name='A3') + m = te.var('m') + A = te.placeholder((m), name='A') + A1 = te.compute((m,), lambda i: A[i], name='A1') + A2 = te.compute((m,), lambda i: A1[i] + 2, name='A2') + A3 = te.compute((m,), lambda i: A2[i] + 3, name='A3') - s = tvm.create_schedule(A3.op) + s = te.create_schedule(A3.op) s[A2].set_scope("shared") s[A1].set_scope("local") - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") bx, tx = s[A3].split(A3.op.axis[0], factor=32) s[A3].bind(bx, block_x) s[A3].bind(tx, thread_x) @@ -291,31 +292,31 @@ def test_bound_nest_thread(): s[A2].bind(xi, thread_x) s[A1].compute_at(s[A3], tx) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[A1.op.axis[0]].extent.value==1) assert(bounds[A2.op.axis[0]].extent.value==32) assert(bounds[A3.op.axis[0]].extent == m) def test_gemm_bound(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((n, n), name='B') - k = tvm.reduce_axis((0, n), name='k') - C = tvm.compute( + n = tvm.runtime.convert(nn) + A = te.placeholder((n, n), name='A') + B = te.placeholder((n, n), name='B') + k = te.reduce_axis((0, n), name='k') + C = te.compute( (n, n), - lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k), + lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k), name='CC') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xtile, ytile = 32, 32 scale = 8 num_thread = 8 block_factor = scale * num_thread - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_y = tvm.thread_axis("threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_y = te.thread_axis("threadIdx.y") CC = s.cache_write(C, "local") AA = s.cache_read(A, "shared", [CC]) @@ -347,7 +348,7 @@ def test_gemm_bound(): s[BB].bind(ty, thread_y) s[BB].bind(tx, thread_x) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[BB.op.axis[0]].extent.value==64) assert(bounds[AA.op.axis[0]].extent.value==64) assert(bounds[CC.op.axis[0]].extent.value == 8) @@ -356,54 +357,54 @@ def test_gemm_bound(): def test_bound_tensor_compute_op(): def intrin_test(): - m1 = tvm.var("m1") - n1 = tvm.var("n1") - a = tvm.placeholder((m1, n1), name='a') - c = tvm.compute((1, n1), lambda i, j : a[0, j] + a[1, j] + a[2, j], name='c') + m1 = te.var("m1") + n1 = te.var("n1") + a = te.placeholder((m1, n1), name='a') + c = te.compute((1, n1), lambda i, j : a[0, j] + a[1, j] + a[2, j], name='c') - Ab = tvm.decl_buffer(a.shape, name="Abuf", offset_factor=1) - Cb = tvm.decl_buffer(c.shape, name="Cbuf", offset_factor=1) + Ab = tvm.tir.decl_buffer(a.shape, name="Abuf", offset_factor=1) + Cb = tvm.tir.decl_buffer(c.shape, name="Cbuf", offset_factor=1) def intrin_func(ins, outs): aa = ins[0] cc = outs[0] def _body(): ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r"))) + ib.emit(tvm.tir.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r"))) return ib.get() return _body() with tvm.build_config(offset_factor=1): return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb}) test_func = intrin_test() - A = tvm.placeholder((20,20), name='A') - B = tvm.compute(A.shape, lambda i,j : A[i,j], name='B') - C = tvm.compute((10, 20), lambda i : test_func(B[i:10, 0:20]), name='C') - s = tvm.create_schedule(C.op) - bounds = tvm.schedule.InferBound(s) + A = te.placeholder((20,20), name='A') + B = te.compute(A.shape, lambda i,j : A[i,j], name='B') + C = te.compute((10, 20), lambda i : test_func(B[i:10, 0:20]), name='C') + s = te.create_schedule(C.op) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[B.op.axis[0]].extent.value == 10) def test_bound_simplification_failure(): # Check that the bounds are not expanded - A = tvm.compute((2,), lambda j: j, "A") + A = te.compute((2,), lambda j: j, "A") def _check(B, A=A): - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) stmt = tvm.lower(s, [B, A], simple_mode=True) if not bounds[A.op.axis[0]].extent.value <= 2: print(stmt) assert bounds[A.op.axis[0]].extent.value <= 2 - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv # These are hard to simplify, moreover we don't simplify them - _check(tvm.compute((10,), lambda i: A[tvm.min(3*i, 4*i) + tvm.min(-3*i, -2*i)])) - _check(tvm.compute((10,), lambda i: A[tvm.min(3*i, 4*i) + tvm.max(-3*i, -4*i)])) - _check(tvm.compute((10,), lambda i: A[-2*tdiv(i,2) - tvm.min(i, 0-i)])) - _check(tvm.compute((10,), lambda i: A[i + (0 - i)])) + _check(te.compute((10,), lambda i: A[tvm.te.min(3*i, 4*i) + tvm.te.min(-3*i, -2*i)])) + _check(te.compute((10,), lambda i: A[tvm.te.min(3*i, 4*i) + tvm.te.max(-3*i, -4*i)])) + _check(te.compute((10,), lambda i: A[-2*tdiv(i,2) - tvm.te.min(i, 0-i)])) + _check(te.compute((10,), lambda i: A[i + (0 - i)])) # This would cause out of bounds, but we nevertheless include it - _check(tvm.compute((10,), lambda i: A[i])) + _check(te.compute((10,), lambda i: A[i])) if __name__ == "__main__": test_bound_nest_thread() diff --git a/tests/python/unittest/test_schedule_graph.py b/tests/python/unittest/test_schedule_graph.py index d77c1d470aba7..d6d38e5f05c97 100644 --- a/tests/python/unittest/test_schedule_graph.py +++ b/tests/python/unittest/test_schedule_graph.py @@ -15,96 +15,97 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_scan(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: x[0, i], name="s_init") - x_trans = tvm.compute((m, n), lambda i, j: x[i, j] + 1, name="x_trans") - s_up1 = tvm.compute((m, n), lambda t, i: s_state[t - 1, i] + 1, name="up1") - s_update = tvm.compute((m, n), lambda t, i: s_up1[t, i] + x_trans[t, i], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: x[0, i], name="s_init") + x_trans = te.compute((m, n), lambda i, j: x[i, j] + 1, name="x_trans") + s_up1 = te.compute((m, n), lambda t, i: s_state[t - 1, i] + 1, name="up1") + s_update = te.compute((m, n), lambda t, i: s_up1[t, i] + x_trans[t, i], name="update") + s_scan = tvm.te.scan(s_init, s_update, s_state) def test_getbody(): - body = tvm.schedule.ScanGetBody(s_scan.op) + body = tvm.te.schedule.ScanGetBody(s_scan.op) assert set(body) == set([s_scan.op, s_update.op, s_up1.op]) def test_attach_path(): - s = tvm.create_schedule(s_scan.op) + s = te.create_schedule(s_scan.op) s[x_trans].compute_at(s[s_update], s_update.op.axis[0]) - apath = tvm.schedule.CreateAttachPath(s) + apath = tvm.te.schedule.CreateAttachPath(s) assert(tuple(apath[s_update.op]) == tuple([s_scan.op.scan_axis])) assert(tuple(apath[x_trans.op]) == tuple([s_update.op.axis[0], s_scan.op.scan_axis])) def test_fix_pt(): - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body) + body = tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body) assert(fxpt[s_scan.spatial_axis_[0]].value != 0) def test_scan_fix_point(): - m = tvm.var("m") - n = tvm.var("n") - l = tvm.var("l") - x = tvm.compute((l, m, n), lambda *i: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((l, m, n)) - s_init = tvm.compute((1, m, n), lambda _, i, j: x[0, i, j], name="s_init") + m = te.var("m") + n = te.var("n") + l = te.var("l") + x = te.compute((l, m, n), lambda *i: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((l, m, n)) + s_init = te.compute((1, m, n), lambda _, i, j: x[0, i, j], name="s_init") def test_scan0(): - s_update = tvm.compute((l, m, n), + s_update = te.compute((l, m, n), lambda t, i, j: x[t, j, i] + s_state[t-1, i, j], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body) + s_scan = tvm.te.scan(s_init, s_update, s_state) + body = tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 1) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 1) def test_scan1(): - s_update = tvm.compute((l, m, n), + s_update = te.compute((l, m, n), lambda t, i, j: x[t, j, i] + s_state[t-1, j, i], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body) + s_scan = tvm.te.scan(s_init, s_update, s_state) + body = tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 0) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0) def test_scan3_not_exact_reach(): - s_h1 = tvm.compute((l, n, m), lambda t, j, i: s_state[t-1, i, j], name="h1") - s_h2 = tvm.compute((l, m, n), lambda t, i, j: s_state[t-1, i, 10] * 2, name="h1") - s_update = tvm.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op) + s_h1 = te.compute((l, n, m), lambda t, j, i: s_state[t-1, i, j], name="h1") + s_h2 = te.compute((l, m, n), lambda t, i, j: s_state[t-1, i, 10] * 2, name="h1") + s_update = te.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update") + s_scan = tvm.te.scan(s_init, s_update, s_state) + body = tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 1) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0) def test_scan4_reach_other(): - s_h1 = tvm.compute((l, n, m), lambda t, j, i: s_state[t-1, j, j], name="h1") - s_h2 = tvm.compute((l, m, n), lambda t, i, j: s_state[t-1, i, j] * 2, name="h1") - s_update = tvm.compute((l, m, n), + s_h1 = te.compute((l, n, m), lambda t, j, i: s_state[t-1, j, j], name="h1") + s_h2 = te.compute((l, m, n), lambda t, i, j: s_state[t-1, i, j] * 2, name="h1") + s_update = te.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op) + s_scan = tvm.te.scan(s_init, s_update, s_state) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 0) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0) def test_scan5_multi_output(): - m = tvm.var("m") - n = tvm.var("n") - x1 = tvm.placeholder((m, n)) - s1 = tvm.placeholder((m, n)) - x2 = tvm.placeholder((m, n)) - s2 = tvm.placeholder((m, n)) - s1_init = tvm.compute((1, n), lambda _, i: x1[0, i]) - s2_init = tvm.compute((1, n), lambda _, i: x2[0, i]) - s1_update = tvm.compute((m, n), lambda t, i: s1[t-1, i] + x1[t, i]) - s2_update = tvm.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) - r0, r1 = tvm.scan([s1_init, s2_init], + m = te.var("m") + n = te.var("n") + x1 = te.placeholder((m, n)) + s1 = te.placeholder((m, n)) + x2 = te.placeholder((m, n)) + s2 = te.placeholder((m, n)) + s1_init = te.compute((1, n), lambda _, i: x1[0, i]) + s2_init = te.compute((1, n), lambda _, i: x2[0, i]) + s1_update = te.compute((m, n), lambda t, i: s1[t-1, i] + x1[t, i]) + s2_update = te.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) + r0, r1 = tvm.te.scan([s1_init, s2_init], [s1_update, s2_update], [s1, s2]) - body = tvm.schedule.ScanGetBody(r0.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(r0.op) + body = tvm.te.schedule.ScanGetBody(r0.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(r0.op) assert(fxpt[r1.op.spatial_axis_[0]].value == 1) test_scan0() @@ -114,17 +115,17 @@ def test_scan5_multi_output(): test_scan5_multi_output() def test_create_read_graph(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j]) - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j]) + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3) - g = tvm.schedule.CreateReadGraph([A2.op]) + g = tvm.te.schedule.CreateReadGraph([A2.op]) assert g[A2.op][0] == A1 assert g[A1.op][0] == A - post_order = tvm.schedule.PostDFSOrder([A2.op], g) + post_order = tvm.te.schedule.PostDFSOrder([A2.op], g) assert(post_order[0] == A.op) assert(post_order[1] == A1.op) diff --git a/tests/python/unittest/test_schedule_lstm.py b/tests/python/unittest/test_schedule_lstm.py index 21cf8e81badd1..23c748688137f 100644 --- a/tests/python/unittest/test_schedule_lstm.py +++ b/tests/python/unittest/test_schedule_lstm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_lstm_cell_inline(): num_step = 128 @@ -22,52 +23,52 @@ def test_lstm_cell_inline(): num_hidden = 1152 batch_size = 4 # Global transition matrix - X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X") - Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h") - Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h") + X = te.placeholder((num_step - 1, batch_size, num_input), name="X") + Wi2h = te.placeholder((4, num_hidden, num_input), name="Wi2h") + Wh2h = te.placeholder((4, num_hidden, num_hidden), name="Wh2h") # h: output hidden state, c: cell state. - s_state_h = tvm.placeholder((num_step, batch_size, num_hidden)) - s_state_c = tvm.placeholder((num_step, batch_size, num_hidden)) - s_init_c = tvm.compute((1, batch_size, num_hidden), + s_state_h = te.placeholder((num_step, batch_size, num_hidden)) + s_state_c = te.placeholder((num_step, batch_size, num_hidden)) + s_init_c = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_c") - s_init_h = tvm.compute((1, batch_size, num_hidden), + s_init_h = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_h") # LSTM transition - k = tvm.reduce_axis((0, num_input), name="ki2h") - s_i2h = tvm.compute( + k = te.reduce_axis((0, num_input), name="ki2h") + s_i2h = te.compute( (num_step, 4, batch_size, num_hidden), - lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k), + lambda t, x, i, j: te.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k), name="s_i2h") - k = tvm.reduce_axis((0, num_hidden), name="ki2h") - s_h2h = tvm.compute( + k = te.reduce_axis((0, num_hidden), name="ki2h") + s_h2h = te.compute( (num_step, 4, batch_size, num_hidden), - lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), + lambda t, x, i, j: te.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), name="s_h2h") # Gate rules - gates = tvm.compute(s_i2h.shape, lambda *i: + gates = te.compute(s_i2h.shape, lambda *i: s_i2h(*i) + s_h2h(*i), name="gates") gshape = (num_step, batch_size, num_hidden) - in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]), name="in_gate") - in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]), name="in_transform") - forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]), name="forget_gate") - out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]), name="out_gate") - next_c = tvm.compute(gshape, + in_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 0, i, j]), name="in_gate") + in_transform = te.compute(gshape, lambda t, i, j: te.tanh(gates[t, 1, i, j]), name="in_transform") + forget_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 2, i, j]), name="forget_gate") + out_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 3, i, j]), name="out_gate") + next_c = te.compute(gshape, lambda t, i, j: forget_gate[t, i, j] * s_state_c[t - 1, i, j] + in_gate[t, i, j] * in_transform[t, i, j], name="next_c") - next_h = tvm.compute(gshape, - lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h") - update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c") - update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h") + next_h = te.compute(gshape, + lambda t, i, j: out_gate[t, i, j] * te.tanh(next_c[t, i, j]), name="next_h") + update_c = te.compute(gshape, lambda *i: next_c(*i), name="update_c") + update_h = te.compute(gshape, lambda *i: next_h(*i), name="update_h") # schedule - scan_h, scan_c = tvm.scan( + scan_h, scan_c = tvm.te.scan( [s_init_h, s_init_c], [update_h, update_c], [s_state_h, s_state_c], inputs=[X], name="lstm_scan") # schedule - s = tvm.create_schedule(scan_h.op) + s = te.create_schedule(scan_h.op) # Inline gate computations s[gates].compute_inline() s[in_gate].compute_inline() diff --git a/tests/python/unittest/test_schedule_schedule_ops.py b/tests/python/unittest/test_schedule_schedule_ops.py index 2fc84bb43b168..d83dc96032152 100644 --- a/tests/python/unittest/test_schedule_schedule_ops.py +++ b/tests/python/unittest/test_schedule_schedule_ops.py @@ -15,66 +15,67 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_schedule0(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - s = tvm.create_schedule(A1.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + s = te.create_schedule(A1.op) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule1(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') - s = tvm.create_schedule(A1.op) + s = te.create_schedule(A1.op) xo, xi = s[A1].split(A1.op.axis[0], 8) s[A1].pragma(xo, "auto_unroll_max_step", 10) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule2(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_scan(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: x[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) - res = tvm.scan(s_init, s_update, s_state) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: x[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) + res = tvm.te.scan(s_init, s_update, s_state) assert tuple(res.shape) == (m, n) - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s = s.normalize() ir = tvm.lower(s, [s_state], simple_mode=True) assert not hasattr(ir.body.body.body.body[1].body.body[1].body, "condition") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[res.op.scan_axis].min.value == 1) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_inline_multi_reduce(): @@ -83,64 +84,64 @@ def argmax_comp(x, y): val = tvm.tir.Select((x[1] >= y[1]), x[1], y[1]) return idx, val def argmax_init(idx_typ, val_typ): - return tvm.const(-1, idx_typ), tvm.min_value(val_typ) - - argmax = tvm.comm_reducer(argmax_comp, argmax_init, name='argmax') - m = tvm.var('m') - n = tvm.var('n') - val = tvm.placeholder((m, n), name='val', dtype='float32') - val1 = tvm.compute((m, n), lambda i, j: val[i, j]+1, name='val1') - val2 = tvm.compute((m, n), lambda i, j: tvm.exp(val1[i, j]), name='val2') - k = tvm.reduce_axis((0, n), 'k') - T_idx, T_val = tvm.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T') - s = tvm.create_schedule(T_idx.op) + return tvm.tir.const(-1, idx_typ), tvm.te.min_value(val_typ) + + argmax = te.comm_reducer(argmax_comp, argmax_init, name='argmax') + m = te.var('m') + n = te.var('n') + val = te.placeholder((m, n), name='val', dtype='float32') + val1 = te.compute((m, n), lambda i, j: val[i, j]+1, name='val1') + val2 = te.compute((m, n), lambda i, j: te.exp(val1[i, j]), name='val2') + k = te.reduce_axis((0, n), 'k') + T_idx, T_val = te.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T') + s = te.create_schedule(T_idx.op) s[val1].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_auto_inline(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') - C = tvm.placeholder((m, n), name='C') - T1 = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='T1') - T2 = tvm.compute((m, n), lambda i, j: T1(i, j) + C(i, j), name='T2') - - s = tvm.create_schedule(T2.op) - tvm.schedule.AutoInlineElemWise(s) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') + C = te.placeholder((m, n), name='C') + T1 = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='T1') + T2 = te.compute((m, n), lambda i, j: T1(i, j) + C(i, j), name='T2') + + s = te.create_schedule(T2.op) + tvm.te.schedule.AutoInlineElemWise(s) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_const_bound(): n = 128 - A = tvm.placeholder((n,), name='A') - A1 = tvm.compute((n,), lambda i: A[i] + 1, name='A1') - s = tvm.create_schedule(A1.op) + A = te.placeholder((n,), name='A') + A1 = te.compute((n,), lambda i: A[i] + 1, name='A1') + s = te.create_schedule(A1.op) xo, xi = s[A1].split(A1.op.axis[0], 8) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_inline_mixed(): - n = tvm.var('n') - A = tvm.placeholder((n, ), name='A') - A1 = tvm.compute(A.shape, lambda *i: A(*i) + 1, name='A1') - A2 = tvm.compute(A.shape, lambda *i: A1(*i) + 2, name='A2') - C = tvm.compute((n,), lambda i: A2[i] + A1[i], name='C') + n = te.var('n') + A = te.placeholder((n, ), name='A') + A1 = te.compute(A.shape, lambda *i: A(*i) + 1, name='A1') + A2 = te.compute(A.shape, lambda *i: A1(*i) + 2, name='A2') + C = te.compute((n,), lambda i: A2[i] + A1[i], name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=8) s[A1].compute_at(s[C], xo) s[A2].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def check(x): if isinstance(x, tvm.tir.Call): assert x.func != A2 @@ -148,42 +149,42 @@ def check(x): def test_scan_inline1(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state1 = tvm.placeholder((m, n)) - s_state2 = tvm.placeholder((m, n)) - s_init1 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_init2 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_x1 = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="x1") - s_x2 = tvm.compute((m, n), lambda t, i: s_state2[t-1, i] + 1 , name="x2") - s_update1 = tvm.compute((m, n), lambda t, i: s_x1[t, i], "u1") - s_update2 = tvm.compute((m, n), lambda t, i: s_x2[t, i], "u2") - res1, res2 = tvm.scan([s_init1, s_init2], + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state1 = te.placeholder((m, n)) + s_state2 = te.placeholder((m, n)) + s_init1 = te.compute((1, n), lambda _, i: x[0, i]) + s_init2 = te.compute((1, n), lambda _, i: x[0, i]) + s_x1 = te.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="x1") + s_x2 = te.compute((m, n), lambda t, i: s_state2[t-1, i] + 1 , name="x2") + s_update1 = te.compute((m, n), lambda t, i: s_x1[t, i], "u1") + s_update2 = te.compute((m, n), lambda t, i: s_x2[t, i], "u2") + res1, res2 = tvm.te.scan([s_init1, s_init2], [s_update1, s_update2], [s_state1, s_state2]) - s = tvm.create_schedule(res1.op) + s = te.create_schedule(res1.op) s[s_x1].compute_inline() stmt = tvm.lower(s, [x, res1, res2]) def test_scan_inline2(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state1 = tvm.placeholder((m, n)) - s_state2 = tvm.placeholder((m, n)) - s_init1 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_init2 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_xx = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="xx") - s_x1 = tvm.compute((m, n), lambda t, i: s_xx[t, i] + 1, name="x1") - s_x2 = tvm.compute((m, n), lambda t, i: s_xx[t, i] + s_state2[t-1, 2], name="x2") - s_update1 = tvm.compute((m, n), lambda t, i: s_x1[t, i], "u1") - s_update2 = tvm.compute((m, n), lambda t, i: s_x2[t, i], "u2") - res1, res2 = tvm.scan([s_init1, s_init2], + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state1 = te.placeholder((m, n)) + s_state2 = te.placeholder((m, n)) + s_init1 = te.compute((1, n), lambda _, i: x[0, i]) + s_init2 = te.compute((1, n), lambda _, i: x[0, i]) + s_xx = te.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="xx") + s_x1 = te.compute((m, n), lambda t, i: s_xx[t, i] + 1, name="x1") + s_x2 = te.compute((m, n), lambda t, i: s_xx[t, i] + s_state2[t-1, 2], name="x2") + s_update1 = te.compute((m, n), lambda t, i: s_x1[t, i], "u1") + s_update2 = te.compute((m, n), lambda t, i: s_x2[t, i], "u2") + res1, res2 = tvm.te.scan([s_init1, s_init2], [s_update1, s_update2], [s_state1, s_state2]) - s = tvm.create_schedule(res1.op) + s = te.create_schedule(res1.op) s[s_xx].compute_inline() s[s_x1].compute_inline() s[s_x2].compute_inline() @@ -191,122 +192,122 @@ def test_scan_inline2(): def test_schedule_cache(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') - C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') + C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) AA = s.cache_read(A, "shared", readers=[C]) CC = s.cache_write(C, "shared") s[AA].compute_at(s[CC], CC.op.axis[0]) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_middle_cache(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') - C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') - D = tvm.compute((m, n), lambda i, j: C(i , j) , name='D') + C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') + D = te.compute((m, n), lambda i, j: C(i , j) , name='D') - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) AA = s.cache_read(A, "local", readers=[C]) BB = s.cache_read(B, "local", readers=[C]) CC = s.cache_read(C, "local", readers=[D]) DD = s.cache_write(D, "local") #s[AA].compute_at(s[CC], CC.op.axis[0]) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout1(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') - C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') + C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].reorder(C.op.axis[1], C.op.axis[0]) CC = s.cache_write(C, "global") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout2(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m*4, n), name='A') - B = tvm.placeholder((m*4, n), name='B') - C = tvm.compute(A.shape, lambda i, j: A(i, j) * B(i, j), name='C') - s = tvm.create_schedule(C.op) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m*4, n), name='A') + B = te.placeholder((m*4, n), name='B') + C = te.compute(A.shape, lambda i, j: A(i, j) * B(i, j), name='C') + s = te.create_schedule(C.op) x, y = C.op.axis xo, xi = s[C].split(x, factor=4) s[C].reorder(xo, y, xi) CC = s.cache_write(C, "global") s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout3(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m*4, n), name='A') - B = tvm.placeholder((m*4, n), name='B') - k = tvm.reduce_axis((0, n), "k") - C = tvm.compute((A.shape[0],), - lambda i: tvm.sum(A(i, k) * B(i, k), axis=k), name='C') - s = tvm.create_schedule(C.op) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m*4, n), name='A') + B = te.placeholder((m*4, n), name='B') + k = te.reduce_axis((0, n), "k") + C = te.compute((A.shape[0],), + lambda i: te.sum(A(i, k) * B(i, k), axis=k), name='C') + s = te.create_schedule(C.op) x = C.op.axis[0] xo, xi = s[C].split(x, factor=4) CC = s.cache_write(C, "global") s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout4(): def _compute(*indice): return A(*indice) + 1, B(*indice) / 2 - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m*4, n), name='A') - B = tvm.placeholder((m*4, n), name='B') - C1, C2 = tvm.compute(A.shape, _compute, name='C') - s = tvm.create_schedule([C1.op, C2.op]) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m*4, n), name='A') + B = te.placeholder((m*4, n), name='B') + C1, C2 = te.compute(A.shape, _compute, name='C') + s = te.create_schedule([C1.op, C2.op]) C1_cache, C2_cache = s.cache_write([C1, C2], "local") s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def intrin_gemv(m, n): - w = tvm.placeholder((m, n), name='w') - x = tvm.placeholder((n,), name='x') - k = tvm.reduce_axis((0, n), name='k') - z = tvm.compute((m,), lambda i: - tvm.sum(w[i, k] * x[k], axis=k), name='z') - Wb = tvm.decl_buffer(w.shape, w.dtype, + w = te.placeholder((m, n), name='w') + x = te.placeholder((n,), name='x') + k = te.reduce_axis((0, n), name='k') + z = te.compute((m,), lambda i: + te.sum(w[i, k] * x[k], axis=k), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, name="W", offset_factor=16, - strides=[tvm.var('ldw'), 1]) + strides=[te.var('ldw'), 1]) def intrin_func(ins, outs): ww, xx = ins zz = outs[0] ww_ptr = ww.access_ptr("r") xx_ptr = xx.access_ptr("r") zz_ptr = zz.access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemm", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) - reset = tvm.call_packed( + reset = tvm.tir.call_packed( "fill_zero", zz_ptr, n) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) return body, reset, update @@ -320,36 +321,36 @@ def test_schedule_tensor_compute1(): # basic: split, reorder, tile M, N, L = 2048, 1024, 512 factor, rfactor = 16, 16 - A = tvm.placeholder((N//factor, L//rfactor, factor, rfactor), name='A') - B = tvm.placeholder((M, L//rfactor, rfactor), name='B') - k = tvm.reduce_axis((0, L//rfactor), name='k') + A = te.placeholder((N//factor, L//rfactor, factor, rfactor), name='A') + B = te.placeholder((M, L//rfactor, rfactor), name='B') + k = te.reduce_axis((0, L//rfactor), name='k') gemv = intrin_gemv(factor, rfactor) - C = tvm.compute((N, M//factor, factor), + C = te.compute((N, M//factor, factor), lambda i, j: gemv(A[i, k, 0:factor, 0:factor], B[j, k, 0:rfactor], reduce_axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) ai, aj, ax = s[C].op.axis aio, aii = s[C].split(ai, 16) s[C].reorder(aio, aj, aii) aioo, ajo, aioi, aji = s[C].tile(aio, aj, 16, 4) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def intrin_vadd(n, cache_read=False, cache_write=False): scope_ubuf = 'local' dtype = 'float32' - x = tvm.placeholder((n,), dtype=dtype, name='vx') - y = tvm.placeholder((n,), dtype=dtype, name='vy') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), dtype=dtype, name='vx') + y = te.placeholder((n,), dtype=dtype, name='vy') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') + s = te.create_schedule(z.op) def create_buffer(t): - return tvm.decl_buffer(t.shape, t.dtype, + return tvm.tir.decl_buffer(t.shape, t.dtype, name='W'+t.name, scope=scope_ubuf, offset_factor=16) @@ -363,7 +364,7 @@ def create_buffer(t): def intrin_func(ins, outs): ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) + ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) return ib.get() with tvm.build_config(offset_factor=16): @@ -377,20 +378,20 @@ def test_schedule_tensor_compute2(): dtype = 'float32' scope_ubuf = 'local' - A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype) + A = te.placeholder((M//factor, factor), name="A", dtype=dtype) + B = te.placeholder((M//factor, factor), name="B", dtype=dtype) vadd = intrin_vadd(factor, True, True) - C = tvm.compute((M//factor, factor), + C = te.compute((M//factor, factor), lambda i: vadd(A[i, 0:factor], B[i, 0:factor]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) AL = s.cache_read(A, scope_ubuf, C) BL = s.cache_read(B, scope_ubuf, C) CL = s.cache_write(C, scope_ubuf) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_tensor_compute3(): @@ -398,48 +399,48 @@ def test_schedule_tensor_compute3(): M = 1024 factor = 16 dtype = 'float32' - A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype) - Bi = tvm.compute((M//factor, factor), lambda i, j: B[i, j] + 5, name="Bi") + A = te.placeholder((M//factor, factor), name="A", dtype=dtype) + B = te.placeholder((M//factor, factor), name="B", dtype=dtype) + Bi = te.compute((M//factor, factor), lambda i, j: B[i, j] + 5, name="Bi") vadd = intrin_vadd(factor) - C = tvm.compute((M//factor, factor), + C = te.compute((M//factor, factor), lambda i: vadd(A[i, 0:factor], Bi[i, 0:factor]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[Bi].compute_at(s[C], C.op.axis[0]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_loop_dep_reduce(): - X = tvm.placeholder(shape=(10,), name="x") + X = te.placeholder(shape=(10,), name="x") def f(n): - rv = tvm.reduce_axis((0, n)) - return tvm.sum(X[rv], axis=rv) - Y = tvm.compute(X.shape, f, name="y") - s = tvm.create_schedule([Y.op]) + rv = te.reduce_axis((0, n)) + return te.sum(X[rv], axis=rv) + Y = te.compute(X.shape, f, name="y") + s = te.create_schedule([Y.op]) f = tvm.build(s, [X, Y]) def test_loop_dep_reduce_cache_write(): - X = tvm.placeholder(shape=(10,), name="x") + X = te.placeholder(shape=(10,), name="x") def f(n): - rv = tvm.reduce_axis((0, n)) - init = lambda dtype: tvm.tir.Select(n > 1, tvm.const(0, dtype), n.astype(dtype)) - sum = tvm.comm_reducer(lambda x, y: tvm.max(x + y, n.astype('float32')), init, name='sum') + rv = te.reduce_axis((0, n)) + init = lambda dtype: tvm.tir.Select(n > 1, tvm.tir.const(0, dtype), n.astype(dtype)) + sum = te.comm_reducer(lambda x, y: tvm.te.max(x + y, n.astype('float32')), init, name='sum') return sum(X[rv], axis=rv) - Y = tvm.compute(X.shape, f, name="y") - s = tvm.create_schedule([Y.op]) + Y = te.compute(X.shape, f, name="y") + s = te.create_schedule([Y.op]) s.cache_write(Y, 'local') f = tvm.build(s, [X, Y]) def test_reduction_and_dummy_fuse_split(): n = 10 - X = tvm.placeholder(shape=(n,), dtype='int32', name="X") - k = tvm.reduce_axis((0, n)) - Y = tvm.compute((), lambda: tvm.sum(X[k], k), name="Y") - s = tvm.create_schedule([Y.op]) + X = te.placeholder(shape=(n,), dtype='int32', name="X") + k = te.reduce_axis((0, n)) + Y = te.compute((), lambda: te.sum(X[k], k), name="Y") + s = te.create_schedule([Y.op]) ax = s[Y.op].fuse(*Y.op.axis) axo, axi = s[Y.op].split(ax, nparts=20) f = tvm.build(s, [Y, X]) @@ -449,10 +450,10 @@ def test_reduction_and_dummy_fuse_split(): assert args[0].asnumpy() == n n = 10 - X = tvm.placeholder(shape=(n,), dtype='int32', name="X") - k = tvm.reduce_axis((0, n)) - Y = tvm.compute((n,), lambda i: tvm.sum(X[k], k), name="Y") - s = tvm.create_schedule([Y.op]) + X = te.placeholder(shape=(n,), dtype='int32', name="X") + k = te.reduce_axis((0, n)) + Y = te.compute((n,), lambda i: te.sum(X[k], k), name="Y") + s = te.create_schedule([Y.op]) ax = s[Y.op].fuse(*(list(Y.op.axis) + list(Y.op.reduce_axis))) f = tvm.build(s, [Y, X]) @@ -463,14 +464,14 @@ def test_reduction_and_dummy_fuse_split(): def test_schedule_compute_inline(): shape = [10, 1024] - A = tvm.placeholder(shape, name="A") - B = tvm.placeholder(shape, name="B") - C = tvm.compute(shape, lambda *index:A(*index)+ B(*index), name = "C") + A = te.placeholder(shape, name="A") + B = te.placeholder(shape, name="B") + C = te.compute(shape, lambda *index:A(*index)+ B(*index), name = "C") def _compute(*index) : return C(*index) , C(*index) * B(*index) - F,E = tvm.compute(shape, _compute, name = "F") + F,E = te.compute(shape, _compute, name = "F") - s = tvm.create_schedule([F.op, E.op]) + s = te.create_schedule([F.op, E.op]) AL = s.cache_read(A, "local", [C]) BL = s.cache_read(B, "local", [C,E]) CL = s.cache_write(C, "local") @@ -478,8 +479,8 @@ def _compute(*index) : s[C].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) if __name__ == "__main__": test_loop_dep_reduce() diff --git a/tests/python/unittest/test_schedule_tensor_core.py b/tests/python/unittest/test_schedule_tensor_core.py index cd9e062dc07bf..5dbe04f93d385 100644 --- a/tests/python/unittest/test_schedule_tensor_core.py +++ b/tests/python/unittest/test_schedule_tensor_core.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from topi.testing import conv2d_nhwc_python from tvm.contrib import nvcc @@ -28,17 +29,17 @@ def intrin_wmma_load_matrix(shape, scope): row, col = n, l elif scope == "wmma.matrix_b": row, col = l, m - A = tvm.placeholder((row, col), name='A', dtype='float16') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=row * col) - C = tvm.compute((row, col), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=row * col) + A = te.placeholder((row, col), name='A', dtype='float16') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=row * col) + C = te.compute((row, col), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=row * col) def intrin_func(ins, outs): ib = tvm.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_load_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync', BC.data, n, m, l, BC.elem_offset // (row * col), BA.access_ptr('r'), col, 'row_major')) return ib.get() @@ -48,16 +49,16 @@ def intrin_func(ins, outs): def intrin_wmma_gemm(shape): n, m, l = shape - A = tvm.placeholder((n, l), name='A', dtype='float16') - B = tvm.placeholder((l, m), name='B', dtype='float16') - k = tvm.reduce_axis((0, l), name="k") - C = tvm.compute((n, m), + A = te.placeholder((n, l), name='A', dtype='float16') + B = te.placeholder((l, m), name='B', dtype='float16') + k = te.reduce_axis((0, l), name="k") + C = te.compute((n, m), lambda ii, jj: - tvm.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), + te.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), name='C') - BA = tvm.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=n * l) - BB = tvm.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=l * m) - BC = tvm.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) + BA = tvm.tir.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=n * l) + BB = tvm.tir.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=l * m) + BC = tvm.tir.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) def intrin_func(ins, outs): BA, BB = ins @@ -65,12 +66,12 @@ def intrin_func(ins, outs): def init(): ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, m, l, BC.elem_offset // (n * m), 0.0)) + ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, m, l, BC.elem_offset // (n * m), 0.0)) return ib.get() def update(): ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_mma_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync', BC.data, BC.elem_offset // (n * m), BA.data, BA.elem_offset // (n * l), BB.data, BB.elem_offset // (l * m), @@ -84,17 +85,17 @@ def update(): def intrin_wmma_store_matrix(shape): n, m, l = shape - A = tvm.placeholder((n, m), name='A', dtype='float32') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) - C = tvm.compute((n, m), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=n * m) + A = te.placeholder((n, m), name='A', dtype='float32') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) + C = te.compute((n, m), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=n * m) def intrin_func(ins, outs): ib = tvm.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_store_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync', BA.data, n, m, l, BA.elem_offset // (n * m), BC.access_ptr('w'), m, 'row_major')) return ib.get() @@ -117,15 +118,15 @@ def test_tensor_core_batch_matmal(): assert (m % 8 == 0) assert (l % 16 == 0) nn, mm, ll = n // 32, m // 8, l // 16 - A = tvm.placeholder((batch_size, nn, ll, 32, 16), name='A', dtype='float16') - B = tvm.placeholder((batch_size, ll, mm, 16, 8), name='B', dtype='float16') - k1 = tvm.reduce_axis((0, ll), name='k1') - k2 = tvm.reduce_axis((0, 16), name='k2') - C = tvm.compute((batch_size, nn, mm, 32, 8), + A = te.placeholder((batch_size, nn, ll, 32, 16), name='A', dtype='float16') + B = te.placeholder((batch_size, ll, mm, 16, 8), name='B', dtype='float16') + k1 = te.reduce_axis((0, ll), name='k1') + k2 = te.reduce_axis((0, 16), name='k2') + C = te.compute((batch_size, nn, mm, 32, 8), lambda b, i, j, ii, jj: - tvm.sum(A[b, i, k1, ii, k2].astype('float') * B[b, k1, j, k2, jj].astype('float'), axis=[k1, k2]), + te.sum(A[b, i, k1, ii, k2].astype('float') * B[b, k1, j, k2, jj].astype('float'), axis=[k1, k2]), name='Fragment_C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) warp_size = 32 kernel_size = 16 @@ -135,12 +136,12 @@ def test_tensor_core_batch_matmal(): warp_col_tiles = 2 chunk = 4 - block_x = tvm.thread_axis('blockIdx.x') - block_y = tvm.thread_axis('blockIdx.y') - block_z = tvm.thread_axis('blockIdx.z') - thread_x = tvm.thread_axis('threadIdx.x') - thread_y = tvm.thread_axis('threadIdx.y') - thread_z = tvm.thread_axis('threadIdx.z') + block_x = te.thread_axis('blockIdx.x') + block_y = te.thread_axis('blockIdx.y') + block_z = te.thread_axis('blockIdx.z') + thread_x = te.thread_axis('threadIdx.x') + thread_y = te.thread_axis('threadIdx.y') + thread_z = te.thread_axis('threadIdx.z') AS = s.cache_read(A, 'shared', [C]) BS = s.cache_read(B, 'shared', [C]) @@ -271,30 +272,30 @@ def test_tensor_core_batch_conv(): assert (in_channels % block_size == 0) assert (out_channels % block_size == 0) - kh = tvm.reduce_axis((0, kernel_h), name='kh') - kw = tvm.reduce_axis((0, kernel_w), name='kw') - ic = tvm.reduce_axis((0, in_channels // block_size), name='ic') - ii = tvm.reduce_axis((0, block_size), name='ii') + kh = te.reduce_axis((0, kernel_h), name='kh') + kw = te.reduce_axis((0, kernel_w), name='kw') + ic = te.reduce_axis((0, in_channels // block_size), name='ic') + ii = te.reduce_axis((0, block_size), name='ii') # Algorithm - A = tvm.placeholder(data_shape, name='A', dtype="float16") - W = tvm.placeholder(kernel_shape, name='W', dtype="float16") - Apad = tvm.compute( + A = te.placeholder(data_shape, name='A', dtype="float16") + W = te.placeholder(kernel_shape, name='W', dtype="float16") + Apad = te.compute( (batch_size // block_size, height + 2 * pad_h, width + 2 * pad_w, in_channels // block_size, block_size, block_size), - lambda n, h, w, i, nn, ii: tvm.if_then_else( - tvm.all(h >= pad_h, h - pad_h < height, + lambda n, h, w, i, nn, ii: tvm.tir.if_then_else( + tvm.tir.all(h >= pad_h, h - pad_h < height, w >= pad_w, w - pad_w < width), - A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.const(0., "float16")), + A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.tir.const(0., "float16")), name='Apad') - Conv = tvm.compute(output_shape, - lambda n, h, w, o, nn, oo: tvm.sum( + Conv = te.compute(output_shape, + lambda n, h, w, o, nn, oo: te.sum( Apad[n, h * stride_h + kh, w * stride_w + kw, ic, nn, ii].astype("float32") * W[kh, kw, ic, o, ii, oo].astype("float32"), axis=[ic, kh, kw, ii]), name="Conv") - s = tvm.create_schedule(Conv.op) + s = te.create_schedule(Conv.op) s[Apad].compute_inline() AS = s.cache_read(Apad, 'shared', [Conv]) @@ -303,12 +304,12 @@ def test_tensor_core_batch_conv(): WF = s.cache_read(WS, 'wmma.matrix_b', [Conv]) ConvF = s.cache_write(Conv, 'wmma.accumulator') - block_x = tvm.thread_axis('blockIdx.x') - block_y = tvm.thread_axis('blockIdx.y') - block_z = tvm.thread_axis('blockIdx.z') - thread_x = tvm.thread_axis('threadIdx.x') - thread_y = tvm.thread_axis('threadIdx.y') - thread_z = tvm.thread_axis('threadIdx.z') + block_x = te.thread_axis('blockIdx.x') + block_y = te.thread_axis('blockIdx.y') + block_z = te.thread_axis('blockIdx.z') + thread_x = te.thread_axis('threadIdx.x') + thread_y = te.thread_axis('threadIdx.y') + thread_z = te.thread_axis('threadIdx.z') nc, hc, wc, oc, nnc, ooc = Conv.op.axis block_k = s[Conv].fuse(hc, wc) diff --git a/tests/python/unittest/test_schedule_tensorize.py b/tests/python/unittest/test_schedule_tensorize.py index ac60c2d34ebde..c23a878c5a397 100644 --- a/tests/python/unittest/test_schedule_tensorize.py +++ b/tests/python/unittest/test_schedule_tensorize.py @@ -15,39 +15,40 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def intrin_vadd(n): - x = tvm.placeholder((n,), name='vx') - y = tvm.placeholder((n,), name='vy') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') + x = te.placeholder((n,), name='vx') + y = te.placeholder((n,), name='vy') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') def intrin_func(ins, outs): xx, yy = ins zz = outs[0] - return tvm.call_packed("vadd", xx, yy, zz) + return tvm.tir.call_packed("vadd", xx, yy, zz) with tvm.build_config(offset_factor=16): return tvm.decl_tensor_intrin(z.op, intrin_func) def intrin_gemv(m, n): - w = tvm.placeholder((m, n), name='w') - x = tvm.placeholder((n,), name='x') - k = tvm.reduce_axis((0, n), name='k') - z = tvm.compute((m,), lambda i: - tvm.sum(w[i, k] * x[k], axis=k), name='z') - Wb = tvm.decl_buffer(w.shape, w.dtype, + w = te.placeholder((m, n), name='w') + x = te.placeholder((n,), name='x') + k = te.reduce_axis((0, n), name='k') + z = te.compute((m,), lambda i: + te.sum(w[i, k] * x[k], axis=k), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, name="W", offset_factor=16, - strides=[tvm.var('ldw'), 1]) + strides=[te.var('ldw'), 1]) def intrin_func(ins, outs): ww, xx = ins zz = outs[0] ww_ptr = ww.access_ptr("r") xx_ptr = xx.access_ptr("r") zz_ptr = zz.access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemv", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) - reset = tvm.call_packed( + reset = tvm.tir.call_packed( "fill_zero", zz_ptr, n) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) return body, reset, update @@ -57,24 +58,24 @@ def intrin_func(ins, outs): binds={w: Wb}) def intrin_gemv_no_reset(m, n): - w = tvm.placeholder((m, n), name='w') - x = tvm.placeholder((n,), name='x') - k = tvm.reduce_axis((0, n), name='k') - z = tvm.compute((m,), lambda i: - tvm.sum(w[i, k] * x[k], axis=k), name='z') - Wb = tvm.decl_buffer(w.shape, w.dtype, + w = te.placeholder((m, n), name='w') + x = te.placeholder((n,), name='x') + k = te.reduce_axis((0, n), name='k') + z = te.compute((m,), lambda i: + te.sum(w[i, k] * x[k], axis=k), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, name="W", offset_factor=16, - strides=[tvm.var('ldw'), 1]) + strides=[te.var('ldw'), 1]) def intrin_func(ins, outs): ww, xx = ins zz = outs[0] ww_ptr = ww.access_ptr("r") xx_ptr = xx.access_ptr("r") zz_ptr = zz.access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemv", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) return body, None, update @@ -86,17 +87,17 @@ def intrin_func(ins, outs): def test_tensorize_vadd(): m = 128 - x = tvm.placeholder((m,), name='x') - y = tvm.placeholder((m,), name='y') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') + x = te.placeholder((m,), name='x') + y = te.placeholder((m,), name='y') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') def check(factor): - s = tvm.create_schedule(z.op) + s = te.create_schedule(z.op) xo, xi = s[z].split(z.op.axis[0], factor=factor) vadd = intrin_vadd(factor) s[z].tensorize(xi, vadd) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[z], dom_map) assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor) @@ -106,7 +107,7 @@ def check(factor): body = fmatch(s[z], out_dom, in_dom, vadd) assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), tvm.ir_pass.CanonicalSimplify(vadd.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [x, y, z]) check(16) @@ -116,20 +117,20 @@ def test_tensorize_matmul(): n = 1024 m = n l = n - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: - tvm.sum(B[j, k] * A[i, k], axis=k), name='C') + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: + te.sum(B[j, k] * A[i, k], axis=k), name='C') def check(factor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis yo, yi = s[C].split(y, factor=factor) gemv = intrin_gemv(factor, l) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) assert tvm.ir_pass.Equal(out_dom[x].extent, 1) @@ -139,12 +140,12 @@ def check(factor): body = fmatch(s[C], out_dom, in_dom, gemv) assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) def check_rfactor(factor, rfactor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis rk = C.op.reduce_axis[0] yo, yi = s[C].split(y, factor=factor) @@ -153,7 +154,7 @@ def check_rfactor(factor, rfactor): gemv = intrin_gemv(factor, rfactor) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) assert tvm.ir_pass.Equal(out_dom[x].extent, 1) @@ -163,11 +164,11 @@ def check_rfactor(factor, rfactor): body = fmatch(s[C], out_dom, in_dom, gemv) assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) def check_rfactor_no_reset(factor, rfactor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis rk = C.op.reduce_axis[0] yo, yi = s[C].split(y, factor=factor) @@ -176,7 +177,7 @@ def check_rfactor_no_reset(factor, rfactor): gemv = intrin_gemv_no_reset(factor, rfactor) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) assert tvm.ir_pass.Equal(out_dom[x].extent, 1) @@ -186,11 +187,11 @@ def check_rfactor_no_reset(factor, rfactor): body = fmatch(s[C], out_dom, in_dom, gemv) assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) def check_rfactor_no_reset_multi_reduction(factor, rfactor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis rk = C.op.reduce_axis[0] yo, yi = s[C].split(y, factor=factor) @@ -200,7 +201,7 @@ def check_rfactor_no_reset_multi_reduction(factor, rfactor): gemv = intrin_gemv_no_reset(factor, rfactor) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) assert tvm.ir_pass.Equal(out_dom[x].extent, 1) @@ -210,7 +211,7 @@ def check_rfactor_no_reset_multi_reduction(factor, rfactor): body = fmatch(s[C], out_dom, in_dom, gemv) assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) check(16) @@ -221,28 +222,28 @@ def check_rfactor_no_reset_multi_reduction(factor, rfactor): # This tests whether algorithm and intrinsics expressions are simplified # as much as possible first and then checked for equality. See Issue #696 def test_tensorize_op(): - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod def op_intrin(): bh = 9 bw = 9 - x = tvm.placeholder((5, 5), name='A') - y = tvm.compute((bh, bw), + x = te.placeholder((5, 5), name='A') + y = te.compute((bh, bw), lambda i, j: x[idxd(j,3) + idxm(i,3), idxm(j,3)+ idxd(i,3)]) def intrin_func(ins, outs): xx, = ins zz = outs[0] - return tvm.call_packed("op", xx, zz) + return tvm.tir.call_packed("op", xx, zz) with tvm.build_config(offset_factor=2): return tvm.decl_tensor_intrin(y.op, intrin_func) - A = tvm.placeholder((5, 5), name='A') - B = tvm.compute((9,9), lambda i, j: A[idxd(j,3) + idxm(i,3), idxm(j,3) + idxd(i,3)]) + A = te.placeholder((5, 5), name='A') + B = te.compute((9,9), lambda i, j: A[idxd(j,3) + idxm(i,3), idxm(j,3) + idxd(i,3)]) bt = op_intrin() - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) x,y = B.op.axis s[B].tensorize(x, bt) @@ -255,16 +256,16 @@ def test_tensorize_tensor_compute_op(): # an intrinsic called "multivadd" whose definition (pattern) # is a loop of another intrinsic called "vadd" def intrin_multivadd(n): - n_a = tvm.var("n_a") - Ab = tvm.decl_buffer((n, ), tvm.float32, strides=[n_a]) + n_a = te.var("n_a") + Ab = tvm.tir.decl_buffer((n, ), "float32", strides=[n_a]) - n_b = tvm.var("n_b") - Bb = tvm.decl_buffer((n, ), tvm.float32, strides=[n_b]) + n_b = te.var("n_b") + Bb = tvm.tir.decl_buffer((n, ), "float32", strides=[n_b]) - n_c = tvm.var("n_c") - Cb = tvm.decl_buffer((n, ), tvm.float32, strides=[n_c]) + n_c = te.var("n_c") + Cb = tvm.tir.decl_buffer((n, ), "float32", strides=[n_c]) - z = tvm.compute((n,), lambda i: tvm.call_extern("float32", 'vadd', + z = te.compute((n,), lambda i: tvm.tir.call_extern("float32", 'vadd', Ab.access_ptr("w", offset=n_a*i), Bb.access_ptr("r", offset=n_b*i), Cb.access_ptr("r", offset=n_c*i))) @@ -272,26 +273,26 @@ def intrin_multivadd(n): # replace the pattern with the multivadd call. I need to figure out # how to pass it the right parameters. def intrin_func(ins, outs): - return tvm.call_packed("multivadd") + return tvm.tir.call_packed("multivadd") with tvm.build_config(): return tvm.decl_tensor_intrin(z.op, intrin_func, name="multivadd") def intrin_vadd(n): dtype = 'float32' - x = tvm.placeholder((n,), dtype=dtype, name='vx') - y = tvm.placeholder((n,), dtype=dtype, name='vy') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), dtype=dtype, name='vx') + y = te.placeholder((n,), dtype=dtype, name='vy') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') + s = te.create_schedule(z.op) def create_buffer(t): - return tvm.decl_buffer(t.shape, t.dtype, + return tvm.tir.decl_buffer(t.shape, t.dtype, name='W'+t.name, offset_factor=16) def intrin_func(ins, outs): ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("float32", 'vadd', + ib.emit(tvm.tir.call_extern("float32", 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) return ib.get() @@ -306,19 +307,19 @@ def intrin_func(ins, outs): factor = 16 dtype = 'float32' - A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype) + A = te.placeholder((M//factor, factor), name="A", dtype=dtype) + B = te.placeholder((M//factor, factor), name="B", dtype=dtype) vadd = intrin_vadd(factor) - C = tvm.compute((M//factor, factor), + C = te.compute((M//factor, factor), lambda i: vadd(A[i, 0:factor], B[i, 0:factor]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) multivadd = intrin_multivadd(64) s[C].tensorize(C.op.axis[0], multivadd) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + dom_map = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) # The loop that we tried to tensorize still exists in the code # That means tensorize didn't work as expected assert isinstance(stmt.body.body.body, tvm.tir.For) diff --git a/tests/python/unittest/test_testing.py b/tests/python/unittest/test_testing.py index b17d8893a9556..ecf520d251f15 100644 --- a/tests/python/unittest/test_testing.py +++ b/tests/python/unittest/test_testing.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm.testing import check_numerical_grads def test_check_numerical_grads(): diff --git a/tests/python/unittest/test_tvm_intrin.py b/tests/python/unittest/test_tvm_intrin.py index 23e921d3f1ce7..5bb1c65387501 100644 --- a/tests/python/unittest/test_tvm_intrin.py +++ b/tests/python/unittest/test_tvm_intrin.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi from tvm.contrib import util, clang import numpy as np @@ -23,10 +24,10 @@ def test_nearbyint(): - m = tvm.var("m",) - A = tvm.placeholder((m,), name='A') - A_rounded = tvm.compute((m,), lambda *i: tvm.nearbyint(A(*i)), name='A') - s = tvm.create_schedule(A_rounded.op) + m = te.var("m",) + A = te.placeholder((m,), name='A') + A_rounded = te.compute((m,), lambda *i: tvm.tir.nearbyint(A(*i)), name='A') + s = te.create_schedule(A_rounded.op) f = tvm.build(s, [A, A_rounded], "llvm") ctx = tvm.cpu(0) n = 10 diff --git a/tests/web/prepare_test_libs.py b/tests/web/prepare_test_libs.py index ada40e66f7576..a0e2c13eab826 100644 --- a/tests/web/prepare_test_libs.py +++ b/tests/web/prepare_test_libs.py @@ -16,6 +16,7 @@ # under the License. # Prepare test library for js. import tvm +from tvm import te from tvm.contrib import emscripten import os @@ -23,10 +24,10 @@ def prepare_test_libs(base_path): target = "llvm -target=asmjs-unknown-emscripten -system-lib" if not tvm.runtime.enabled(target): raise RuntimeError("Target %s is not enbaled" % target) - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) fadd1 = tvm.build(s, [A, B], target, name="add_one") obj_path = os.path.join(base_path, "test_add_one.bc") fadd1.save(obj_path) diff --git a/tests/web/websock_rpc_test.py b/tests/web/websock_rpc_test.py index 92b0ad350bc04..8be8ce04cb75d 100644 --- a/tests/web/websock_rpc_test.py +++ b/tests/web/websock_rpc_test.py @@ -21,6 +21,7 @@ """ import tvm +from tvm import te import os from tvm import rpc from tvm.contrib import util, emscripten @@ -33,10 +34,10 @@ def test_rpc_array(): if not tvm.runtime.enabled("rpc"): return # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) remote = rpc.connect(proxy_host, proxy_port, key="js") target = "llvm -target=asmjs-unknown-emscripten -system-lib" def check_remote(): diff --git a/tests/webgl/test_local_gemm.py b/tests/webgl/test_local_gemm.py index ff3c1a77bb8d8..6bd22bf0057b8 100644 --- a/tests/webgl/test_local_gemm.py +++ b/tests/webgl/test_local_gemm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_local_gemm(): @@ -24,17 +25,17 @@ def test_local_gemm(): return nn = 1024 - n = tvm.var('n') - n = tvm.convert(nn) + n = te.var('n') + n = tvm.runtime.convert(nn) m = n l = n - A = tvm.placeholder((n, l), name='A', dtype='int32') - B = tvm.placeholder((m, l), name='B', dtype='int32') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k), + A = te.placeholder((n, l), name='A', dtype='int32') + B = te.placeholder((m, l), name='B', dtype='int32') + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k), name='CC') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].opengl() print(tvm.lower(s, [A, B, C], simple_mode=True)) diff --git a/tests/webgl/test_local_multi_stage.py b/tests/webgl/test_local_multi_stage.py index 578639962bb7d..54a554b74ed9b 100644 --- a/tests/webgl/test_local_multi_stage.py +++ b/tests/webgl/test_local_multi_stage.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_local_multi_stage(): @@ -23,12 +24,12 @@ def test_local_multi_stage(): if not tvm.runtime.enabled("llvm"): return - n = tvm.var("n") - A = tvm.placeholder((n,), name='A', dtype="int32") - B = tvm.compute((n,), lambda i: A[i] + 1, name="B") - C = tvm.compute((n,), lambda i: B[i] * 2, name="C") + n = te.var("n") + A = te.placeholder((n,), name='A', dtype="int32") + B = te.compute((n,), lambda i: A[i] + 1, name="B") + C = te.compute((n,), lambda i: B[i] * 2, name="C") - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[B].opengl() s[C].opengl() diff --git a/tests/webgl/test_local_save_load.py b/tests/webgl/test_local_save_load.py index 0a63a77cf52e6..cca68020c0c23 100644 --- a/tests/webgl/test_local_save_load.py +++ b/tests/webgl/test_local_save_load.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import rpc from tvm.contrib import util, emscripten @@ -25,11 +26,11 @@ def test_local_save_load(): if not tvm.runtime.enabled("llvm"): return - n = tvm.var("n") - A = tvm.placeholder((n,), name='A', dtype='int32') - B = tvm.placeholder((n,), name='B', dtype='int32') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = tvm.create_schedule(C.op) + n = te.var("n") + A = te.placeholder((n,), name='A', dtype='int32') + B = te.placeholder((n,), name='B', dtype='int32') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = te.create_schedule(C.op) s[C].opengl() f = tvm.build(s, [A, B, C], "opengl", target_host="llvm", name="myadd") diff --git a/tests/webgl/test_local_topi_conv2d_nchw.py b/tests/webgl/test_local_topi_conv2d_nchw.py index c03d9dcc9007d..484143a1cdd52 100644 --- a/tests/webgl/test_local_topi_conv2d_nchw.py +++ b/tests/webgl/test_local_topi_conv2d_nchw.py @@ -20,6 +20,7 @@ import os import numpy as np import tvm +from tvm import te import topi from tvm.contrib.pickle_memoize import memoize from topi.util import get_const_tuple @@ -27,8 +28,8 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding): in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') B = topi.nn.conv2d_nchw(A, W, stride, padding) C = topi.nn.relu(B) diff --git a/tests/webgl/test_local_topi_dense.py b/tests/webgl/test_local_topi_dense.py index d57bfd20f186e..60dfe1ff690f0 100644 --- a/tests/webgl/test_local_topi_dense.py +++ b/tests/webgl/test_local_topi_dense.py @@ -20,15 +20,16 @@ """ import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple from tvm.contrib.pickle_memoize import memoize def verify_dense(batch, in_dim, out_dim, use_bias=True): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') - C = tvm.placeholder((out_dim,), name='C') + A = te.placeholder((batch, in_dim), name='A') + B = te.placeholder((out_dim, in_dim), name='B') + C = te.placeholder((out_dim,), name='C') D = topi.nn.dense(A, B, C if use_bias else None) D = topi.nn.relu(D) dtype = A.dtype diff --git a/tests/webgl/test_local_topi_pooling.py b/tests/webgl/test_local_topi_pooling.py index c1b66604c6a7a..3adae7bba51c2 100644 --- a/tests/webgl/test_local_topi_pooling.py +++ b/tests/webgl/test_local_topi_pooling.py @@ -20,6 +20,7 @@ """ import numpy as np import tvm +from tvm import te import topi import math from topi.util import get_const_tuple @@ -29,7 +30,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode): kw = kh sw = sh ph, pw = padding - A = tvm.placeholder((n, ic, ih, iw), name='A') + A = te.placeholder((n, ic, ih, iw), name='A') B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode) B = topi.nn.relu(B) @@ -91,7 +92,7 @@ def test_pool(): def verify_global_pool(n, c, h, w, pool_type): - A = tvm.placeholder((n, c, h, w), name='A') + A = te.placeholder((n, c, h, w), name='A') B = topi.nn.global_pool(A, pool_type=pool_type) B = topi.nn.relu(B) diff --git a/tests/webgl/test_local_topi_softmax.py b/tests/webgl/test_local_topi_softmax.py index 5d9ed9345e763..c0ddbf21419ac 100644 --- a/tests/webgl/test_local_topi_softmax.py +++ b/tests/webgl/test_local_topi_softmax.py @@ -22,15 +22,16 @@ import os import numpy as np import tvm +from tvm import te import topi import logging from topi.util import get_const_tuple def verify_softmax(m, n): - A = tvm.placeholder((m, n), name='A') + A = te.placeholder((m, n), name='A') B = topi.nn.softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) @@ -59,10 +60,10 @@ def test_softmax(): def verify_log_softmax(m, n): - A = tvm.placeholder((m, n), name='A') + A = te.placeholder((m, n), name='A') B = topi.nn.log_softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) b_np = topi.testing.log_softmax_python(a_np) diff --git a/tests/webgl/test_remote_save_load.py b/tests/webgl/test_remote_save_load.py index 1e2ca0f8dd5c8..34bbb3fa0f002 100644 --- a/tests/webgl/test_remote_save_load.py +++ b/tests/webgl/test_remote_save_load.py @@ -30,6 +30,7 @@ import numpy as np import tvm +from tvm import te from tvm import rpc from tvm.contrib import util, emscripten @@ -45,11 +46,11 @@ def try_remote_save_load(): return # Build the module. - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = tvm.create_schedule(C.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = te.create_schedule(C.op) s[C].opengl() target_host = "llvm -target=asmjs-unknown-emscripten -system-lib" f = tvm.build(s, [A, B, C], "opengl", target_host=target_host, name="myadd") diff --git a/tests/webgl/test_static_webgl_library.py b/tests/webgl/test_static_webgl_library.py index 365f821845ae9..929da4ca294cd 100644 --- a/tests/webgl/test_static_webgl_library.py +++ b/tests/webgl/test_static_webgl_library.py @@ -20,6 +20,7 @@ import os, shutil, SimpleHTTPServer, SocketServer import tvm +from tvm import te from tvm.contrib import emscripten, util import numpy as np @@ -30,11 +31,11 @@ def try_static_webgl_library(): os.chdir(os.path.join(curr_path, "../../lib")) # Create OpenGL module. - n = tvm.var("n") - A = tvm.placeholder((n,), name='A', dtype="float") - B = tvm.compute((n,), lambda *i: A[i], name="B") + n = te.var("n") + A = te.placeholder((n,), name='A', dtype="float") + B = te.compute((n,), lambda *i: A[i], name="B") - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].opengl() target_host = "llvm -target=asmjs-unknown-emscripten -system-lib" diff --git a/topi/python/topi/argwhere.py b/topi/python/topi/argwhere.py index c2a9adea0c2ad..7d8429b959046 100644 --- a/topi/python/topi/argwhere.py +++ b/topi/python/topi/argwhere.py @@ -24,12 +24,12 @@ def hybrid_argwhere_1d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 1-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -47,12 +47,12 @@ def hybrid_argwhere_2d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 2-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -73,12 +73,12 @@ def hybrid_argwhere_3d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 3-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -102,12 +102,12 @@ def hybrid_argwhere_4d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 4-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -134,12 +134,12 @@ def hybrid_argwhere_5d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 5-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -168,12 +168,12 @@ def argwhere(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor Tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ if len(condition.shape) == 1: diff --git a/topi/python/topi/arm_cpu/bitserial_conv2d.py b/topi/python/topi/arm_cpu/bitserial_conv2d.py index d28ec09925c28..1f6f68c37b4b1 100644 --- a/topi/python/topi/arm_cpu/bitserial_conv2d.py +++ b/topi/python/topi/arm_cpu/bitserial_conv2d.py @@ -18,6 +18,7 @@ """Bitserial conv2d schedule on arm cpu""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm import relay from .. import tag @@ -34,8 +35,8 @@ def _kernel_vec_spatial_pack_nhwc(kernel, kernel_bits, VC, use_bitpack=True): kernel_q = kernel KH, KW, KB, CI, CO = kernel_q.shape kvshape = (CO//VC, KH, KW, KB, VC, CI) - return tvm.compute(kvshape, lambda co, dh, dw, b, vc, ci: \ - kernel_q[dh][dw][b][ci][co*VC+vc], name='kernel_vec') + return te.compute(kvshape, lambda co, dh, dw, b, vc, ci: \ + kernel_q[dh][dw][b][ci][co*VC+vc], name='kernel_vec') @autotvm.register_topi_compute("bitserial_conv2d_nhwc.arm_cpu") def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, weight_bits, @@ -69,8 +70,8 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, w OW = (PAD_W - KW) // WSTR + 1 oshape = (1, OH, OW, CO) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod # Pad input channels of weights and data when it is not a multiple of 8 if CI_packed % 8 != 0: @@ -108,7 +109,7 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, w data_q = bitpack(data, activation_bits, pack_axis=3, bit_axis=3, pack_type='uint8') kernel_vec = _kernel_vec_spatial_pack_nhwc(kernel, weight_bits, VC, len(kernel.shape) == 4) - idxm = tvm.indexmod + idxm = tvm.tir.indexmod if idxm(kernel_vec.shape[-1], 8) != 0 and CI_PAD != 0: kernel_vec = pad(kernel_vec, [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, CI_PAD]) @@ -125,78 +126,79 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, w else: data_pad = data_q - data_vec = tvm.compute(dvshape, lambda n, h, w, vh, vw, b, ci: \ - data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][b][ci], name='data_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') - ib = tvm.reduce_axis((0, IB), name='ib') - kb = tvm.reduce_axis((0, KB), name='kb') + data_vec = te.compute(dvshape, lambda n, h, w, vh, vw, b, ci: \ + data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][b][ci], name='data_vec') + ci = te.reduce_axis((0, CI), name='ci') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') + ib = te.reduce_axis((0, IB), name='ib') + kb = te.reduce_axis((0, KB), name='kb') def _bipolar_conv(n, h, w, co, vh, vw, vc): - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( kernel_vec[co, dh, dw, kb, vc, ci].astype('uint16') & data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('uint16')) - << (kb + ib).astype('uint16')), axis=[dh, dw, kb, ib, ci]) + << (kb + ib).astype('uint16')), axis=[dh, dw, kb, ib, ci]) def _unipolar_conv(n, h, w, co, vh, vw, vc): - return tvm.sum( - ((tvm.popcount(kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & - data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('int16')) - - tvm.popcount(~kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & - data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci]).astype('int16')) + return te.sum( + ((tvm.tir.popcount(kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & + data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('int16')) - + tvm.tir.popcount(~kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & + data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci]).astype('int16')) << (kb + ib).astype('int16')), axis=[dh, dw, kb, ib, ci]) if unipolar: - conv_vec = tvm.compute(ovshape, _unipolar_conv, name='conv_vec', tag='unipolar') + conv_vec = te.compute(ovshape, _unipolar_conv, name='conv_vec', tag='unipolar') else: - conv_vec = tvm.compute(ovshape, _bipolar_conv, name='conv_vec', tag='bipolar') + conv_vec = te.compute(ovshape, _bipolar_conv, name='conv_vec', tag='bipolar') - conv = tvm.compute(oshape, - lambda n, h, w, co: - conv_vec[n, - idxd(h, VH), idxd(w, VW), idxd(co, VC), - idxm(h, VH), idxm(w, VW), idxm(co, VC)].astype(out_dtype), - name='conv', tag='spatial_bitserial_conv_nhwc') + conv = te.compute(oshape, + lambda n, h, w, co: + conv_vec[n, + idxd(h, VH), idxd(w, VW), idxd(co, VC), + idxm(h, VH), idxm(w, VW), idxm(co, VC)].astype(out_dtype), + name='conv', tag='spatial_bitserial_conv_nhwc') return conv def _intrin_popcount(m, k_i, w_b, x_b, unipolar): pack_dtype = 'uint8' - w = tvm.placeholder((w_b, m, k_i), dtype=pack_dtype, name='w') - x = tvm.placeholder((x_b, k_i,), dtype=pack_dtype, name='x') - k = tvm.reduce_axis((0, k_i), name='k') - bw = tvm.reduce_axis((0, w_b), name='bw') - bx = tvm.reduce_axis((0, x_b), name='bx') + w = te.placeholder((w_b, m, k_i), dtype=pack_dtype, name='w') + x = te.placeholder((x_b, k_i,), dtype=pack_dtype, name='x') + k = te.reduce_axis((0, k_i), name='k') + bw = te.reduce_axis((0, w_b), name='bw') + bx = te.reduce_axis((0, x_b), name='bx') if unipolar: dtype = 'int16' - z = tvm.compute((m,), lambda i: - tvm.sum((tvm.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) - - tvm.popcount(~w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype))) - << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') + z = te.compute( + (m,), lambda i: + te.sum((tvm.tir.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) - + tvm.tir.popcount(~w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype))) + << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') else: dtype = 'uint16' - z = tvm.compute((m,), lambda i: - tvm.sum(tvm.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) - << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') - Wb = tvm.decl_buffer(w.shape, w.dtype, - name="W", - offset_factor=k_i, - strides=[tvm.var('ldw'), tvm.var('ldw'), 1]) # stride can be inferred - Xb = tvm.decl_buffer(x.shape, x.dtype, - name="X", - offset_factor=k_i, - strides=[tvm.var('ldw'), 1]) - Zb = tvm.decl_buffer(z.shape, z.dtype, - name="Z", - offset_factor=1, - strides=[1]) + z = te.compute((m,), lambda i: + te.sum(tvm.tir.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) + << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, + name="W", + offset_factor=k_i, + strides=[te.var('ldw'), te.var('ldw'), 1]) # stride can be inferred + Xb = tvm.tir.decl_buffer(x.shape, x.dtype, + name="X", + offset_factor=k_i, + strides=[te.var('ldw'), 1]) + Zb = tvm.tir.decl_buffer(z.shape, z.dtype, + name="Z", + offset_factor=1, + strides=[1]) def _intrin_func(ins, outs): ww, xx = ins zz = outs[0] - args_1 = tvm.const(1, 'uint32') - args_2 = tvm.const(2, 'uint32') + args_1 = tvm.tir.const(1, 'uint32') + args_2 = tvm.tir.const(2, 'uint32') if unipolar: vpadd = "llvm.arm.neon.vpadd.v8i8" @@ -214,7 +216,7 @@ def _intrin_func(ins, outs): def _instr(index): irb = tvm.ir_builder.create() if index == 1: # reduce reset - irb.emit(zz.vstore(0, tvm.const(0, return_dtype))) + irb.emit(zz.vstore(0, tvm.tir.const(0, return_dtype))) return irb.get() # body and reduce update cnts8 = [None] * 8 @@ -227,40 +229,44 @@ def _instr(index): w_ = ww.vload([bw, i, 0], 'uint8x16').astype(full_dtype) x_ = xx.vload([bx, 0], 'uint8x16').astype(full_dtype) if unipolar: - cnts = tvm.popcount(w_ & x_) - tvm.popcount(~w_ & x_) + cnts = tvm.tir.popcount(w_ & x_) - tvm.tir.popcount(~w_ & x_) else: - cnts = tvm.popcount(w_ & x_) - upper_half = tvm.call_pure_intrin(half_dtype, 'vectorhigh', cnts) - lower_half = tvm.call_pure_intrin(half_dtype, 'vectorlow', cnts) + cnts = tvm.tir.popcount(w_ & x_) + upper_half = tvm.tir.call_pure_intrin(half_dtype, 'vectorhigh', cnts) + lower_half = tvm.tir.call_pure_intrin(half_dtype, 'vectorlow', cnts) cnts8[i] = upper_half + lower_half for i in range(m//2): - cnts4[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts8[i*2], cnts8[i*2+1]) + cnts4[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts8[i*2], cnts8[i*2+1]) for i in range(m//4): - cnts2[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts4[i*2], cnts4[i*2+1]) - cnts = tvm.call_pure_intrin(full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) - shifted_cnts = cnts << tvm.const(bw+bx, pack_dtype) - out = tvm.call_llvm_intrin(return_dtype, vpadalu, - args_2, zz.vload(0, return_dtype), shifted_cnts) + cnts2[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts4[i*2], cnts4[i*2+1]) + cnts = tvm.tir.call_pure_intrin( + full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) + shifted_cnts = cnts << tvm.tir.const(bw+bx, pack_dtype) + out = tvm.tir.call_llvm_intrin( + return_dtype, vpadalu, + args_2, zz.vload(0, return_dtype), shifted_cnts) else: # ki == 8 for i in range(m): w_ = ww.vload([bw, i, 0], 'uint8x8').astype(half_dtype) x_ = xx.vload([bx, 0], 'uint8x8').astype(half_dtype) if unipolar: - cnts8[i] = tvm.popcount(w_ & x_) - tvm.popcount(~w_ & x_) + cnts8[i] = tvm.tir.popcount(w_ & x_) - tvm.tir.popcount(~w_ & x_) else: - cnts8[i] = tvm.popcount(w_ & x_) + cnts8[i] = tvm.tir.popcount(w_ & x_) for i in range(m//2): - cnts4[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts8[i*2], cnts8[i*2+1]) + cnts4[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts8[i*2], cnts8[i*2+1]) for i in range(m//4): - cnts2[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts4[i*2], cnts4[i*2+1]) - cnts = tvm.call_pure_intrin(full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) - shifted_cnts = cnts << tvm.const(bw+bx, pack_dtype) - out = tvm.call_llvm_intrin(return_dtype, vpadalu, - args_2, zz.vload(0, return_dtype), shifted_cnts) + cnts2[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts4[i*2], cnts4[i*2+1]) + cnts = tvm.tir.call_pure_intrin( + full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) + shifted_cnts = cnts << tvm.tir.const(bw+bx, pack_dtype) + out = tvm.tir.call_llvm_intrin( + return_dtype, vpadalu, + args_2, zz.vload(0, return_dtype), shifted_cnts) irb.emit(zz.vstore(0, out)) return irb.get() # body, reset, update @@ -325,7 +331,7 @@ def _schedule_spatial_conv2d_nhwc(cfg, s, data_pad, data_vec, kernel_vec, @autotvm.register_topi_schedule("bitserial_conv2d_nhwc.arm_cpu") def schedule_bitserial_conv2d_nhwc(cfg, outs): """Arm cpu schedule for bitserial conv2d""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -335,7 +341,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'spatial_bitserial_conv_nhwc' in op.tag: @@ -347,7 +353,7 @@ def traverse(op): data_q = data_vec.op.input_tensors[0] data = data_q.op.input_tensors[0] data_pad = None - if isinstance(data_q.op, tvm.tensor.ComputeOp) and "pad" in data_q.op.tag: + if isinstance(data_q.op, te.tensor.ComputeOp) and "pad" in data_q.op.tag: data_pad = data_q data_q = data data = data.op.input_tensors[0] diff --git a/topi/python/topi/arm_cpu/bitserial_dense.py b/topi/python/topi/arm_cpu/bitserial_dense.py index 3f1889c8d7ff9..beed79da49d08 100644 --- a/topi/python/topi/arm_cpu/bitserial_dense.py +++ b/topi/python/topi/arm_cpu/bitserial_dense.py @@ -18,6 +18,7 @@ """Schedule for bitserial dense operator.""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from topi.util import get_const_tuple from .. import tag @@ -32,15 +33,15 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype, out_d Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype) @@ -83,23 +84,23 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype, out_d wvshape = (out_dim//VY, in_dim//VK, WB, VY, VK) oshape = (batch, out_dim) - k = tvm.reduce_axis((0, in_dim), name='k') - db = tvm.reduce_axis((0, DB), name='db') - wb = tvm.reduce_axis((0, WB), name='wb') + k = te.reduce_axis((0, in_dim), name='k') + db = te.reduce_axis((0, DB), name='db') + wb = te.reduce_axis((0, WB), name='wb') # Tile data and weights - weight_vec = tvm.compute(wvshape, lambda yo, ko, wb, vy, vk: - weight_packed[yo*VY+vy][wb][ko*VK+vk], name='weight_vec') - matmul_unipolar = tvm.compute(oshape, lambda x, y: tvm.sum( - (tvm.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & - data_packed[x, db, k].astype(out_dtype)) - - tvm.popcount(~weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & - data_packed[x, db, k].astype(out_dtype))) + weight_vec = te.compute(wvshape, lambda yo, ko, wb, vy, vk: + weight_packed[yo*VY+vy][wb][ko*VK+vk], name='weight_vec') + matmul_unipolar = te.compute(oshape, lambda x, y: te.sum( + (tvm.tir.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & + data_packed[x, db, k].astype(out_dtype)) - + tvm.tir.popcount(~weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & + data_packed[x, db, k].astype(out_dtype))) << (wb+db).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense_unipolar') - matmul = tvm.compute(oshape, lambda x, y: tvm.sum( - tvm.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & - data_packed[x, db, k].astype(out_dtype)) + matmul = te.compute(oshape, lambda x, y: te.sum( + tvm.tir.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & + data_packed[x, db, k].astype(out_dtype)) << (wb+db).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense') cfg.add_flop(batch * out_dim * in_dim * binary_op_multiplier(pack_dtype)) @@ -124,8 +125,8 @@ def schedule_bitserial_dense(cfg, outs): s: Schedule The computation schedule for bitserial_dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(cfg, s, data_vec, weight_vec, output, unipolar): @@ -162,7 +163,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif op.tag == 'bitserial_dense' or 'bitserial_dense_unipolar': diff --git a/topi/python/topi/arm_cpu/conv2d.py b/topi/python/topi/arm_cpu/conv2d.py index 2144d260c5b1a..72ed4da510bd8 100644 --- a/topi/python/topi/arm_cpu/conv2d.py +++ b/topi/python/topi/arm_cpu/conv2d.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm import tvm.contrib.nnpack @@ -27,9 +28,9 @@ from ..nn.util import get_const_int, get_pad_tuple from ..nn.winograd_util import winograd_transform_matrices from .conv2d_spatial_pack import conv2d_spatial_pack_nchw, \ - conv2d_spatial_pack_nhwc, \ - schedule_conv2d_spatial_pack_nchw, \ - schedule_conv2d_spatial_pack_nhwc + conv2d_spatial_pack_nhwc, \ + schedule_conv2d_spatial_pack_nchw, \ + schedule_conv2d_spatial_pack_nhwc @autotvm.register_topi_compute("conv2d_nchw_spatial_pack.arm_cpu") @@ -42,7 +43,7 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ @autotvm.register_topi_schedule("conv2d_nchw_spatial_pack.arm_cpu") def schedule_conv2d_nchw_spatial_pack(cfg, outs): """Create schedule for conv2d_nchw""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): # schedule conv2d @@ -59,7 +60,7 @@ def _callback(op): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec, @@ -79,7 +80,7 @@ def conv2d_nhwc_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ @autotvm.register_topi_schedule("conv2d_nhwc_spatial_pack.arm_cpu") def schedule_conv2d_nhwc_spatial_pack(cfg, outs): """Create schedule for conv2d_nhwc""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'spatial_conv_output_NHWC' in op.tag: @@ -100,7 +101,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.arm_cpu") def schedule_conv2d_nchw_winograd(cfg, outs): """Create schedule for conv2d_nchw_winograd""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_conv2d_output' in op.tag: @@ -136,8 +137,8 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til assert KH == 3 and KW == 3 and HSTR == 1 and WSTR == 1 data_pad = nn.pad(data, (0, 0, pt, pl), (0, 0, pb, pr), name="data_pad") - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod r = KW m = tile_size @@ -158,48 +159,48 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til VK = cfg['tile_k'].size[-1] # pack input tile - input_tile = tvm.compute((C, idxd(P, VP), alpha, alpha, VP), - lambda c, b, eps, nu, bb: - data_pad[idxd(b*VP + bb, nH*nW), c, - idxm(idxd(b*VP + bb, nW), nH) * m + eps, - idxm(b*VP + bb, nW) * m + nu], - name='d') + input_tile = te.compute((C, idxd(P, VP), alpha, alpha, VP), + lambda c, b, eps, nu, bb: + data_pad[idxd(b*VP + bb, nH*nW), c, + idxm(idxd(b*VP + bb, nW), nH) * m + eps, + idxm(b*VP + bb, nW) * m + nu], + name='d') # transform kernel if pre_computed: U = kernel else: - r_kh = tvm.reduce_axis((0, KH), 'r_kh') - r_kw = tvm.reduce_axis((0, KW), 'r_kw') - U = tvm.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk: - tvm.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) * - G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U') + r_kh = te.reduce_axis((0, KH), 'r_kh') + r_kw = te.reduce_axis((0, KW), 'r_kw') + U = te.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk: + te.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) * + G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U') # transform image - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - V = tvm.compute((alpha, alpha, idxd(P, VP), C, VP), lambda eps, nu, b, c, bb: - tvm.sum(input_tile[c][b][r_eps][r_nu][bb].astype(out_dtype) * - B[r_eps][eps] * B[r_nu][nu], axis=[r_eps, r_nu]), name='V') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, alpha), 'r_nu') + V = te.compute((alpha, alpha, idxd(P, VP), C, VP), lambda eps, nu, b, c, bb: + te.sum(input_tile[c][b][r_eps][r_nu][bb].astype(out_dtype) * + B[r_eps][eps] * B[r_nu][nu], axis=[r_eps, r_nu]), name='V') # batch gemm - c = tvm.reduce_axis((0, C), name='c') - M = tvm.compute((alpha, alpha, K, P), lambda eps, nu, k, b: - tvm.sum(U[eps][nu][idxd(k, VK)][c][idxm(k, VK)] * - V[eps][nu][idxd(b, VP)][c][idxm(b, VP)], axis=c), name='M') + c = te.reduce_axis((0, C), name='c') + M = te.compute((alpha, alpha, K, P), lambda eps, nu, k, b: + te.sum(U[eps][nu][idxd(k, VK)][c][idxm(k, VK)] * + V[eps][nu][idxd(b, VP)][c][idxm(b, VP)], axis=c), name='M') # inverse transform - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - Y = tvm.compute((K, P, m, m), lambda k, b, vh, vw: - tvm.sum(M[r_eps][r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], - axis=[r_eps, r_nu]), name='Y') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, alpha), 'r_nu') + Y = te.compute((K, P, m, m), lambda k, b, vh, vw: + te.sum(M[r_eps][r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], + axis=[r_eps, r_nu]), name='Y') # unpack output - output = tvm.compute((N, K, H, W), lambda n, k, h, w: - Y[k][n * nH * nW + idxd(h, m) * nW + idxd(w, m), - idxm(h, m), idxm(w, m)], - name='output', tag='winograd_conv2d_output') + output = te.compute((N, K, H, W), lambda n, k, h, w: + Y[k][n * nH * nW + idxd(h, m) * nW + idxd(w, m), + idxm(h, m), idxm(w, m)], + name='output', tag='winograd_conv2d_output') # we have to manually assign effective GFLOP for winograd cfg.add_flop(2 * N * K * H * W * KH * KW * C) @@ -220,7 +221,7 @@ def _schedule_winograd(cfg, s, output, last): s[d].compute_inline() # transform kernel - if isinstance(U.op, tvm.tensor.ComputeOp): + if isinstance(U.op, tvm.te.ComputeOp): kernel, G = U.op.input_tensors s[G].compute_inline() eps, nu, k, c, kk, = s[U].op.axis @@ -236,7 +237,7 @@ def _schedule_winograd(cfg, s, output, last): s[U].vectorize(kk) s[U].parallel(k) - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # transform image @@ -310,7 +311,7 @@ def conv2d_nchw_winograd_nnpack(cfg, data, kernel, strides, padding, dilation, o @autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack.arm_cpu") def schedule_conv2d_nchw_winograd_nnpack(cfg, outs): """Create schedule for conv2d_nchw_winograd_nnpack""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_nnpack_conv2d_output' in op.tag: @@ -348,7 +349,7 @@ def _conv2d_arm_cpu_winograd_nnpack( transformed_kernel = tvm.contrib.nnpack.convolution_inference_weight_transform( kernel, algorithm=cfg['winograd_nnpack_algorithm'].val) if autotvm.GLOBAL_SCOPE.in_tuning: - transformed_kernel = tvm.compute(transformed_kernel.shape, lambda *args: 0.0) + transformed_kernel = te.compute(transformed_kernel.shape, lambda *args: 0.0) with tvm.tag_scope("winograd_nnpack_conv2d_output"): output = tvm.contrib.nnpack.convolution_inference_without_weight_transform( @@ -369,8 +370,8 @@ def _schedule_winograd_nnpack(cfg, s, output, last): (X, TK) = output.op.input_tensors[:2] # transform kernel - assert isinstance(TK.op, (tvm.tensor.ComputeOp, tvm.tensor.ExternOp, tvm.tensor.PlaceholderOp)) - if autotvm.GLOBAL_SCOPE.in_tuning and isinstance(TK.op, tvm.tensor.ComputeOp): + assert isinstance(TK.op, (te.tensor.ComputeOp, te.tensor.ExternOp, te.tensor.PlaceholderOp)) + if autotvm.GLOBAL_SCOPE.in_tuning and isinstance(TK.op, te.tensor.ComputeOp): # kernel transformation will be pre-computed during compilation, so we skip # this part to make tuning records correct s[TK].pragma(s[TK].op.axis[0], 'debug_skip_region') @@ -415,7 +416,7 @@ def conv2d_nchw_winograd_nnpack_without_weight_transform( @autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack_without_weight_transform.arm_cpu") def schedule_conv2d_nchw_winograd_nnpack_without_weight_transform(cfg, outs): """TOPI schedule callback""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_nnpack_conv2d_output' in op.tag: diff --git a/topi/python/topi/arm_cpu/conv2d_alter_op.py b/topi/python/topi/arm_cpu/conv2d_alter_op.py index bfbf5d6d62b09..3a22611ed1289 100644 --- a/topi/python/topi/arm_cpu/conv2d_alter_op.py +++ b/topi/python/topi/arm_cpu/conv2d_alter_op.py @@ -20,6 +20,7 @@ import logging import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -58,7 +59,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): data, kernel = tinfos out_dtype = out_type.dtype - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv if topi_tmpl == "conv2d_nchw_spatial_pack.arm_cpu": assert data_layout == "NCHW" and kernel_layout == "OIHW" @@ -69,7 +70,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OIHW%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "conv2d_nchw_spatial_pack.arm_cpu") @@ -86,7 +87,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OHWI%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), KH, KW, CI, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), KH, KW, CI, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "conv2d_nhwc_spatial_pack.arm_cpu") @@ -113,10 +114,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['tile_size'] = tile_size new_data = data - new_kernel = tvm.placeholder((KH + tile_size - 1, - KW + tile_size -1, - idxd(CO, VC), CI, VC), - kernel.dtype) + new_kernel = te.placeholder((KH + tile_size - 1, + KW + tile_size -1, + idxd(CO, VC), CI, VC), + kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], 'conv2d_nchw_winograd.arm_cpu') @@ -141,7 +142,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): out_dtype=weight_dtype) new_data = data - new_kernel = tvm.placeholder((CO, CI, 8, 8), "float32") + new_kernel = te.placeholder((CO, CI, 8, 8), "float32") new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, None, strides, padding, dilation, out_dtype], @@ -160,7 +161,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): # Store the same config for the altered operator (workload) new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "depthwise_conv2d_nchw_spatial_pack.arm_cpu") diff --git a/topi/python/topi/arm_cpu/conv2d_int8.py b/topi/python/topi/arm_cpu/conv2d_int8.py index 5d177fe76ab63..06412b656b4b7 100644 --- a/topi/python/topi/arm_cpu/conv2d_int8.py +++ b/topi/python/topi/arm_cpu/conv2d_int8.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument,no-member """Conv2D int8 schedule on ARM""" - -import tvm +from tvm import te from tvm import autotvm from .. import tag from ..util import get_const_tuple @@ -55,8 +54,8 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, # If no config was set, we can fallback to NCHW config. if cfg.is_fallback: - _get_default_config(cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kh, kw), dtype=kernel.dtype), + _get_default_config(cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kh, kw), dtype=kernel.dtype), strides, padding, out_dtype) return nn.conv2d_NCHWc_int8_compute(data, kernel, @@ -71,7 +70,7 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, @autotvm.register_topi_schedule("conv2d_NCHWc_int8.arm_cpu") def schedule_conv2d_NCHWc_int8(cfg, outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -81,7 +80,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv2d_NCHWc_int8' in op.tag: @@ -89,9 +88,9 @@ def traverse(op): kernel_vec = conv_out.op.input_tensors[1] data_vec = conv_out.op.input_tensors[0] data = data_vec.op.input_tensors[0] \ - if isinstance(data_vec.op, tvm.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ + if isinstance(data_vec.op, te.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ else data_vec - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] diff --git a/topi/python/topi/arm_cpu/conv2d_spatial_pack.py b/topi/python/topi/arm_cpu/conv2d_spatial_pack.py index 032ac76ff6a22..3bb9dc73e2db8 100644 --- a/topi/python/topi/arm_cpu/conv2d_spatial_pack.py +++ b/topi/python/topi/arm_cpu/conv2d_spatial_pack.py @@ -18,6 +18,7 @@ """Conv2D spatial pack implementation for ARM CPU""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from .. import nn from ..util import get_const_tuple @@ -98,46 +99,46 @@ def conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding, dilation, if dilation_h != 1 or dilation_w != 1: # undilate input data dvshape = (N, OH // VH, OW // VW, CI, KH, KW, VH, VW) - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, kh, kw, vh, vw: - data_pad[n][ci][(h*VH+vh)*HSTR+kh*dilation_h] - [(w*VW+vw)*WSTR+kw*dilation_w], - name='data_vec_undilated') + data_vec = te.compute(dvshape, lambda n, h, w, ci, kh, kw, vh, vw: + data_pad[n][ci][(h*VH+vh)*HSTR+kh*dilation_h] + [(w*VW+vw)*WSTR+kw*dilation_w], + name='data_vec_undilated') else: dvshape = (N, OH // VH, OW // VW, CI, VH*HSTR + KH-1, VW*WSTR + KW-1) - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw: - data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], - name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw: + data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], + name='data_vec') if pre_packed: kernel_vec = kernel else: - kernel_vec = tvm.compute(kvshape, lambda co, ci, kh, kw, vc: - kernel[co*VC+vc][ci][kh][kw], - name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc: + kernel[co*VC+vc][ci][kh][kw], + name='kernel_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + ci = te.reduce_axis((0, CI), name='ci') + kh = te.reduce_axis((0, KH), name='kh') + kw = te.reduce_axis((0, KW), name='kw') if dilation_h != 1 or dilation_w != 1: - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, ci, kh, kw, vh, vw].astype(out_dtype) * - kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), - axis=[ci, kh, kw]), name='conv') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, ci, kh, kw, vh, vw].astype(out_dtype) * + kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), + axis=[ci, kh, kw]), name='conv') else: - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, ci, vh*HSTR+kh, vw*WSTR+kw].astype(out_dtype) * - kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), - axis=[ci, kh, kw]), name='conv') - - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - - output = tvm.compute(oshape, lambda n, co, h, w: - conv[n, - idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), - idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], - name='output_unpack', tag='spatial_conv2d_output') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, ci, vh*HSTR+kh, vw*WSTR+kw].astype(out_dtype) * + kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), + axis=[ci, kh, kw]), name='conv') + + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + + output = te.compute(oshape, lambda n, co, h, w: + conv[n, + idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), + idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], + name='output_unpack', tag='spatial_conv2d_output') return output def schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec, @@ -216,7 +217,7 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_ dilated_kernel_w = (KW - 1) * dilation_w + 1 pad_top, pad_left, pad_down, pad_right = \ - get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w)) + get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w)) HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides) OH = (IH + pad_top + pad_down - dilated_kernel_h) // HSTR + 1 @@ -257,40 +258,41 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_ if dilation_h != 1 or dilation_w != 1: # undilate input data dvshape = (N, OHO, OWO, KH, KW, IC, OHI, OWI) - data_vec = tvm.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, owi: - data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h] - [(owo*OWI+owi)*WSTR+kw*dilation_w][ic], - name='data_vec_undilated') + data_vec = te.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, owi: + data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h] + [(owo*OWI+owi)*WSTR+kw*dilation_w][ic], + name='data_vec_undilated') else: dvshape = (N, OHO, OWO, KH + (OHI-1)*HSTR, KW + (OWI-1)*WSTR, IC) - data_vec = tvm.compute(dvshape, lambda n, oho, owo, ohi, owi, ic: - data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic], - name='data_vec') - kernel_vec = tvm.compute(kvshape, lambda oco, kh, kw, ic, oci: \ - kernel[kh][kw][ic][oco*OCI+oci], - name='kernel_vec') + data_vec = te.compute(dvshape, lambda n, oho, owo, ohi, owi, ic: + data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic], + name='data_vec') + kernel_vec = te.compute(kvshape, lambda oco, kh, kw, ic, oci: \ + kernel[kh][kw][ic][oco*OCI+oci], + name='kernel_vec') - ic = tvm.reduce_axis((0, IC), name='ic') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + ic = te.reduce_axis((0, IC), name='ic') + kh = te.reduce_axis((0, KH), name='kh') + kw = te.reduce_axis((0, KW), name='kw') if dilation_h != 1 or dilation_w != 1: - conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ - tvm.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype) * - kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), - axis=[ic, kh, kw]), name='conv') + conv = te.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ + te.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype) * + kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), + axis=[ic, kh, kw]), name='conv') else: - conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ - tvm.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, ic].astype(out_dtype) * - kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), - axis=[ic, kh, kw]), name='conv') - - idiv = tvm.indexdiv - imod = tvm.indexmod - output = tvm.compute(oshape, lambda n, oho, owo, oc: - conv[n][idiv(oho, OHI)][idiv(owo, OWI)][idiv(oc, OCI)]\ - [imod(oho, OHI)][imod(owo, OWI)][imod(oc, OCI)], - name='output_unpack', tag='spatial_conv_output_NHWC') + conv = te.compute( + ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ + te.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, ic].astype(out_dtype) * + kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), + axis=[ic, kh, kw]), name='conv') + + idiv = tvm.tir.indexdiv + imod = tvm.tir.indexmod + output = te.compute(oshape, lambda n, oho, owo, oc: + conv[n][idiv(oho, OHI)][idiv(owo, OWI)][idiv(oc, OCI)]\ + [imod(oho, OHI)][imod(owo, OWI)][imod(oc, OCI)], + name='output_unpack', tag='spatial_conv_output_NHWC') return output def schedule_conv2d_spatial_pack_nhwc(cfg, s, op, output): diff --git a/topi/python/topi/arm_cpu/conv2d_transpose.py b/topi/python/topi/arm_cpu/conv2d_transpose.py index 93ff02900f37c..7eaa5eeb7c90e 100644 --- a/topi/python/topi/arm_cpu/conv2d_transpose.py +++ b/topi/python/topi/arm_cpu/conv2d_transpose.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from ..nn import dilate, pad, get_pad_tuple @@ -31,10 +32,10 @@ def conv2d_transpose_nchw(cfg, Input, Filter, strides, padding, out_dtype): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, num_filter, filter_height, filter_width] strides : tuple of two ints @@ -48,7 +49,7 @@ def conv2d_transpose_nchw(cfg, Input, Filter, strides, padding, out_dtype): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return _decl_spatial_pack(cfg, Input, Filter, strides, padding, "NCHW", out_dtype, 2) @@ -105,31 +106,31 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n ovshape = (N, CO // VC, OH // VH, OW // VW, VH, VW, VC) oshape = (N, CO, OH, OW) - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw: - data_pad[n][ci][h*VH + vh][w*VW + vw], - name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw: + data_pad[n][ci][h*VH + vh][w*VW + vw], + name='data_vec') - kernel_vec = tvm.compute(kvshape, lambda co, ci, kh, kw, vc: - kernel[ci][co*VC+vc][kh][kw], - name='kernel_vec_conv2d_transpose') + kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc: + kernel[ci][co*VC+vc][kh][kw], + name='kernel_vec_conv2d_transpose') - ci = tvm.reduce_axis((0, CI), name='ci') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + ci = te.reduce_axis((0, CI), name='ci') + kh = te.reduce_axis((0, KH), name='kh') + kw = te.reduce_axis((0, KW), name='kw') - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, ci, vh + kh, vw + kw].astype(out_dtype) * - kernel_vec[co, ci, KH - 1 - kh, KW - 1 - kw, vc].astype(out_dtype), - axis=[ci, kh, kw]), name='conv') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, ci, vh + kh, vw + kw].astype(out_dtype) * + kernel_vec[co, ci, KH - 1 - kh, KW - 1 - kw, vc].astype(out_dtype), + axis=[ci, kh, kw]), name='conv') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - output = tvm.compute(oshape, lambda n, co, h, w: - conv[n, - idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), - idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], - name='output_unpack', tag='spatial_conv2d_transpose_output') + output = te.compute(oshape, lambda n, co, h, w: + conv[n, + idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), + idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], + name='output_unpack', tag='spatial_conv2d_transpose_output') return output @@ -137,7 +138,7 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n @autotvm.register_topi_schedule("conv2d_transpose_nchw.arm_cpu") def schedule_conv2d_transpose_nchw(cfg, outs): """Schedule conv2d transpose for arm cpu""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'spatial_conv2d_transpose_output' in op.tag: @@ -155,7 +156,7 @@ def _callback(op): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec, diff --git a/topi/python/topi/arm_cpu/depthwise_conv2d.py b/topi/python/topi/arm_cpu/depthwise_conv2d.py index 8d668f3e91881..5214972b255e6 100644 --- a/topi/python/topi/arm_cpu/depthwise_conv2d.py +++ b/topi/python/topi/arm_cpu/depthwise_conv2d.py @@ -18,6 +18,7 @@ """Depthwise convolution schedule for ARM CPU""" import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -48,8 +49,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(cfg, s, data, data_pad, kernel, output): A, B, C = data, kernel, output @@ -129,7 +130,7 @@ def _callback(op): kernel = op.input_tensors[1] data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] _schedule(cfg, s, data, data_pad, kernel, output) @@ -147,10 +148,10 @@ def depthwise_conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dila cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, multiplier, filter_height, filter_width] or pre-packed 5-D with shape [num_filter_chunk, multiplier, filter_height, filter_width, num_filter_block] @@ -169,7 +170,7 @@ def depthwise_conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dila Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ @@ -179,8 +180,8 @@ def depthwise_conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dila @autotvm.register_topi_schedule("depthwise_conv2d_nchw_spatial_pack.arm_cpu") def schedule_depthwise_conv2d_nchw_spatial_pack(cfg, outs): """Create the schedule for depthwise_conv2d_nchw_spatial_pack""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'spatial_depthwise_conv2d_nchw_output': @@ -192,7 +193,7 @@ def _callback(op): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, outs[0]) @@ -284,50 +285,50 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_dtype, if dilation_h != 1 or dilation_w != 1: # undilate input data dvshape = (N, OH // VH, OW // VW, C, KH, KW, VH, VW) - data_vec = tvm.compute(dvshape, lambda n, h, w, c, kh, kw, vh, vw: - data_pad[n][c][(h * VH + vh) * HSTR + kh * dilation_h] - [(w*VW+vw)*WSTR+kw*dilation_w], - name='data_vec_undilated') + data_vec = te.compute(dvshape, lambda n, h, w, c, kh, kw, vh, vw: + data_pad[n][c][(h * VH + vh) * HSTR + kh * dilation_h] + [(w*VW+vw)*WSTR+kw*dilation_w], + name='data_vec_undilated') else: dvshape = (N, OH // VH, OW // VW, C, VH*HSTR + KH-1, VW*WSTR + KW-1) - data_vec = tvm.compute(dvshape, lambda n, h, w, c, vh, vw: - data_pad[n][c][h * VH * HSTR + vh][w * VW * WSTR + vw], - name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, c, vh, vw: + data_pad[n][c][h * VH * HSTR + vh][w * VW * WSTR + vw], + name='data_vec') if pre_packed: kernel_vec = kernel else: - kernel_vec = tvm.compute(kvshape, lambda co, m, kh, kw, vc: - kernel[co*VC+vc][m][kh][kw], - name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, m, kh, kw, vc: + kernel[co*VC+vc][m][kh][kw], + name='kernel_vec') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + kh = te.reduce_axis((0, KH), name='kh') + kw = te.reduce_axis((0, KW), name='kw') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod if dilation_h != 1 or dilation_w != 1: - conv = tvm.compute( + conv = te.compute( ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, idxdiv(co * VC + vc, M), kh, kw, vh, vw] - .astype(out_dtype) * - kernel_vec[idxdiv(co, M), idxmod(co, M), kh, kw, vc].astype(out_dtype), - axis=[kh, kw]), name='depthwise_conv') + te.sum(data_vec[n, h, w, idxdiv(co * VC + vc, M), kh, kw, vh, vw] + .astype(out_dtype) * + kernel_vec[idxdiv(co, M), idxmod(co, M), kh, kw, vc].astype(out_dtype), + axis=[kh, kw]), name='depthwise_conv') else: - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, idxdiv((co * VC + vc), M), vh * HSTR + kh, - vw * WSTR + kw].astype(out_dtype) * - kernel_vec[idxdiv(co, M), - idxmod(co, M), - kh, kw, vc].astype(out_dtype), - axis=[kh, kw]), name='depthwise_conv') - - output = tvm.compute(oshape, lambda n, co, h, w: - conv[n, - idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), - idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], - name='output_unpack', tag='spatial_depthwise_conv2d_nchw_output') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, idxdiv((co * VC + vc), M), vh * HSTR + kh, + vw * WSTR + kw].astype(out_dtype) * + kernel_vec[idxdiv(co, M), + idxmod(co, M), + kh, kw, vc].astype(out_dtype), + axis=[kh, kw]), name='depthwise_conv') + + output = te.compute(oshape, lambda n, co, h, w: + conv[n, + idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), + idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], + name='output_unpack', tag='spatial_depthwise_conv2d_nchw_output') return output def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, @@ -343,10 +344,10 @@ def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, data_pad = data_vec.op.input_tensors[0] if data_pad.op.name == "data_pad": - assert isinstance(data_pad.op, tvm.tensor.ComputeOp) + assert isinstance(data_pad.op, tvm.te.ComputeOp) has_padding = True else: - assert isinstance(data_pad.op, tvm.tensor.PlaceholderOp) + assert isinstance(data_pad.op, tvm.te.PlaceholderOp) has_padding = False cfg.define_knob('data_pad_inline', [0, 1, 2, 3, 4]) diff --git a/topi/python/topi/arm_cpu/injective.py b/topi/python/topi/arm_cpu/injective.py index 644a7e3fb5233..696b708958253 100644 --- a/topi/python/topi/arm_cpu/injective.py +++ b/topi/python/topi/arm_cpu/injective.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable """Schedule for pooling operators""" import tvm +from tvm import te from ..util import is_empty_shape def schedule_injective_from_existing(sch, out): @@ -58,14 +59,14 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) x = outs[0] if list(s[x].op.axis): # do not vectorize for broadcast (io, ii) = s[x].split(list(s[x].op.axis)[-1], 8) s[x].vectorize(ii) - tvm.schedule.AutoInlineInjective(s) + tvm.te.schedule.AutoInlineInjective(s) if not is_empty_shape(x.shape): schedule_injective_from_existing(s, x) @@ -85,10 +86,10 @@ def schedule_concatenate(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) x = outs[0] - tvm.schedule.AutoInlineInjective(s) + tvm.te.schedule.AutoInlineInjective(s) if len(s[x].op.axis) >= 4: fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2]) s[x].parallel(fused) diff --git a/topi/python/topi/arm_cpu/tensor_intrin.py b/topi/python/topi/arm_cpu/tensor_intrin.py index 2f300a18e117b..7a656b8af0404 100644 --- a/topi/python/topi/arm_cpu/tensor_intrin.py +++ b/topi/python/topi/arm_cpu/tensor_intrin.py @@ -18,6 +18,7 @@ """Conv2D int8 schedule on ARM""" import tvm +from tvm import te def dot_int8_int8_int32(int32_lanes, dtype='uint'): """ @@ -57,27 +58,27 @@ def dot_int8_int8_int32(int32_lanes, dtype='uint'): """ num_int8_elements = 4 # 4 int8 elements in int32 - data = tvm.placeholder((num_int8_elements,), dtype='%s8' % dtype, name='data') - kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='%s8' % dtype, name='kernel') + data = te.placeholder((num_int8_elements,), dtype='%s8' % dtype, name='data') + kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='%s8' % dtype, name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int32_lanes,), - lambda i: tvm.sum(data[k].astype('%s32' % dtype) * - kernel[i, k].astype('%s32' % dtype), - axis=k), name="C") + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int32_lanes,), + lambda i: te.sum(data[k].astype('%s32' % dtype) * + kernel[i, k].astype('%s32' % dtype), + axis=k), name="C") - a_buffer = tvm.decl_buffer(data.shape, dtype='%s8' % dtype, name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='%s8' % dtype, name="b_buffer", - offset_factor=1, - strides=[tvm.var('s'), 1]) + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='%s8' % dtype, name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='%s8' % dtype, name="b_buffer", + offset_factor=1, + strides=[te.var('s'), 1]) def _intrin_func(ins, outs): def _instr(index): ib = tvm.ir_builder.create() if index == 1: - ib.emit(outs[0].vstore(0, tvm.const(0, '%s32x%d' % (dtype, int32_lanes)))) + ib.emit(outs[0].vstore(0, tvm.tir.const(0, '%s32x%d' % (dtype, int32_lanes)))) return ib.get() dtype_a = '%s8x%d' % (dtype, num_int8_elements) @@ -85,21 +86,21 @@ def _instr(index): dtype_c = '%s32x%d' % (dtype, int32_lanes) a_int8 = ins[0].vload([0], dtype_a) - re_int32 = tvm.call_pure_intrin('%s32' % dtype, 'reinterpret', a_int8) + re_int32 = tvm.tir.call_pure_intrin('%s32' % dtype, 'reinterpret', a_int8) # broadcast a vec_ai32 = re_int32.astype(dtype_c) - vec_a = tvm.call_pure_intrin(dtype_b, 'reinterpret', vec_ai32) + vec_a = tvm.tir.call_pure_intrin(dtype_b, 'reinterpret', vec_ai32) vec_b = ins[1].vload([0, 0], dtype_b) vec_c = outs[0].vload([0], dtype_c) inst = 'udot' if dtype == 'uint' else 'sdot' inst = 'llvm.aarch64.neon.%s.v%di32.v%di8' % ( inst, int32_lanes, int32_lanes * num_int8_elements) - vdot = tvm.call_llvm_intrin(dtype_c, - inst, - tvm.const(2, 'uint32'), - vec_c, vec_a, vec_b) + vdot = tvm.tir.call_llvm_intrin(dtype_c, + inst, + tvm.tir.const(2, 'uint32'), + vec_c, vec_a, vec_b) ib.emit(outs[0].vstore(0, vdot)) return ib.get() diff --git a/topi/python/topi/bifrost/conv2d.py b/topi/python/topi/bifrost/conv2d.py index 816024ebdb25c..92e874afa2a54 100644 --- a/topi/python/topi/bifrost/conv2d.py +++ b/topi/python/topi/bifrost/conv2d.py @@ -19,6 +19,7 @@ """conv2d schedule on ARM Mali (Bifrost) GPU""" import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -41,10 +42,10 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] or pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height, filter_width, num_filter_block] @@ -63,7 +64,7 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding, @@ -87,7 +88,7 @@ def schedule_conv2d_nchw_spatial_pack(cfg, outs): s: Schedule The computation schedule for conv2d """ - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): # schedule conv2d @@ -104,7 +105,7 @@ def _callback(op): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec) @@ -125,12 +126,12 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): BW, TW, VW = cfg["tile_ow"].size # schedule padding - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data s[data_pad].compute_inline() # schedule data packing - if isinstance(data_vec.op, tvm.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated': + if isinstance(data_vec.op, te.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated': _, h, w, ci, _, _, vh, vw = s[data_vec].op.axis else: _, h, w, ci, vh, vw = s[data_vec].op.axis @@ -140,7 +141,7 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): if vw.dom.extent.value < max_unroll: s[data_vec].unroll(vw) - if isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and kernel_vec.name == 'kernel_vec': + if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec': if autotvm.GLOBAL_SCOPE.in_tuning: # kernel packing will be pre-computed during compilation, so we skip # this part to make tuning records correct @@ -151,8 +152,8 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): fused = s[kernel_vec].fuse(co, ci, kh, kw, vc) fused, vec = s[kernel_vec].split(fused, VC) bb, tt = s[kernel_vec].split(fused, max_threads) - s[kernel_vec].bind(bb, tvm.thread_axis("blockIdx.x")) - s[kernel_vec].bind(tt, tvm.thread_axis("threadIdx.x")) + s[kernel_vec].bind(bb, te.thread_axis("blockIdx.x")) + s[kernel_vec].bind(tt, te.thread_axis("threadIdx.x")) if VC in vec_size: s[kernel_vec].vectorize(vec) @@ -193,7 +194,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.bifrost") def schedule_conv2d_nchw_winograd(cfg, outs): - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_conv2d_output' in op.tag: @@ -210,7 +211,7 @@ def _decl_winograd_kernel_transform(kernel, tile_size, G): Parameters ---------- - kernel : tvm.Tensor + kernel : tvm.te.Tensor The kernel to transform tile_size : int @@ -218,7 +219,7 @@ def _decl_winograd_kernel_transform(kernel, tile_size, G): Returns ------- - U : tvm.Tensor + U : tvm.te.Tensor Transformed kernel """ @@ -238,22 +239,22 @@ def upround(x, align): # Padded Kernel [K_round, C, KH, KW] # Pad the number of kernels to multiple of ALIGN - padded_kernel = tvm.compute((K_round, C, KH, KW), - lambda k, c, h, w: - tvm.if_then_else(k < K, - kernel[k][c][h][w], - tvm.const(0, out_dtype)), - name='padded_kernel') + padded_kernel = te.compute((K_round, C, KH, KW), + lambda k, c, h, w: + tvm.tir.if_then_else(k < K, + kernel[k][c][h][w], + tvm.tir.const(0, out_dtype)), + name='padded_kernel') # U [alpha, alpha, K_round, C] # Perform the kernel transform - r_kh = tvm.reduce_axis((0, KH), 'r_kh') - r_kw = tvm.reduce_axis((0, KW), 'r_kw') - U = tvm.compute((alpha, alpha, K_round, C), - lambda eps, nu, k, c: - tvm.sum(padded_kernel[k][c][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), - name='U') + r_kh = te.reduce_axis((0, KH), 'r_kh') + r_kw = te.reduce_axis((0, KW), 'r_kw') + U = te.compute((alpha, alpha, K_round, C), + lambda eps, nu, k, c: + te.sum(padded_kernel[k][c][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), + name='U') return U @@ -307,10 +308,10 @@ def upround(x, align): cfg.define_knob("data_transform_wgy", [1, 2, 4, 8, 16, 32, 64]) # Pack input tile - input_tile = tvm.compute((N, C, H + 2, W + 2), - lambda n, c, h, w: - data_pad[n][c][h][w], - name='d') + input_tile = te.compute((N, C, H + 2, W + 2), + lambda n, c, h, w: + data_pad[n][c][h][w], + name='d') if pre_computed: U = kernel @@ -319,33 +320,33 @@ def upround(x, align): # V [alpha * alpha, C, P_round) # Perform the image transform - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - V = tvm.compute((alpha * alpha, C, P_round), - lambda epsnu, c, b: - tvm.sum(input_tile[b // (nH*nW)][c][b // nW % nH * m + r_eps][b % nW * m +r_nu]\ - * B[r_eps][epsnu // alpha] * B[r_nu][epsnu % alpha], - axis=[r_eps, r_nu]), - name='V') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, alpha), 'r_nu') + V = te.compute((alpha * alpha, C, P_round), + lambda epsnu, c, b: + te.sum(input_tile[b // (nH*nW)][c][b // nW % nH * m + r_eps][b % nW * m +r_nu]\ + * B[r_eps][epsnu // alpha] * B[r_nu][epsnu % alpha], + axis=[r_eps, r_nu]), + name='V') # Winograd GEMM is a wrapper around batched GEMM to convert U to a 3D Tensor _, M = decl_winograd_gemm(cfg, U, V) # Y [K, P, m, m] # Winograd output transform - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - Y = tvm.compute((K, P, m, m), lambda k, b, vh, vw: - tvm.sum(M[r_eps * alpha + r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], - axis=[r_eps, r_nu]), name='Y') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, alpha), 'r_nu') + Y = te.compute((K, P, m, m), lambda k, b, vh, vw: + te.sum(M[r_eps * alpha + r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], + axis=[r_eps, r_nu]), name='Y') # Output [N, K, H, W] # Unpack back to NCHW format # The last term ensures alignment is not lost to bound inference - output = tvm.compute((N, K, H, W), lambda n, k, h, w: - Y[k][n * nH * nW + (h//m) * nW + w//m][h % m][w % m] - + tvm.const(0, out_dtype) * M[(alpha*alpha)-1][K_round-1][P_round-1], - name='output', tag='winograd_conv2d_output') + output = te.compute((N, K, H, W), lambda n, k, h, w: + Y[k][n * nH * nW + (h//m) * nW + w//m][h % m][w % m] + + tvm.tir.const(0, out_dtype) * M[(alpha*alpha)-1][K_round-1][P_round-1], + name='output', tag='winograd_conv2d_output') return output @@ -363,7 +364,7 @@ def _schedule_winograd(cfg, s, op): d, B = s[V].op.input_tensors data_pad = s[d].op.input_tensors[0] - if isinstance(U.op, tvm.tensor.ComputeOp): + if isinstance(U.op, tvm.te.ComputeOp): padded_kernel, G = s[U].op.input_tensors kernel = s[padded_kernel].op.input_tensors[0] s[G].compute_inline() @@ -390,7 +391,7 @@ def _schedule_winograd(cfg, s, op): yo, xo, yi, xi = tile_and_bind(s, U, k, c, 1, 4) # Dilation - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # Pad data @@ -485,7 +486,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): data, kernel = tinfos out_dtype = out_type.dtype - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv if topi_tmpl == "conv2d_nchw_spatial_pack.bifrost": assert data_layout == "NCHW" and kernel_layout == "OIHW" @@ -496,7 +497,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OIHW%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "conv2d_nchw_spatial_pack.bifrost") @@ -519,7 +520,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['tile_size'] = tile_size new_data = data - new_kernel = tvm.placeholder( + new_kernel = te.placeholder( (KH + tile_size - 1, KW + tile_size -1, CO, CI), kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], diff --git a/topi/python/topi/bifrost/dense.py b/topi/python/topi/bifrost/dense.py index 2a85db753226a..710484235fbbc 100644 --- a/topi/python/topi/bifrost/dense.py +++ b/topi/python/topi/bifrost/dense.py @@ -16,10 +16,7 @@ # under the License. # pylint: disable=invalid-name,unused-variable """dense schedule on ARM Mali Biforst GPU""" - -from __future__ import absolute_import as _abs - -import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -47,8 +44,8 @@ def schedule_dense(cfg, outs): s: Schedule The computation schedule for dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -79,10 +76,10 @@ def _callback(op): by, ty, yi = cfg['tile_y'].apply(s, output, y) bx, tx, xi = cfg['tile_x'].apply(s, output, x) - s[output].bind(by, tvm.thread_axis('blockIdx.y')) - s[output].bind(bx, tvm.thread_axis('blockIdx.x')) - s[output].bind(ty, tvm.thread_axis('threadIdx.y')) - s[output].bind(tx, tvm.thread_axis('threadIdx.x')) + s[output].bind(by, te.thread_axis('blockIdx.y')) + s[output].bind(bx, te.thread_axis('blockIdx.x')) + s[output].bind(ty, te.thread_axis('threadIdx.y')) + s[output].bind(tx, te.thread_axis('threadIdx.x')) if cfg['tile_y'].size[-1] < max_unroll: s[output].unroll(yi) @@ -108,6 +105,6 @@ def fuse_and_bind(s, tensor, axis=None, num_thread=None): axis = axis or s[tensor].op.axis fused = s[tensor].fuse(*axis) bx, tx = s[tensor].split(fused, num_thread) - s[tensor].bind(bx, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(tx, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(bx, te.thread_axis("blockIdx.x")) + s[tensor].bind(tx, te.thread_axis("threadIdx.x")) return bx, tx diff --git a/topi/python/topi/bifrost/depthwise_conv2d.py b/topi/python/topi/bifrost/depthwise_conv2d.py index 4f7b0db7f95f4..7a96705c5a2a2 100644 --- a/topi/python/topi/bifrost/depthwise_conv2d.py +++ b/topi/python/topi/bifrost/depthwise_conv2d.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import util from .. import tag @@ -38,8 +39,8 @@ def schedule_depthwise_conv2d_nchw(outs): s: Schedule The computation schedule for depthwise_conv2d nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(pad_data, kernel, conv): raw_data = s[pad_data].op.input_tensors[0] @@ -55,12 +56,12 @@ def tile_and_bind3d(tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None): zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) return zo, zi, yo, yi, xo, xi # set tunable parameters @@ -115,7 +116,7 @@ def traverse(op): if op.tag == 'depthwise_conv2d_nchw': pad_data = op.input_tensors[0] kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() conv = op.output(0) _schedule(pad_data, kernel, conv) diff --git a/topi/python/topi/bifrost/gemm.py b/topi/python/topi/bifrost/gemm.py index cc6cf09de4ce1..3dc0108820948 100644 --- a/topi/python/topi/bifrost/gemm.py +++ b/topi/python/topi/bifrost/gemm.py @@ -16,9 +16,6 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument """GEMM schedules for Mali Bifrost""" - -import tvm - from .transforms import tile_and_bind, tile_and_bind3d, interleave_transpose, \ transpose_interleave from .. import util @@ -31,15 +28,15 @@ def decl_gemm(cfg, A, B): cfg : Config Schedule configuration - A : tvm.Tensor + A : tvm.te.Tensor 2D Tensor, shape [n, k] - B : tvm.Tensor + B : tvm.te.Tensor 2D Tensor, shape [k, m] Returns ------- - C : tvm.Tensor + C : tvm.te.Tensor 2D Tensor, shape [n, m] """ @@ -60,35 +57,35 @@ def decl_gemm(cfg, A, B): if unroll_gemm == 1: # No unrolling case must have the same set of tensors to keep scheduling consistent # Create identity tensors to take the place of A_unrolled, B_unrolled and R - A_unrolled = tvm.compute((n, k_size), lambda i, j: A[i, j], name="A_unrolled") - B_unrolled = tvm.compute((k_size, m), lambda i, j: B[i, j], name="B_unrolled") + A_unrolled = te.compute((n, k_size), lambda i, j: A[i, j], name="A_unrolled") + B_unrolled = te.compute((k_size, m), lambda i, j: B[i, j], name="B_unrolled") # Declare standard GEMM - k = tvm.reduce_axis((0, A.shape[1]), name='k') - C = tvm.compute((n, m), lambda i, j: - tvm.sum(A_unrolled[i, k] * B_unrolled[k, j], axis=k), name='C') + k = te.reduce_axis((0, A.shape[1]), name='k') + C = te.compute((n, m), lambda i, j: + te.sum(A_unrolled[i, k] * B_unrolled[k, j], axis=k), name='C') - R = tvm.compute((n, m), lambda i, j: C[i, j], name="R") + R = te.compute((n, m), lambda i, j: C[i, j], name="R") else: unrolled_k_size = k_size // unroll_gemm # Unroll the two input matrices along the shared k axis - A_unrolled = tvm.compute((unroll_gemm, n, unrolled_k_size), lambda b, i, j: - A[i][unrolled_k_size * b + j], name='A_unrolled') + A_unrolled = te.compute((unroll_gemm, n, unrolled_k_size), lambda b, i, j: + A[i][unrolled_k_size * b + j], name='A_unrolled') - B_unrolled = tvm.compute((unroll_gemm, unrolled_k_size, m), lambda b, i, j: - B[unrolled_k_size * b + i][j], name='B_unrolled') + B_unrolled = te.compute((unroll_gemm, unrolled_k_size, m), lambda b, i, j: + B[unrolled_k_size * b + i][j], name='B_unrolled') # Declare a batched GEMM - k = tvm.reduce_axis((0, unrolled_k_size), name='k') - C = tvm.compute((unroll_gemm, n, m), lambda b, i, j: - tvm.sum(A_unrolled[b][i][k] * B_unrolled[b][k][j], axis=k), name='C') + k = te.reduce_axis((0, unrolled_k_size), name='k') + C = te.compute((unroll_gemm, n, m), lambda b, i, j: + te.sum(A_unrolled[b][i][k] * B_unrolled[b][k][j], axis=k), name='C') # Then declare a reduction to reduce the sub matrices - k = tvm.reduce_axis((0, unroll_gemm), name='k') - R = tvm.compute((n, m), lambda i, j: - tvm.sum(C[k][i][j], axis=k), name='R') + k = te.reduce_axis((0, unroll_gemm), name='k') + R = te.compute((n, m), lambda i, j: + te.sum(C[k][i][j], axis=k), name='R') return R @@ -99,15 +96,15 @@ def decl_batched_gemm(cfg, A, B): cfg : Config Schedule configuration - A : tvm.Tensor + A : tvm.te.Tensor 3D Tensor, shape [b, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 3D Tensor, shape [b, k, m] Returns ------- - C : tvm.Tensor + C : tvm.te.Tensor 3D Tensor, shape [b, n, m] """ @@ -127,9 +124,9 @@ def decl_batched_gemm(cfg, A, B): b_size = util.get_const_int(A.shape[0]) # Declare a batched GEMM - k = tvm.reduce_axis((0, k_size), name='k') - C = tvm.compute((b_size, n, m), lambda b, i, j: - tvm.sum(A[b][i][k] * B[b][k][j], axis=k), name='C') + k = te.reduce_axis((0, k_size), name='k') + C = te.compute((b_size, n, m), lambda b, i, j: + te.sum(A[b][i][k] * B[b][k][j], axis=k), name='C') return C @@ -143,10 +140,10 @@ def decl_winograd_gemm(cfg, A, B): cfg : Config Schedule configuration - A : tvm.Tensor + A : tvm.te.Tensor 4D Tensor, shape [a, a, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 4D Tensor, shape [a * a, k, m] Returns @@ -157,8 +154,8 @@ def decl_winograd_gemm(cfg, A, B): n = util.get_const_int(A.shape[2]) k = util.get_const_int(A.shape[3]) - A_3D = tvm.compute((alpha * alpha, n, k), lambda b, i, j: - A[b // alpha][b % alpha][i][j], name='A_3D') + A_3D = te.compute((alpha * alpha, n, k), lambda b, i, j: + A[b // alpha][b % alpha][i][j], name='A_3D') C = decl_batched_gemm(cfg, A_3D, B) return A_3D, C @@ -171,16 +168,16 @@ def schedule_gemm(cfg, s, A, B, C, batched=False, schedule_transforms=True): cfg : Config Schedule configuration - s : tvm.schedule.Schedule + s : tvm.te.schedule.Schedule Operator schedule - A : tvm.Tensor + A : tvm.te.Tensor 2D/3D Tensor, shape [n, k]/[b, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 2D/3D Tensor, shape [k, m]/[b, k, m] - C : tvm.Tensor + C : tvm.te.Tensor 2D/3D Tensor, shape [n, m]/[b, n, m] batched : bool @@ -287,19 +284,19 @@ def schedule_unrollable_gemm(cfg, s, A, B, C, R): cfg : Config Schedule configuration - s : tvm.schedule.Schedule + s : tvm.te.schedule.Schedule Operator schedule - A : tvm.Tensor + A : tvm.te.Tensor 2D/3D Tensor, shape [n, k]/[b, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 2D/3D Tensor, shape [k, m]/[b, k, m] - C : tvm.Tensor + C : tvm.te.Tensor 2D/3D Tensor, shape [n, m]/[b, n, m] - R : tvm.Tensor + R : tvm.te.Tensor 2D Tensor, shape [n, m] """ @@ -340,21 +337,21 @@ def get_unrollable_gemm_ops(R): Parameters ---------- - R : tvm.Tensor + R : tvm.te.Tensor Reduced tensor, final stage of GEMM Returns ------- - A_unrolled : tvm.Tensor + A_unrolled : tvm.te.Tensor Matrix A unrolled along k - B_unrolled: tvm.Tensor + B_unrolled: tvm.te.Tensor Matrix B unrolled along k - C : tvm.Tensor + C : tvm.te.Tensor Result of batched GEMM - R : tvm.Tensor + R : tvm.te.Tensor Reduction of C, result of unrollable GEMM """ diff --git a/topi/python/topi/bifrost/transforms.py b/topi/python/topi/bifrost/transforms.py index d7fc292f0adec..3feb4e6c87595 100644 --- a/topi/python/topi/bifrost/transforms.py +++ b/topi/python/topi/bifrost/transforms.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te def fuse_and_bind(s, tensor, axis=None, num_thread=None): """Fuse all the axis and bind to GPU threads""" @@ -26,18 +27,18 @@ def fuse_and_bind(s, tensor, axis=None, num_thread=None): fused = s[tensor].fuse(*axis) max_threads = tvm.target.Target.current(allow_none=False).max_num_threads bx, tx = s[tensor].split(fused, num_thread or max_threads) - s[tensor].bind(bx, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(tx, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(bx, te.thread_axis("blockIdx.x")) + s[tensor].bind(tx, te.thread_axis("threadIdx.x")) return bx, tx def tile_and_bind(s, tensor, y, x, y_factor, x_factor=None): """Tile and bind to GPU threads""" x_factor = x_factor or y_factor yo, xo, yi, xi = s[tensor].tile(y, x, y_factor, x_factor) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) return yo, xo, yi, xi def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None): @@ -47,12 +48,12 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) return zo, yo, xo, zi, yi, xi def pack_tensor(s, tensor, factor, readers): diff --git a/topi/python/topi/broadcast.py b/topi/python/topi/broadcast.py index ba39c9aed35b2..39b2841da8540 100644 --- a/topi/python/topi/broadcast.py +++ b/topi/python/topi/broadcast.py @@ -27,7 +27,7 @@ def broadcast_to(data, shape): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input data shape : list or tuple @@ -35,7 +35,7 @@ def broadcast_to(data, shape): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return _cpp.broadcast_to(data, shape) @@ -45,14 +45,14 @@ def add(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -64,14 +64,14 @@ def subtract(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -83,14 +83,14 @@ def multiply(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -102,14 +102,14 @@ def divide(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -121,14 +121,14 @@ def floor_divide(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -140,14 +140,14 @@ def mod(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -159,14 +159,14 @@ def floor_mod(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -178,14 +178,14 @@ def maximum(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -197,14 +197,14 @@ def minimum(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -216,14 +216,14 @@ def power(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -235,14 +235,14 @@ def left_shift(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -254,14 +254,14 @@ def right_shift(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -273,14 +273,14 @@ def greater(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -292,14 +292,14 @@ def less(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -311,14 +311,14 @@ def equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -330,14 +330,14 @@ def not_equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -349,14 +349,14 @@ def greater_equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -368,14 +368,14 @@ def less_equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -387,14 +387,14 @@ def logical_and(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -406,14 +406,14 @@ def logical_or(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -425,14 +425,14 @@ def bitwise_and(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -444,14 +444,14 @@ def bitwise_or(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -463,14 +463,14 @@ def bitwise_xor(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -482,11 +482,11 @@ def logical_not(data): Parameters ---------- - data : tvm.Tensor or Expr + data : tvm.te.Tensor or Expr Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if the operand are Expr. Otherwise returns Tensor. """ @@ -498,11 +498,11 @@ def bitwise_not(data): Parameters ---------- - data : tvm.Tensor or Expr + data : tvm.te.Tensor or Expr Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if the operand are Expr. Otherwise returns Tensor. """ diff --git a/topi/python/topi/cuda/batch_matmul.py b/topi/python/topi/cuda/batch_matmul.py index e293c7ad41e88..bf801820d25a3 100644 --- a/topi/python/topi/cuda/batch_matmul.py +++ b/topi/python/topi/cuda/batch_matmul.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name,too-many-locals,unused-variable """cuda batch_matmul operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm.contrib import cublas from ..util import traverse_inline, get_const_tuple, get_max_power2_factor @@ -35,8 +34,8 @@ def schedule_batch_matmul(outs): s: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(op): C = op.output(0) @@ -60,13 +59,13 @@ def _schedule(op): x_nthreads = min(x_bn, 8) ty, yi = s[C].split(y, nparts=y_nthreads) tx, xi = s[C].split(x, nparts=x_nthreads) - thread_x = tvm.thread_axis((0, x_nthreads), "threadIdx.x") - thread_y = tvm.thread_axis((0, y_nthreads), "threadIdx.y") + thread_x = te.thread_axis((0, x_nthreads), "threadIdx.x") + thread_y = te.thread_axis((0, y_nthreads), "threadIdx.y") s[C].reorder(b, by, bx, ty, tx, yi, xi) - s[C].bind(b, tvm.thread_axis("blockIdx.z")) - s[C].bind(by, tvm.thread_axis("blockIdx.y")) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) + s[C].bind(b, te.thread_axis("blockIdx.z")) + s[C].bind(by, te.thread_axis("blockIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) s[C].bind(ty, thread_y) s[C].bind(tx, thread_x) s[C].pragma(yi, "auto_unroll_max_step", 16) @@ -111,15 +110,15 @@ def batch_matmul_cublas(x, y): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, M, N] """ return cublas.batch_matmul(x, y, False, True) diff --git a/topi/python/topi/cuda/conv1d.py b/topi/python/topi/cuda/conv1d.py index 56918e2bbba2b..d6150c0455f46 100644 --- a/topi/python/topi/cuda/conv1d.py +++ b/topi/python/topi/cuda/conv1d.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-argument """Compute definition for conv1d with cuda backend""" -import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -52,8 +52,8 @@ def schedule_conv1d_ncw(cfg, outs): s : Schedule The computation schedule for conv1d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv1d_ncw': @@ -79,7 +79,7 @@ def _callback(op): ##### space definition end ##### if isinstance(kernel.op, - tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -103,14 +103,14 @@ def _callback(op): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, bx, vn, vf, vx, tn, tf, tx, ni, fi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # number of threads n_tz = cfg["tile_n"].size[2] * cfg["tile_f"].size[2] @@ -131,8 +131,8 @@ def _callback(op): fused = s[load].fuse(f, x) tz, fused = s[load].split(fused, nparts=n_tz) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) @@ -177,8 +177,8 @@ def schedule_conv1d_nwc(cfg, outs): s : Schedule The computation schedule for conv1d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv1d_nwc': @@ -204,7 +204,7 @@ def _callback(op): ##### space definition end ##### if isinstance(kernel.op, - tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -228,14 +228,14 @@ def _callback(op): bf, vf, tf, fi = cfg["tile_f"].apply(s, output, f) s[output].reorder(bn, bx, bf, vn, vx, vf, tn, tx, tf, ni, xi, fi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bx, tvm.thread_axis("blockIdx.y")) - s[output].bind(bf, tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - - s[output].bind(tf, tvm.thread_axis("threadIdx.x")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bx, te.thread_axis("blockIdx.y")) + s[output].bind(bf, te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + + s[output].bind(tf, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tf) # number of threads n_tz = cfg["tile_n"].size[2] * cfg["tile_x"].size[2] @@ -256,8 +256,8 @@ def _callback(op): fused = s[load].fuse(x, f) tz, fused = s[load].split(fused, nparts=n_tz) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/conv1d_transpose_ncw.py b/topi/python/topi/cuda/conv1d_transpose_ncw.py index 4802a0d144a3f..cf1b66cc5202b 100644 --- a/topi/python/topi/cuda/conv1d_transpose_ncw.py +++ b/topi/python/topi/cuda/conv1d_transpose_ncw.py @@ -18,6 +18,7 @@ """Conv1d transpose template for cuda backend""" import tvm +from tvm import te from tvm import autotvm from .. import nn from ..util import get_const_tuple, traverse_inline @@ -30,9 +31,9 @@ def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype): ---------- cfg: ConfigEntity The config for this template - Input : tvm.Tensor + Input : tvm.te.Tensor 3-D with shape [batch, in_channel, inp_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 3-D with shape [in_channel, num_filter, kernel_size] stride : tuple of one int The spatial stride along width @@ -45,7 +46,7 @@ def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor u 3-D with shape [batch, out_channel, out_width] """ if isinstance(stride, (tuple, list)): @@ -58,21 +59,21 @@ def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype): pad_left = kernel_size - 1 - pad_left pad_right = kernel_size - 1 - pad_right dilated_width = stride * (inp_width - 1) + 1 - data = tvm.compute( + data = te.compute( (batch, inp_channels, pad_left + dilated_width + pad_right), - lambda n, c, x: tvm.if_then_else( - tvm.all(x >= pad_left, - x < pad_left + dilated_width, - tvm.indexmod(x - pad_left, stride).equal(0)), - data[n, c, tvm.indexdiv(x - pad_left, stride)], - tvm.const(0., "float32")), + lambda n, c, x: tvm.tir.if_then_else( + tvm.tir.all(x >= pad_left, + x < pad_left + dilated_width, + tvm.tir.indexmod(x - pad_left, stride).equal(0)), + data[n, c, tvm.tir.indexdiv(x - pad_left, stride)], + tvm.tir.const(0., "float32")), name='data_pad') - dc = tvm.reduce_axis((0, inp_channels), name='dc') - dw = tvm.reduce_axis((0, kernel_size), name='dw') - data_out = tvm.compute( + dc = te.reduce_axis((0, inp_channels), name='dc') + dw = te.reduce_axis((0, kernel_size), name='dw') + data_out = te.compute( (batch, out_channels, out_width), - lambda b, c, w: tvm.sum( + lambda b, c, w: te.sum( data[b, dc, w + dw].astype(out_dtype) * kernel[dc, c, kernel_size - 1 - dw].astype(out_dtype), axis=[dc, dw]), tag="conv1d_transpose_ncw") @@ -97,8 +98,8 @@ def schedule_conv1d_transpose_ncw(cfg, outs): s: Schedule The computation schedule for conv1d transpose. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv1d_transpose_ncw': @@ -123,7 +124,7 @@ def _callback(op): ##### space definition end ##### - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -147,14 +148,14 @@ def _callback(op): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, bx, vn, vf, vx, tn, tf, tx, ni, fi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # number of threads n_tz = cfg["tile_n"].size[2] * cfg["tile_f"].size[2] @@ -175,8 +176,8 @@ def _callback(op): fused = s[load].fuse(f, x) tz, fused = s[load].split(fused, nparts=n_tz) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) diff --git a/topi/python/topi/cuda/conv2d.py b/topi/python/topi/cuda/conv2d.py index e1ada325ea631..c24789307340e 100644 --- a/topi/python/topi/cuda/conv2d.py +++ b/topi/python/topi/cuda/conv2d.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-argument """Compute definition for conv2d with cuda backend""" -import tvm +from tvm import te from tvm import autotvm from tvm.contrib import cudnn @@ -35,8 +35,8 @@ def conv2d_nchw(cfg, data, kernel, strides, padding, dilation, out_dtype='float3 @autotvm.register_topi_schedule("conv2d_nchw.cuda") def schedule_conv2d_nchw(cfg, outs): """Create the schedule for conv2d_nchw""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv2d_nchw': @@ -55,8 +55,8 @@ def _callback(op): # # @autotvm.register_topi_schedule("conv2d_nhwc.cuda") # def schedule_conv2d_nhwc(cfg, outs): -# outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs -# s = tvm.create_schedule([x.op for x in outs]) +# outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs +# s = te.create_schedule([x.op for x in outs]) # # def _callback(op): # if op.tag == 'conv2d_nhwc': diff --git a/topi/python/topi/cuda/conv2d_alter_op.py b/topi/python/topi/cuda/conv2d_alter_op.py index f3e4f4c3b3c92..b59827136c702 100644 --- a/topi/python/topi/cuda/conv2d_alter_op.py +++ b/topi/python/topi/cuda/conv2d_alter_op.py @@ -19,6 +19,7 @@ import logging import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -70,10 +71,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): ic_block_factor = oc_block_factor = 4 # Store the same config for the altered operator (workload) - new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), - dtype=data.dtype) - new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW, \ - oc_block_factor, ic_block_factor), dtype=kernel.dtype) + new_data = te.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), + dtype=data.dtype) + new_kernel = te.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW, \ + oc_block_factor, ic_block_factor), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_layout, out_dtype], "conv2d_NCHWc_int8.cuda") @@ -100,8 +101,8 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): # Store the same config for the altered operator (workload) new_data = data - new_weight = tvm.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO), - dtype=kernel.dtype) + new_weight = te.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO), + dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_weight, strides, padding, dilation, out_dtype], "conv2d_nchw_winograd_without_weight_transform.cuda") @@ -122,11 +123,11 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): ic_block_factor = oc_block_factor = 4 # Store the same config for the altered operator (workload) - new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), - dtype=data.dtype) - new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups, - KH, KW, oc_block_factor, ic_block_factor), - dtype=kernel.dtype) + new_data = te.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), + dtype=data.dtype) + new_kernel = te.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups, + KH, KW, oc_block_factor, ic_block_factor), + dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, groups, out_dtype], "group_conv2d_NCHWc_int8.cuda") diff --git a/topi/python/topi/cuda/conv2d_direct.py b/topi/python/topi/cuda/conv2d_direct.py index 2fab8cf122536..db6bff2e9289c 100644 --- a/topi/python/topi/cuda/conv2d_direct.py +++ b/topi/python/topi/cuda/conv2d_direct.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """The templates for cuda conv2d operators""" import tvm +from tvm import te from tvm import autotvm from ..util import get_const_tuple @@ -50,7 +51,7 @@ def schedule_direct_cuda(cfg, s, conv): pad_data, kernel = s[conv].op.input_tensors s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -74,15 +75,15 @@ def schedule_direct_cuda(cfg, s, conv): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -104,9 +105,9 @@ def schedule_direct_cuda(cfg, s, conv): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/conv2d_hwcn.py b/topi/python/topi/cuda/conv2d_hwcn.py index b0925ae93a163..e45083f53c5e2 100644 --- a/topi/python/topi/cuda/conv2d_hwcn.py +++ b/topi/python/topi/cuda/conv2d_hwcn.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, too-many-locals, too-many-statements, unused-argument """Schedule for conv2d_hwcn with auto fusion""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity @@ -44,8 +45,8 @@ def schedule_conv2d_hwcn(cfg, outs): s: Schedule The computation schedule for conv2d_hwcn. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - sch = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + sch = te.create_schedule([x.op for x in outs]) def schedule(Apad, W, B): """Schedule conv2d_hwcn""" sch[Apad].compute_inline() @@ -93,13 +94,13 @@ def schedule(Apad, W, B): bx, txz, tx, ni = cfg['tile_ni'].apply(sch, Out, ni) sch[Out].reorder(bz, by, bx, tyz, txz, ty, tx, fi, ni) - sch[Out].bind(bz, tvm.thread_axis('blockIdx.z')) - sch[Out].bind(by, tvm.thread_axis('blockIdx.y')) - sch[Out].bind(bx, tvm.thread_axis('blockIdx.x')) - sch[Out].bind(tyz, tvm.thread_axis('vthread')) - sch[Out].bind(txz, tvm.thread_axis('vthread')) - sch[Out].bind(ty, tvm.thread_axis('threadIdx.y')) - sch[Out].bind(tx, tvm.thread_axis('threadIdx.x')) + sch[Out].bind(bz, te.thread_axis('blockIdx.z')) + sch[Out].bind(by, te.thread_axis('blockIdx.y')) + sch[Out].bind(bx, te.thread_axis('blockIdx.x')) + sch[Out].bind(tyz, te.thread_axis('vthread')) + sch[Out].bind(txz, te.thread_axis('vthread')) + sch[Out].bind(ty, te.thread_axis('threadIdx.y')) + sch[Out].bind(tx, te.thread_axis('threadIdx.x')) # Schedule BL local write sch[BL].compute_at(sch[Out], tx) @@ -121,8 +122,8 @@ def schedule(Apad, W, B): tx, ni = sch[AA].split(ni, nparts=cfg['tile_ni'].size[2]) _, ni = sch[AA].split(ni, factor=4) sch[AA].reorder(ty, tx, yi, xi, ci, ni) - sch[AA].bind(ty, tvm.thread_axis('threadIdx.y')) - sch[AA].bind(tx, tvm.thread_axis('threadIdx.x')) + sch[AA].bind(ty, te.thread_axis('threadIdx.y')) + sch[AA].bind(tx, te.thread_axis('threadIdx.x')) sch[AA].vectorize(ni) # Schedule for W's shared memory load yi, xi, ci, fi = sch[WW].op.axis @@ -130,8 +131,8 @@ def schedule(Apad, W, B): tx, fi = sch[WW].split(fi, nparts=cfg['tile_ni'].size[2]) _, fi = sch[WW].split(fi, factor=4) sch[WW].reorder(ty, tx, yi, xi, ci, fi) - sch[WW].bind(ty, tvm.thread_axis('threadIdx.y')) - sch[WW].bind(tx, tvm.thread_axis('threadIdx.x')) + sch[WW].bind(ty, te.thread_axis('threadIdx.y')) + sch[WW].bind(tx, te.thread_axis('threadIdx.x')) sch[WW].vectorize(fi) scheduled_ops = [] @@ -142,12 +143,12 @@ def traverse(operator): if operator not in sch.outputs: sch[operator].compute_inline() for tensor in operator.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) elif operator.tag == 'conv2d_hwcn': Apad = operator.input_tensors[0] W = operator.input_tensors[1] - if isinstance(W.op, tvm.tensor.ComputeOp) and 'dilate' in W.op.tag: + if isinstance(W.op, tvm.te.ComputeOp) and 'dilate' in W.op.tag: sch[W].compute_inline() B = operator.output(0) schedule(Apad, W, B) diff --git a/topi/python/topi/cuda/conv2d_int8.py b/topi/python/topi/cuda/conv2d_int8.py index 53a7bd9fa8492..ad97fa68d6aa1 100644 --- a/topi/python/topi/cuda/conv2d_int8.py +++ b/topi/python/topi/cuda/conv2d_int8.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """Int8 conv2d in NCHWc layout""" import tvm +from tvm import te from tvm import autotvm from .injective import schedule_injective_from_existing @@ -35,11 +36,11 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] or 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] or 6-D with shape [num_filter_chunk, in_channel_chunk, filter_height, filter_width, num_filter_block, in_channel_block] @@ -61,7 +62,7 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block] """ assert layout in ["NCHW", "NCHW4c"] @@ -74,17 +75,17 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ assert channels % ic_block_factor == 0, \ "Number of input channels should be multiple of {}".format( ic_block_factor) - packed_data = tvm.compute((batch, channels // ic_block_factor, height, width, - ic_block_factor), - lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], - name="packed_data") + packed_data = te.compute((batch, channels // ic_block_factor, height, width, + ic_block_factor), + lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], + name="packed_data") out_channels, in_channels, kernel_h, kernel_w = get_const_tuple( kernel.shape) assert out_channels % 4 == 0, \ "Number of output channels should be multiple of {}".format( oc_block_factor) - packed_kernel = tvm.compute( + packed_kernel = te.compute( (out_channels // oc_block_factor, in_channels // ic_block_factor, kernel_h, kernel_w, oc_block_factor, ic_block_factor), lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: @@ -124,23 +125,23 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ oshape = (batch, oc_chunk, out_height, out_width, oc_block) - icc = tvm.reduce_axis((0, ic_chunk), name='ic_chunk') - icb = tvm.reduce_axis((0, ic_block), name='ic_block') - kh = tvm.reduce_axis((0, kernel_h), name='kh') - kw = tvm.reduce_axis((0, kernel_w), name='kw') + icc = te.reduce_axis((0, ic_chunk), name='ic_chunk') + icb = te.reduce_axis((0, ic_block), name='ic_block') + kh = te.reduce_axis((0, kernel_h), name='kh') + kw = te.reduce_axis((0, kernel_w), name='kw') - conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - tvm.sum(pad_data[n, icc, oh*stride_h+kh*dilation_h, \ - ow*stride_w+kw*dilation_w, icb] - .astype('int32') * - packed_kernel[oc_chunk, icc, - kh, kw, oc_block, icb] - .astype('int32'), - axis=[icc, kh, kw, icb])) + conv = te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: + te.sum(pad_data[n, icc, oh*stride_h+kh*dilation_h, \ + ow*stride_w+kw*dilation_w, icb] + .astype('int32') * + packed_kernel[oc_chunk, icc, + kh, kw, oc_block, icb] + .astype('int32'), + axis=[icc, kh, kw, icb])) - output = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype), - tag="conv2d_NCHWc_int8") + output = te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: + conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype), + tag="conv2d_NCHWc_int8") # num flop num_flop = batch * oc_chunk * oc_block * out_height * out_width * \ @@ -156,8 +157,8 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ @autotvm.register_topi_schedule("conv2d_NCHWc_int8.cuda") def schedule_conv2d_NCHWc_int8(cfg, outs): """Schedule conv2d int8 NCHWc template""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv2d_NCHWc_int8': @@ -171,7 +172,7 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): conv = output.op.input_tensors[0] packed_data, packed_kernel = conv.op.input_tensors - if isinstance(packed_data.op, tvm.tensor.ComputeOp) and "pad" in packed_data.op.tag: + if isinstance(packed_data.op, tvm.te.ComputeOp) and "pad" in packed_data.op.tag: pad_data = packed_data packed_data = pad_data.op.input_tensors[0] else: @@ -183,8 +184,8 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): s[packed_data].pragma(s[packed_data].op.axis[0], "debug_skip_region") s[packed_kernel].pragma(s[packed_kernel].op.axis[0], "debug_skip_region") else: - if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and\ - packed_kernel.name == 'packed_kernel': + if isinstance(packed_kernel.op, tvm.te.ComputeOp) and\ + packed_kernel.name == 'packed_kernel': # data and kernel are not pre-computed, schedule layout transform here schedule_injective_from_existing(s, packed_data) schedule_injective_from_existing(s, packed_kernel) @@ -219,20 +220,20 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, by, bx, vn, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(tyx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tyx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tyx) # number of threads @@ -240,9 +241,9 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2] else: - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tx) # number of threads @@ -285,9 +286,9 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) fused, tz = s[load].split(fused, factor=n_tz) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # double buffer cfg.define_knob('AA_double_buffer', [0, 1]) diff --git a/topi/python/topi/cuda/conv2d_transpose_nchw.py b/topi/python/topi/cuda/conv2d_transpose_nchw.py index 8751800c45171..17bd37d1fc5c8 100644 --- a/topi/python/topi/cuda/conv2d_transpose_nchw.py +++ b/topi/python/topi/cuda/conv2d_transpose_nchw.py @@ -18,6 +18,7 @@ """Conv2d transpose template for cuda backend""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity from .. import nn @@ -32,9 +33,9 @@ def conv2d_transpose_nchw(cfg, data, kernel, stride, padding, out_dtype): ---------- cfg: ConfigEntity The config for this template - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, num_filter, filter_height, filter_width] strides : tuple of two ints The spatial stride along height and width @@ -45,7 +46,7 @@ def conv2d_transpose_nchw(cfg, data, kernel, stride, padding, out_dtype): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ batch, inp_channels, inp_height, inp_width = get_const_tuple(data.shape) @@ -56,42 +57,42 @@ def conv2d_transpose_nchw(cfg, data, kernel, stride, padding, out_dtype): padding, (kernel_height, kernel_width)) out_width = (inp_width - 1) * stride_width + \ - kernel_width - pad_left - pad_right + kernel_width - pad_left - pad_right pad_left = kernel_width - 1 - pad_left pad_right = kernel_width - 1 - pad_right dilated_width = stride_width * (inp_width - 1) + 1 out_height = (inp_height - 1) * stride_height + \ - kernel_height - pad_top - pad_bottom + kernel_height - pad_top - pad_bottom pad_top = kernel_height - 1 - pad_top pad_bottom = kernel_height - 1 - pad_bottom dilated_height = stride_height * (inp_height - 1) + 1 # compute pad - data = tvm.compute( + data = te.compute( (batch, inp_channels, pad_top + dilated_height + pad_bottom, pad_left + dilated_width + pad_right), - lambda n, c, y, x: tvm.if_then_else( - tvm.all(x >= pad_left, - x < pad_left + dilated_width, - tvm.indexmod(x - pad_left, stride_width).equal(0), - y >= pad_top, - y < pad_top + dilated_height, - tvm.indexmod(y - pad_top, stride_height).equal(0)), + lambda n, c, y, x: tvm.tir.if_then_else( + tvm.tir.all(x >= pad_left, + x < pad_left + dilated_width, + tvm.tir.indexmod(x - pad_left, stride_width).equal(0), + y >= pad_top, + y < pad_top + dilated_height, + tvm.tir.indexmod(y - pad_top, stride_height).equal(0)), data[n, c, - tvm.indexdiv(y - pad_top, stride_height), - tvm.indexdiv(x - pad_left, stride_width)], - tvm.const(0., "float32")), + tvm.tir.indexdiv(y - pad_top, stride_height), + tvm.tir.indexdiv(x - pad_left, stride_width)], + tvm.tir.const(0., "float32")), name='data_pad') # compute transposed conv - dc = tvm.reduce_axis((0, inp_channels), name='dc') - dh = tvm.reduce_axis((0, kernel_height), name='dh') - dw = tvm.reduce_axis((0, kernel_width), name='dw') - data_out = tvm.compute( + dc = te.reduce_axis((0, inp_channels), name='dc') + dh = te.reduce_axis((0, kernel_height), name='dh') + dw = te.reduce_axis((0, kernel_width), name='dw') + data_out = te.compute( (batch, out_channels, out_height, out_width), - lambda b, c, h, w: tvm.sum( + lambda b, c, h, w: te.sum( data[b, dc, h + dh, w + dw].astype(out_dtype) * kernel[dc, c, @@ -119,8 +120,8 @@ def schedule_conv2d_transpose_nchw(cfg, outs): s: Schedule The computation schedule for conv2d transpose. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _fallback_schedule(N, F, Y, X): # pylint: disable=unused-argument @@ -181,7 +182,7 @@ def _callback(op): ##### space definition end ##### - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -206,21 +207,21 @@ def _callback(op): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, by, bx, vn, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(s[output].fuse(ty, tx), tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(ty, tx), te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tyx) # number of threads @@ -228,9 +229,9 @@ def _callback(op): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2] else: - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # number of threads @@ -254,9 +255,9 @@ def _callback(op): tz, fused = s[load].split(fused, nparts=n_tz) ty, fused = s[load].split(fused, nparts=n_ty) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) diff --git a/topi/python/topi/cuda/conv2d_winograd.py b/topi/python/topi/cuda/conv2d_winograd.py index 6e09be97390cd..881f63aef781c 100644 --- a/topi/python/topi/cuda/conv2d_winograd.py +++ b/topi/python/topi/cuda/conv2d_winograd.py @@ -19,6 +19,7 @@ import logging import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -75,50 +76,50 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, # transform kernel if not pre_computed: - r_kh = tvm.reduce_axis((0, KH), name='r_kh') - r_kw = tvm.reduce_axis((0, KW), name='r_kw') - kernel_pack = tvm.compute((alpha, alpha, CI, CO), lambda eps, nu, ci, co: - tvm.sum(kernel[co][ci][r_kh][r_kw] * - G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), name='kernel_pack') + r_kh = te.reduce_axis((0, KH), name='r_kh') + r_kw = te.reduce_axis((0, KW), name='r_kw') + kernel_pack = te.compute((alpha, alpha, CI, CO), lambda eps, nu, ci, co: + te.sum(kernel[co][ci][r_kh][r_kw] * + G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), name='kernel_pack') else: kernel_pack = kernel - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod # pack input tile - input_tile = tvm.compute((CI, P, alpha, alpha), lambda c, p, eps, nu: - data_pad[idxdiv(p, (nH * nW))][c][idxmod(idxdiv(p, nW), nH) * m + eps] - [idxmod(p, nW) * m + nu], name='d') + input_tile = te.compute((CI, P, alpha, alpha), lambda c, p, eps, nu: + data_pad[idxdiv(p, (nH * nW))][c][idxmod(idxdiv(p, nW), nH) * m + eps] + [idxmod(p, nW) * m + nu], name='d') # transform data - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_a') - data_pack = tvm.compute((alpha, alpha, CI, P), lambda eps, nu, ci, p: - tvm.sum(input_tile[ci][p][r_a][r_b] * B[r_a][eps] * B[r_b][nu], - axis=[r_a, r_b]), name='data_pack') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_a') + data_pack = te.compute((alpha, alpha, CI, P), lambda eps, nu, ci, p: + te.sum(input_tile[ci][p][r_a][r_b] * B[r_a][eps] * B[r_b][nu], + axis=[r_a, r_b]), name='data_pack') # do batch gemm - ci = tvm.reduce_axis((0, CI), name='ci') - bgemm = tvm.compute((alpha, alpha, CO, P), lambda eps, nu, co, p: - tvm.sum(kernel_pack[eps][nu][ci][co] * - data_pack[eps][nu][ci][p], - axis=[ci]), name='bgemm') + ci = te.reduce_axis((0, CI), name='ci') + bgemm = te.compute((alpha, alpha, CO, P), lambda eps, nu, co, p: + te.sum(kernel_pack[eps][nu][ci][co] * + data_pack[eps][nu][ci][p], + axis=[ci]), name='bgemm') # inverse transform - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_a') - inverse = tvm.compute((CO, P, m, m), lambda co, p, vh, vw: - tvm.sum(bgemm[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw], - axis=[r_a, r_b]), name='inverse') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_a') + inverse = te.compute((CO, P, m, m), lambda co, p, vh, vw: + te.sum(bgemm[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw], + axis=[r_a, r_b]), name='inverse') # output - output = tvm.compute((N, CO, H, W), lambda n, co, h, w: - inverse[co, - n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), - idxmod(h, m), - idxmod(w, m)], - name='output', tag='conv2d_nchw_winograd') + output = te.compute((N, CO, H, W), lambda n, co, h, w: + inverse[co, + n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), + idxmod(h, m), + idxmod(w, m)], + name='output', tag='conv2d_nchw_winograd') cfg.add_flop(2 * N * CO * H * W * CI * KH * KW) return output @@ -147,8 +148,8 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused = s[data_pack].fuse(c, p) bb, tt = s[data_pack].split(fused, 128) s[data_pack].reorder(bb, tt, pi, eps, nu) - s[data_pack].bind(bb, tvm.thread_axis("blockIdx.x")) - s[data_pack].bind(tt, tvm.thread_axis("threadIdx.x")) + s[data_pack].bind(bb, te.thread_axis("blockIdx.x")) + s[data_pack].bind(tt, te.thread_axis("threadIdx.x")) s[data_l].compute_at(s[data_pack], pi) s[input_tile].compute_at(s[data_pack], pi) @@ -172,12 +173,12 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused = s[kernel_pack].fuse(ci, co) bb, tt = s[kernel_pack].split(fused, 128) s[kernel_pack].reorder(bb, tt, eps, nu, r_a, r_b) - s[kernel_pack].bind(bb, tvm.thread_axis("blockIdx.x")) - s[kernel_pack].bind(tt, tvm.thread_axis("threadIdx.x")) + s[kernel_pack].bind(bb, te.thread_axis("blockIdx.x")) + s[kernel_pack].bind(tt, te.thread_axis("threadIdx.x")) else: kernel = kernel_pack - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() ##### space definition begin ##### @@ -213,15 +214,15 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): bz, vz, tz, zi = cfg["tile_b"].apply(s, C, b) by, vy, ty, yi = cfg["tile_y"].apply(s, C, y) bx, vx, tx, xi = cfg["tile_x"].apply(s, C, x) - s[C].bind(bz, tvm.thread_axis("blockIdx.z")) - s[C].bind(by, tvm.thread_axis("blockIdx.y")) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(vz, tvm.thread_axis("vthread")) - s[C].bind(vy, tvm.thread_axis("vthread")) - s[C].bind(vx, tvm.thread_axis("vthread")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bz, te.thread_axis("blockIdx.z")) + s[C].bind(by, te.thread_axis("blockIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(vz, te.thread_axis("vthread")) + s[C].bind(vy, te.thread_axis("vthread")) + s[C].bind(vx, te.thread_axis("vthread")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) s[C].reorder(bgemm_scope, bz, by, bx, vz, vy, vx, tz, ty, tx, zi, yi, xi) # tile reduction axes @@ -241,9 +242,9 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused, tx = s[load].split(fused, cfg["tile_x"].size[2]) fused, ty = s[load].split(fused, cfg["tile_y"].size[2]) fused, tz = s[load].split(fused, cfg["tile_b"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[C].pragma(bgemm_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[C].pragma(bgemm_scope, 'unroll_explicit', cfg['unroll_explicit'].val) @@ -264,8 +265,8 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused = s[output].fuse(n, co, ho, wo) bb, tt = s[output].split(fused, 128) - s[output].bind(bb, tvm.thread_axis("blockIdx.x")) - s[output].bind(tt, tvm.thread_axis("threadIdx.x")) + s[output].bind(bb, te.thread_axis("blockIdx.x")) + s[output].bind(tt, te.thread_axis("threadIdx.x")) if OL is not None: s[OL].compute_at(s[output], tt) @@ -286,7 +287,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.cuda") def schedule_conv2d_nchw_winograd(cfg, outs): - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'conv2d_nchw_winograd' in op.tag: @@ -306,7 +307,7 @@ def conv2d_nchw_winograd_without_weight_transform(cfg, data, kernel, strides, @autotvm.register_topi_schedule("conv2d_nchw_winograd_without_weight_transform.cuda") def schedule_conv2d_nchw_winograd_without_weight_transform(cfg, outs): """TOPI schedule callback""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'conv2d_nchw_winograd' in op.tag: diff --git a/topi/python/topi/cuda/conv3d.py b/topi/python/topi/cuda/conv3d.py index 0a6a71ccc2f06..cc13aa511612e 100644 --- a/topi/python/topi/cuda/conv3d.py +++ b/topi/python/topi/cuda/conv3d.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-argument """Compute definition for conv3d with cuda backend""" -import tvm +from tvm import te from tvm import autotvm from tvm.contrib import cudnn @@ -34,10 +34,10 @@ def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, out_dtype='float cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel, in_depth, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width] strides : int or a list/tuple of three ints @@ -54,7 +54,7 @@ def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, out_dtype='float Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_depth, out_height, out_width] """ return nn.conv3d_ncdhw(data, kernel, strides, padding, dilation, out_dtype) @@ -78,8 +78,8 @@ def schedule_conv3d_ncdhw(cfg, outs): s: Schedule The computation schedule for conv2d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv3d_ncdhw': @@ -96,10 +96,10 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype='float Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_depth, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of three ints @@ -113,7 +113,7 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype='float Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_depth, out_height, out_width, out_channel] """ return nn.conv3d_ndhwc(data, kernel, strides, padding, dilation, out_dtype) @@ -137,8 +137,8 @@ def schedule_conv3d_ndhwc(cfg, outs): s: Schedule The computation schedule for conv2d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv3d_ndhwc': @@ -159,10 +159,10 @@ def conv3d_cudnn(cfg, data, kernel, strides, padding, dilation, layout='NCDHW', cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel, in_depth, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width] strides : int or a list/tuple of three ints @@ -182,7 +182,7 @@ def conv3d_cudnn(cfg, data, kernel, strides, padding, dilation, layout='NCDHW', Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_depth, out_height, out_width] """ if layout == 'NCDHW': diff --git a/topi/python/topi/cuda/conv3d_direct.py b/topi/python/topi/cuda/conv3d_direct.py index fa6c8781b5d3b..50b73d618995c 100644 --- a/topi/python/topi/cuda/conv3d_direct.py +++ b/topi/python/topi/cuda/conv3d_direct.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """The templates for cuda conv3d operators""" import tvm +from tvm import te from tvm import autotvm from ..util import get_const_tuple @@ -57,7 +58,7 @@ def schedule_direct_conv3d_cuda(cfg, s, conv, layout, workload_name): pad_data, kernel = s[conv].op.input_tensors s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -84,16 +85,16 @@ def schedule_direct_conv3d_cuda(cfg, s, conv, layout, workload_name): bf = s[output].fuse(n, bf) s[output].reorder(bf, bd, by, bx, vf, vd, vy, vx, tf, td, ty, tx, fi, di, yi, xi) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(s[output].fuse(bd, by), tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vd, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(s[output].fuse(td, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(s[output].fuse(bd, by), te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vd, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(s[output].fuse(td, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # tile reduction axes @@ -116,9 +117,9 @@ def schedule_direct_conv3d_cuda(cfg, s, conv, layout, workload_name): td, fused = s[load].split(fused, nparts=cfg["tile_d"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(s[load].fuse(td, ty), tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(s[load].fuse(td, ty), te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/deformable_conv2d.py b/topi/python/topi/cuda/deformable_conv2d.py index bdec4e120fe47..8c31835e3d6dc 100644 --- a/topi/python/topi/cuda/deformable_conv2d.py +++ b/topi/python/topi/cuda/deformable_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name,unused-argument """Schedule template of deformable conv2d with cuda backend""" import tvm +from tvm import te from tvm import autotvm from .. import nn from ..util import traverse_inline @@ -46,8 +47,8 @@ def schedule_deformable_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for conv2d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'deformable_conv2d_nchw': @@ -78,7 +79,7 @@ def _schedule_direct_cuda(cfg, s, conv): data_deform, kernel = s[conv].op.input_tensors s[data_deform].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -102,15 +103,15 @@ def _schedule_direct_cuda(cfg, s, conv): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -135,9 +136,9 @@ def _schedule_direct_cuda(cfg, s, conv): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/dense.py b/topi/python/topi/cuda/dense.py index 93797a4b49ba5..f5b6563fbf09b 100644 --- a/topi/python/topi/cuda/dense.py +++ b/topi/python/topi/cuda/dense.py @@ -16,9 +16,8 @@ # under the License. # pylint: disable=invalid-name, unused-argument """Schedule for dense operator""" -from __future__ import absolute_import as _abs import logging -import tvm +from tvm import te import tvm.autotvm as autotvm from tvm.autotvm.task.space import SplitEntity from tvm.contrib import cublas @@ -45,9 +44,9 @@ def dense_cublas(cfg, data, weight, bias=None, out_dtype=None): matmul = cublas.matmul(data, weight, False, True) cfg.add_flop(batch * in_dim * out_dim * 2) if bias is not None: - matmul = tvm.compute((batch, out_dim), - lambda i, j: matmul[i, j] + bias[j], - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), + lambda i, j: matmul[i, j] + bias[j], + tag=tag.BROADCAST) return matmul @@ -66,8 +65,8 @@ def dense_small_batch(cfg, data, weight, bias=None, out_dtype=None): @autotvm.register_topi_schedule("dense_small_batch.cuda") def schedule_dense_small_batch(cfg, outs): """Schedule float32/64 dense with small batch size""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -91,11 +90,11 @@ def _schedule_dense_small_batch(cfg, s, C): else: Out = s.outputs[0].output(0) s[C].compute_at(s[Out], s[Out].op.axis[1]) - s[Out].bind(s[Out].op.axis[0], tvm.thread_axis("blockIdx.y")) - s[Out].bind(s[Out].op.axis[1], tvm.thread_axis("blockIdx.x")) + s[Out].bind(s[Out].op.axis[0], te.thread_axis("blockIdx.y")) + s[Out].bind(s[Out].op.axis[1], te.thread_axis("blockIdx.x")) tx = s[C].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[C].bind(tx, thread_x) s[CF].compute_at(s[C], tx) s[C].set_store_predicate(thread_x.var.equal(0)) @@ -111,8 +110,8 @@ def dense_large_batch(cfg, data, weight, bias=None, out_dtype=None): @autotvm.register_topi_schedule("dense_large_batch.cuda") def schedule_dense_large_batch(cfg, outs): """Schedule float32/64 dense with large batch size""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -185,12 +184,12 @@ def _schedule_dense_large_batch(cfg, s, C): s[CC].compute_at(s[C], tx) # Binding - s[C].bind(by, tvm.thread_axis("blockIdx.y")) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tyz, tvm.thread_axis("vthread")) - s[C].bind(txz, tvm.thread_axis("vthread")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(by, te.thread_axis("blockIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tyz, te.thread_axis("vthread")) + s[C].bind(txz, te.thread_axis("vthread")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) # Split reduction yo, xo = CC.op.axis @@ -207,8 +206,8 @@ def _schedule_dense_large_batch(cfg, s, C): ty, _ = s[AA].split(s[AA].op.axis[0], nparts=num_thread_x) _, xi = s[AA].split(s[AA].op.axis[1], factor=num_thread_x * 4) tx, xi = s[AA].split(xi, nparts=num_thread_x) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, te.thread_axis("threadIdx.y")) + s[AA].bind(tx, te.thread_axis("threadIdx.x")) s[AA].double_buffer() # Schedule for B' shared memory load @@ -216,8 +215,8 @@ def _schedule_dense_large_batch(cfg, s, C): ty, _ = s[BB].split(s[BB].op.axis[0], nparts=num_thread_y) _, xi = s[BB].split(s[BB].op.axis[1], factor=num_thread_y * 4) tx, xi = s[BB].split(xi, nparts=num_thread_y) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].double_buffer() @@ -229,19 +228,19 @@ def dense_int8(cfg, data, weight, bias=None, out_dtype=None): batch, in_dim = get_const_tuple(data.shape) out_dim, _ = get_const_tuple(weight.shape) - k = tvm.reduce_axis((0, in_dim), name='k') + k = te.reduce_axis((0, in_dim), name='k') - matmul = tvm.compute((batch, out_dim), - lambda i, j: tvm.sum(data[i, k].astype(out_dtype) * - weight[j, k].astype(out_dtype), axis=[k]), - tag="dense_int8") + matmul = te.compute((batch, out_dim), + lambda i, j: te.sum(data[i, k].astype(out_dtype) * + weight[j, k].astype(out_dtype), axis=[k]), + tag="dense_int8") cfg.add_flop(batch * in_dim * out_dim * 2) if bias is not None: - matmul = tvm.compute((batch, out_dim), - lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), + lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) cfg.add_flop(batch * out_dim) return matmul @@ -250,8 +249,8 @@ def dense_int8(cfg, data, weight, bias=None, out_dtype=None): @autotvm.register_topi_schedule("dense_int8.cuda") def schedule_dense_int8(cfg, outs): """Dense schedule for int8 on CUDA""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if "dense_int8" in op.tag: @@ -302,12 +301,12 @@ def _schedule_dense_int8(cfg, s, output): bx, vx, tx, xi = cfg['tile_x'].apply(s, output, x) s[output].reorder(by, bx, vy, vx, ty, tx, yi, xi) - s[output].bind(by, tvm.thread_axis('blockIdx.y')) - s[output].bind(bx, tvm.thread_axis('blockIdx.x')) - s[output].bind(vy, tvm.thread_axis('vthread')) - s[output].bind(vx, tvm.thread_axis('vthread')) - s[output].bind(ty, tvm.thread_axis('threadIdx.y')) - s[output].bind(tx, tvm.thread_axis('threadIdx.x')) + s[output].bind(by, te.thread_axis('blockIdx.y')) + s[output].bind(bx, te.thread_axis('blockIdx.x')) + s[output].bind(vy, te.thread_axis('vthread')) + s[output].bind(vx, te.thread_axis('vthread')) + s[output].bind(ty, te.thread_axis('threadIdx.y')) + s[output].bind(tx, te.thread_axis('threadIdx.x')) n_ty = cfg['tile_y'].size[2] n_tx = cfg['tile_x'].size[2] @@ -325,8 +324,8 @@ def _schedule_dense_int8(cfg, s, output): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) - s[load].bind(tx, tvm.thread_axis('threadIdx.x')) - s[load].bind(ty, tvm.thread_axis('threadIdx.y')) + s[load].bind(tx, te.thread_axis('threadIdx.x')) + s[load].bind(ty, te.thread_axis('threadIdx.y')) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', False) diff --git a/topi/python/topi/cuda/depthwise_conv2d.py b/topi/python/topi/cuda/depthwise_conv2d.py index 062f95f00eff5..de11dc6683691 100644 --- a/topi/python/topi/cuda/depthwise_conv2d.py +++ b/topi/python/topi/cuda/depthwise_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-argument """Schedule for depthwise_conv2d with auto fusion""" import tvm +from tvm import te from tvm import autotvm from ..util import traverse_inline from .. import tag @@ -43,8 +44,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'depthwise_conv2d_nchw': @@ -75,7 +76,7 @@ def _callback(op): ##### space definition end ##### s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -100,15 +101,15 @@ def _callback(op): kernel_scope, n = s[output].split(n, nparts=1) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -123,9 +124,9 @@ def _callback(op): fused, tx = s[load].split(fused, cfg["tile_x"].size[2]) fused, ty = s[load].split(fused, cfg["tile_y"].size[2]) fused, tz = s[load].split(fused, cfg["tile_f"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) @@ -147,8 +148,8 @@ def schedule_depthwise_conv2d_nhwc(outs): s: Schedule The computation schedule for depthwise_conv2d nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(temp, Filter, DepthwiseConv2d): s[temp].compute_inline() @@ -160,8 +161,8 @@ def _schedule(temp, Filter, DepthwiseConv2d): Output = outs[0].op.output(0) s[DepthwiseConv2d].set_scope("local") - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") b, h, w, c = s[Output].op.axis @@ -199,13 +200,13 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule depthwise_conv2d if OP.tag == 'depthwise_conv2d_nhwc': PaddedInput = OP.input_tensors[0] Filter = OP.input_tensors[1] - if isinstance(Filter.op, tvm.tensor.ComputeOp) and 'dilate' in Filter.op.tag: + if isinstance(Filter.op, tvm.te.ComputeOp) and 'dilate' in Filter.op.tag: s[Filter].compute_inline() DepthwiseConv2d = OP.output(0) _schedule(PaddedInput, Filter, DepthwiseConv2d) @@ -231,14 +232,14 @@ def schedule_depthwise_conv2d_backward_input_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt input with layout nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Padded_out_grad, In_grad): s[Padded_out_grad].compute_inline() - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") _, h, w, c = In_grad.op.axis fused_hwc = s[In_grad].fuse(h, w, c) @@ -276,13 +277,13 @@ def schedule_depthwise_conv2d_backward_weight_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt weight with layout nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Weight_grad): - block_x = tvm.thread_axis("blockIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_y = te.thread_axis("threadIdx.y") + thread_x = te.thread_axis("threadIdx.x") db, dh, dw = Weight_grad.op.reduce_axis diff --git a/topi/python/topi/cuda/group_conv2d_nchw.py b/topi/python/topi/cuda/group_conv2d_nchw.py index 5abf2985273c1..c5cf72b608436 100644 --- a/topi/python/topi/cuda/group_conv2d_nchw.py +++ b/topi/python/topi/cuda/group_conv2d_nchw.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """The template for cuda group_conv2d_nchw""" import tvm +from tvm import te from tvm import autotvm from .injective import schedule_injective_from_existing @@ -51,8 +52,8 @@ def schedule_group_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for group conv2d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == "group_conv2d_nchw": @@ -115,21 +116,21 @@ def _schedule_group_conv2d_nchw_direct(cfg, s, conv): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bg, bf, by, bx, vn, vg, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(s[output].fuse(bg, bf), tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vg, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(s[output].fuse(bg, bf), te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vg, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(tyx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tyx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tyx) # number of threads @@ -137,9 +138,9 @@ def _schedule_group_conv2d_nchw_direct(cfg, s, conv): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2] else: - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # number of threads @@ -165,9 +166,9 @@ def _schedule_group_conv2d_nchw_direct(cfg, s, conv): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) fused, tz = s[load].split(fused, factor=n_tz) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) @@ -185,11 +186,11 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] or 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel // groups, filter_height, filter_width] or 6-D with shape [num_filter_chunk, in_channel_chunk // groups, filter_height, filter_width, num_filter_block, in_channel_block] @@ -211,7 +212,7 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_height, out_width, out_channel_block] """ ic_block_factor = 4 @@ -230,11 +231,11 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups assert out_channels % oc_block_factor == 0, \ "Number of output channels per group must divide {}".format(oc_block_factor) - packed_data = tvm.compute((batch, channels // ic_block_factor, height, width, - ic_block_factor), - lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], - name="packed_data") - packed_kernel = tvm.compute( + packed_data = te.compute((batch, channels // ic_block_factor, height, width, + ic_block_factor), + lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], + name="packed_data") + packed_kernel = te.compute( (out_channels // oc_block_factor, in_channels // ic_block_factor, kernel_h, kernel_w, oc_block_factor, ic_block_factor), lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: @@ -286,10 +287,10 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups oshape = (batch, oc_chunk, out_height, out_width, oc_block) - icc = tvm.reduce_axis((0, ic_chunk // groups), name='ic_chunk') - icb = tvm.reduce_axis((0, ic_block_factor), name='ic_block') - kh = tvm.reduce_axis((0, kernel_h), name='kh') - kw = tvm.reduce_axis((0, kernel_w), name='kw') + icc = te.reduce_axis((0, ic_chunk // groups), name='ic_chunk') + icb = te.reduce_axis((0, ic_block_factor), name='ic_block') + kh = te.reduce_axis((0, kernel_h), name='kh') + kw = te.reduce_axis((0, kernel_w), name='kw') # NOTE(kumasento): explanation of this snippet - # oc_chunk//groups and ic_chunk//groups give you the number of blocks, @@ -302,20 +303,20 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups # # Compared with a normal convolution, group convolution only sums # input channels from the group that an output channel resides in. - conv = tvm.compute( + conv = te.compute( oshape, lambda n, occ, oh, ow, ocb: - tvm.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc, - oh*stride_h+kh*dilation_h, ow*stride_w+kw*dilation_w, icb] - .astype('int32') * - packed_kernel[occ, icc, kh, kw, ocb, icb].astype('int32'), - axis=[icc, kh, kw, icb])) + te.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc, + oh*stride_h+kh*dilation_h, ow*stride_w+kw*dilation_w, icb] + .astype('int32') * + packed_kernel[occ, icc, kh, kw, ocb, icb].astype('int32'), + axis=[icc, kh, kw, icb])) # Type conversion - output = tvm.compute(oshape, lambda *index: conv(*index).astype(out_dtype), - tag='group_conv2d_NCHWc_int8') + output = te.compute(oshape, lambda *index: conv(*index).astype(out_dtype), + tag='group_conv2d_NCHWc_int8') num_flop = batch * oc_chunk * oc_block * out_height * out_width * \ - ic_chunk * ic_block * kernel_h * kernel_w * 2 // groups + ic_chunk * ic_block * kernel_h * kernel_w * 2 // groups cfg.add_flop(num_flop) return output @@ -339,8 +340,8 @@ def schedule_group_conv2d_NCHWc_int8(cfg, outs): s: Schedule The computation schedule for group conv2d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == "group_conv2d_NCHWc_int8": @@ -361,7 +362,7 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): conv = output.op.input_tensors[0] packed_data, packed_kernel = conv.op.input_tensors - if isinstance(packed_data.op, tvm.tensor.ComputeOp) and "pad" in packed_data.op.tag: + if isinstance(packed_data.op, tvm.te.ComputeOp) and "pad" in packed_data.op.tag: pad_data = packed_data packed_data = pad_data.op.input_tensors[0] else: @@ -374,7 +375,7 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): s[packed_kernel].pragma( s[packed_kernel].op.axis[0], "debug_skip_region") else: - if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and \ + if isinstance(packed_kernel.op, tvm.te.ComputeOp) and \ packed_kernel.name == 'packed_kernel': # data and kernel are not pre-computed, schedule layout transform here schedule_injective_from_existing(s, packed_data) @@ -407,7 +408,7 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): kernel_scope, n = s[output].split(n, nparts=1) g, f = s[output].split(f, nparts=groups) - s[output].bind(n, tvm.thread_axis('blockIdx.z')) + s[output].bind(n, te.thread_axis('blockIdx.z')) bn, vn, tn, ni = cfg["tile_n"].apply(s, output, n) bg, vg = cfg["tile_g"].apply(s, output, g) bf, vf, tf, fi = cfg["tile_f"].apply(s, output, f) @@ -416,20 +417,20 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): s[output].reorder(bn, bg, bf, by, bx, vn, vg, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(s[output].fuse(bg, bf), tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vg, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(s[output].fuse(bg, bf), te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vg, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(tyx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tyx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tyx) # number of threads @@ -437,10 +438,10 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2] else: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tx) # number of threads @@ -476,9 +477,9 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) fused, tz = s[load].split(fused, factor=n_tz) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # double buffer cfg.define_knob('AA_double_buffer', [0, 1]) diff --git a/topi/python/topi/cuda/injective.py b/topi/python/topi/cuda/injective.py index 1690407a1602b..303fe5f7cc778 100644 --- a/topi/python/topi/cuda/injective.py +++ b/topi/python/topi/cuda/injective.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable, """Schedule for composition of injective operator""" import tvm +from tvm import te from .. import util def schedule_injective_from_existing(sch, out): @@ -56,12 +57,12 @@ def schedule_injective_from_existing(sch, out): xo, xi = sch[out].split(fused, factor=num_thread * max_block) bx, tx = sch[out].split(xi, factor=num_thread) sch[out].reorder(bx, tx, xo) - sch[out].bind(bx, tvm.thread_axis("blockIdx.x")) - sch[out].bind(tx, tvm.thread_axis("threadIdx.x")) + sch[out].bind(bx, te.thread_axis("blockIdx.x")) + sch[out].bind(tx, te.thread_axis("threadIdx.x")) else: bx, tx = sch[out].split(fused, factor=num_thread) - sch[out].bind(tx, tvm.thread_axis("threadIdx.x")) - sch[out].bind(bx, tvm.thread_axis("blockIdx.x")) + sch[out].bind(tx, te.thread_axis("threadIdx.x")) + sch[out].bind(bx, te.thread_axis("blockIdx.x")) return sch @@ -79,10 +80,10 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + tvm.te.schedule.AutoInlineInjective(s) for out in outs: if not util.is_empty_shape(out.shape): schedule_injective_from_existing(s, out) diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index 27a52724fb2d2..2b3f38e79eeab 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -19,20 +19,20 @@ """Non-maximum suppression operator""" import math import tvm +from tvm import te -from tvm import api -from tvm.intrin import if_then_else +from tvm.tir import if_then_else from .sort import argsort from .. import tag def cuda_atomic_add_rule(op): if op.dtype == "float32": - return tvm.call_pure_extern("float32", "atomicAdd", op.args[0], op.args[1]) + return tvm.tir.call_pure_extern("float32", "atomicAdd", op.args[0], op.args[1]) if op.dtype == "float64": - return tvm.call_pure_extern("float64", "atomicAdd", op.args[0], op.args[1]) + return tvm.tir.call_pure_extern("float64", "atomicAdd", op.args[0], op.args[1]) if op.dtype == "int32": - return tvm.call_pure_extern("int32", "atomicAdd", op.args[0], op.args[1]) + return tvm.tir.call_pure_extern("int32", "atomicAdd", op.args[0], op.args[1]) raise RuntimeError("only support int32, float32 and float64") @@ -41,7 +41,7 @@ def cuda_atomic_add_rule(op): def atomic_add(x, y): - return tvm.call_pure_intrin(y.dtype, "atomic_add", x, y) + return tvm.tir.call_pure_intrin(y.dtype, "atomic_add", x, y) def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, score_index): @@ -86,22 +86,22 @@ def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, scor flag = ib.buffer_ptr(flag) atomic_add_return = ib.allocate( valid_count.dtype, (1,), name='atomic_add_return', scope='local') - one_count = tvm.const(1, dtype=valid_count.dtype) - score_threshold = tvm.make.node( + one_count = tvm.tir.const(1, dtype=valid_count.dtype) + score_threshold = tvm.ir.make_node( "FloatImm", dtype="float32", value=score_threshold) - id_index = tvm.make.node("IntImm", dtype="int32", value=id_index) - score_index = tvm.make.node("IntImm", dtype="int32", value=score_index) + id_index = tvm.ir.make_node("IntImm", dtype="int32", value=id_index) + score_index = tvm.ir.make_node("IntImm", dtype="int32", value=score_index) max_threads = int(tvm.target.Target.current( allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = batch_size * num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv # initialize valid_count with ib.if_scope(tid < batch_size): @@ -111,11 +111,12 @@ def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, scor flag[tid] = 0 with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) - with ib.if_scope(tvm.all(data[tid * elem_length + score_index] > score_threshold, - tvm.any(id_index < 0, data[tid * elem_length + id_index] >= 0))): + with ib.if_scope( + tvm.tir.all(data[tid * elem_length + score_index] > score_threshold, + tvm.tir.any(id_index < 0, data[tid * elem_length + id_index] >= 0))): flag[tid] = 1 - atomic_add_return[0] = atomic_add(tvm.call_pure_intrin("handle", "tvm_address_of", - valid_count[i]), one_count) + atomic_add_return[0] = atomic_add(tvm.tir.call_pure_intrin("handle", "tvm_address_of", + valid_count[i]), one_count) return ib.get() @@ -149,13 +150,13 @@ def flag_scan(flag, prefix_sum): allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = batch_size * num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod # initialize prefix_sum with ib.if_scope(tid < batch_size * num_anchors): @@ -202,7 +203,7 @@ def out_rewrite(data, flag, prefix_sum, valid_count, out): ib = tvm.ir_builder.create() - one = tvm.const(1, dtype=out.dtype) + one = tvm.tir.const(1, dtype=out.dtype) data = ib.buffer_ptr(data) flag = ib.buffer_ptr(flag) valid_count = ib.buffer_ptr(valid_count) @@ -213,20 +214,20 @@ def out_rewrite(data, flag, prefix_sum, valid_count, out): allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = batch_size * num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) j = idxm(tid, num_anchors) base_idx = i * num_anchors * elem_length - with ib.if_scope(tvm.all(flag[tid] > 0, prefix_sum[tid] >= 0, - prefix_sum[tid] < num_anchors)): + with ib.if_scope(tvm.tir.all(flag[tid] > 0, prefix_sum[tid] >= 0, + prefix_sum[tid] < num_anchors)): with ib.for_range(0, elem_length) as k: out[base_idx + prefix_sum[tid] * elem_length + k] = data[tid * elem_length + k] @@ -243,7 +244,7 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor Input data. 3-D tensor with shape [batch_size, num_anchors, elem_length]. score_threshold : optional, float @@ -257,28 +258,28 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Returns ------- - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. - out_tensor : tvm.Tensor + out_tensor : tvm.te.Tensor Rearranged data tensor. """ batch_size = data.shape[0] num_anchors = data.shape[1] - data_buf = api.decl_buffer( + data_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "data_buf", data_alignment=8) - valid_count_buf = api.decl_buffer( + valid_count_buf = tvm.tir.decl_buffer( (batch_size,), "int32", "valid_count_buf", data_alignment=8) - temp_flag_buf = api.decl_buffer( + temp_flag_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), "int32", "temp_flag", data_alignment=8) - temp_partial_buf = api.decl_buffer( + temp_partial_buf = tvm.tir.decl_buffer( (batch_size, num_anchors), "int32", "temp_partial", data_alignment=8) - out_buf = api.decl_buffer( + out_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "out_buf", data_alignment=8) valid_count, temp_flag = \ - tvm.extern([(batch_size,), (batch_size, num_anchors)], [data], - lambda ins, outs: get_valid_counts_ir( + te.extern([(batch_size,), (batch_size, num_anchors)], [data], + lambda ins, outs: get_valid_counts_ir( ins[0], outs[0], outs[1], score_threshold, id_index, score_index), dtype=["int32", "int32"], in_buffers=[data_buf], @@ -287,8 +288,8 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): tag="get_valid_counts_gpu") temp_partial = \ - tvm.extern([(batch_size, num_anchors)], [temp_flag], - lambda ins, outs: flag_scan( + te.extern([(batch_size, num_anchors)], [temp_flag], + lambda ins, outs: flag_scan( ins[0], outs[0]), dtype=["int32"], in_buffers=[temp_flag_buf], @@ -296,8 +297,8 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): name="flag_scan") out = \ - tvm.extern([data.shape], [data, temp_flag, temp_partial, valid_count], - lambda ins, outs: out_rewrite( + te.extern([data.shape], [data, temp_flag, temp_partial, valid_count], + lambda ins, outs: out_rewrite( ins[0], ins[1], ins[2], ins[3], outs[0]), dtype=[data.dtype], in_buffers=[data_buf, temp_flag_buf, @@ -357,16 +358,16 @@ def nms_ir(data, sorted_index, valid_count, out, box_indices, def calculate_overlap(out_tensor, box_a_idx, box_b_idx): """Calculate overlap of two boxes. """ - w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx])) - h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1])) + w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) + - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx])) + h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) + - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1])) i = w * h u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx]) * \ (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1]) + \ (out_tensor[box_b_idx + 2] - out_tensor[box_b_idx]) * \ (out_tensor[box_b_idx + 3] - out_tensor[box_b_idx + 1]) - i - return tvm.expr.Select(u <= 0.0, 0.0, i / u) + return tvm.tir.Select(u <= 0.0, 0.0, i / u) batch_size = data.shape[0] num_anchors = data.shape[1] @@ -386,27 +387,27 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) j = bx * max_threads + tx - iou_threshold = tvm.make.node( + iou_threshold = tvm.ir.make_node( "FloatImm", dtype="float32", value=iou_threshold) - top_k = tvm.make.node("IntImm", dtype="int32", value=top_k) - coord_start = tvm.make.node("IntImm", dtype="int32", value=coord_start) - id_index = tvm.make.node("IntImm", dtype="int32", value=id_index) - score_index = tvm.make.node("IntImm", dtype="int32", value=score_index) - force_suppress = tvm.make.node( + top_k = tvm.ir.make_node("IntImm", dtype="int32", value=top_k) + coord_start = tvm.ir.make_node("IntImm", dtype="int32", value=coord_start) + id_index = tvm.ir.make_node("IntImm", dtype="int32", value=id_index) + score_index = tvm.ir.make_node("IntImm", dtype="int32", value=score_index) + force_suppress = tvm.ir.make_node( "IntImm", dtype="int32", value=1 if force_suppress else 0) with ib.for_range(0, batch_size, for_type="unroll") as i: base_idx = i * num_anchors * box_data_length - with ib.if_scope(tvm.all(iou_threshold > 0, valid_count[i] > 0)): + with ib.if_scope(tvm.tir.all(iou_threshold > 0, valid_count[i] > 0)): # Reorder output nkeep = if_then_else( - tvm.all(top_k > 0, top_k < valid_count[i]), + tvm.tir.all(top_k > 0, top_k < valid_count[i]), top_k, valid_count[i]) with ib.if_scope(j < nkeep): with ib.for_range(0, box_data_length) as k: @@ -415,7 +416,7 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): * box_data_length + k)] box_indices[i * num_anchors + j] = sorted_index[i * num_anchors + j] - with ib.if_scope(tvm.all(top_k > 0, top_k < valid_count[i])): + with ib.if_scope(tvm.tir.all(top_k > 0, top_k < valid_count[i])): with ib.if_scope(j < valid_count[i] - nkeep): with ib.for_range(0, box_data_length) as k: out[(base_idx + (j + nkeep) * box_data_length + k)] = -1.0 @@ -423,19 +424,21 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): # Apply nms with ib.for_range(0, valid_count[i]) as k: offset_k = k * box_data_length - with ib.if_scope(tvm.all(out[base_idx + offset_k + score_index] > 0, - tvm.any(id_index < 0, out[base_idx + - offset_k + id_index] >= 0))): + with ib.if_scope( + tvm.tir.all(out[base_idx + offset_k + score_index] > 0, + tvm.tir.any(id_index < 0, out[base_idx + + offset_k + id_index] >= 0))): with ib.if_scope(j < valid_count[i]): offset_j = j * box_data_length - with ib.if_scope(tvm.all(j > k, - out[base_idx + offset_j + - score_index] > 0, - tvm.any(id_index < 0, - out[base_idx + offset_j + id_index] >= 0), - tvm.any(force_suppress > 0, id_index < 0, - out[base_idx + offset_k + id_index] == - out[base_idx + offset_j + id_index]))): + with ib.if_scope( + tvm.tir.all(j > k, + out[base_idx + offset_j + + score_index] > 0, + tvm.tir.any(id_index < 0, + out[base_idx + offset_j + id_index] >= 0), + tvm.tir.any(force_suppress > 0, id_index < 0, + out[base_idx + offset_k + id_index] == + out[base_idx + offset_j + id_index]))): iou = calculate_overlap(out, base_idx + offset_j + coord_start, base_idx + offset_k + coord_start) with ib.if_scope(iou >= iou_threshold): @@ -505,8 +508,8 @@ def invalid_to_bottom_pre(data, flag, idx): tvm.target.Target.current(allow_none=False).max_num_threads)) nthread_tx = max_threads nthread_bx = num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) j = bx * max_threads + tx @@ -565,8 +568,8 @@ def invalid_to_bottom_ir(data, flag, idx, out): tvm.target.Target.current(allow_none=False).max_num_threads)) nthread_tx = max_threads nthread_bx = num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) j = bx * max_threads + tx @@ -591,12 +594,12 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, elem_length]. The last dimension should be in format of [class_id, score, box_left, box_top, box_right, box_bottom]. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. max_output_size : optional, int @@ -629,7 +632,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, elem_length]. Example @@ -638,8 +641,8 @@ def non_max_suppression(data, valid_count, max_output_size=-1, # An example to use nms dshape = (1, 5, 6) - data = tvm.placeholder(dshape, name="data") - valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") + data = te.placeholder(dshape, name="data") + valid_count = te.placeholder((dshape[0],), dtype="int32", name="valid_count") iou_threshold = 0.7 force_suppress = True top_k = -1 @@ -659,63 +662,63 @@ def non_max_suppression(data, valid_count, max_output_size=-1, num_anchors = data.shape[1] valid_count_dtype = "int32" - valid_count_buf = api.decl_buffer(valid_count.shape, valid_count_dtype, - "valid_count_buf", data_alignment=4) + valid_count_buf = tvm.tir.decl_buffer(valid_count.shape, valid_count_dtype, + "valid_count_buf", data_alignment=4) score_axis = score_index score_shape = (batch_size, num_anchors) - score_tensor = tvm.compute( + score_tensor = te.compute( score_shape, lambda i, j: data[i, j, score_axis], tag=tag.ELEMWISE) sort_tensor = argsort( score_tensor, valid_count=valid_count, axis=1, is_ascend=False) - sort_tensor_buf = api.decl_buffer(sort_tensor.shape, sort_tensor.dtype, - "sort_tensor_buf", data_alignment=8) + sort_tensor_buf = tvm.tir.decl_buffer(sort_tensor.shape, sort_tensor.dtype, + "sort_tensor_buf", data_alignment=8) - data_buf = api.decl_buffer( + data_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "data_buf", data_alignment=8) - out_buf = api.decl_buffer( + out_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "out_buf", data_alignment=8) out, box_indices = \ - tvm.extern([data.shape, score_shape], - [data, sort_tensor, valid_count], - lambda ins, outs: nms_ir( - ins[0], ins[1], ins[2], outs[0], outs[1], - max_output_size, iou_threshold, force_suppress, - top_k, coord_start, id_index, score_index), - dtype=[data.dtype, "int32"], - in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], - name="nms", - tag="nms") + te.extern([data.shape, score_shape], + [data, sort_tensor, valid_count], + lambda ins, outs: nms_ir( + ins[0], ins[1], ins[2], outs[0], outs[1], + max_output_size, iou_threshold, force_suppress, + top_k, coord_start, id_index, score_index), + dtype=[data.dtype, "int32"], + in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], + name="nms", + tag="nms") if return_indices: return box_indices if invalid_to_bottom: - output_buf = api.decl_buffer( + output_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "output_buf", data_alignment=8) - temp_flag_buf = api.decl_buffer( + temp_flag_buf = tvm.tir.decl_buffer( score_shape, valid_count_dtype, "temp_flag", data_alignment=8) - temp_idx_buf = api.decl_buffer( + temp_idx_buf = tvm.tir.decl_buffer( score_shape, valid_count_dtype, "temp_idx", data_alignment=8) - temp_flag, temp_idx = tvm.extern([score_shape, score_shape], [out], - lambda ins, outs: invalid_to_bottom_pre( - ins[0], outs[0], outs[1]), - dtype=["int32", "int32"], - in_buffers=[out_buf], - out_buffers=[ - temp_flag_buf, temp_idx_buf], - name="invalid_to_bottom_phase_one") - - output = tvm.extern([data.shape], [out, temp_flag, temp_idx], - lambda ins, outs: invalid_to_bottom_ir( - ins[0], ins[1], ins[2], outs[0]), - dtype=[data.dtype], - in_buffers=[out_buf, temp_flag_buf, temp_idx_buf], - out_buffers=[output_buf], - name="invalid_to_bottom", - tag="invalid_to_bottom") + temp_flag, temp_idx = te.extern([score_shape, score_shape], [out], + lambda ins, outs: invalid_to_bottom_pre( + ins[0], outs[0], outs[1]), + dtype=["int32", "int32"], + in_buffers=[out_buf], + out_buffers=[ + temp_flag_buf, temp_idx_buf], + name="invalid_to_bottom_phase_one") + + output = te.extern([data.shape], [out, temp_flag, temp_idx], + lambda ins, outs: invalid_to_bottom_ir( + ins[0], ins[1], ins[2], outs[0]), + dtype=[data.dtype], + in_buffers=[out_buf, temp_flag_buf, temp_idx_buf], + out_buffers=[output_buf], + name="invalid_to_bottom", + tag="invalid_to_bottom") return output return out diff --git a/topi/python/topi/cuda/pooling.py b/topi/python/topi/cuda/pooling.py index 2bebd39123783..26c18eeaa3068 100644 --- a/topi/python/topi/cuda/pooling.py +++ b/topi/python/topi/cuda/pooling.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for pooling operators""" import tvm +from tvm import te from .. import tag from ..util import traverse_inline @@ -35,15 +36,15 @@ def schedule_adaptive_pool(outs): s: Schedule The computation schedule for adaptive_pool. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Pool): num_thread = 8 - block_x = tvm.thread_axis("blockIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread), "threadIdx.y") if Pool.op in s.outputs: Out = Pool OL = s.cache_write(Pool, "local") @@ -71,7 +72,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('adaptive_pool'): @@ -103,10 +104,10 @@ def schedule_pool(outs, layout): s: Schedule The computation schedule for pool. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(PaddedInput, Pool): - if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): + if isinstance(PaddedInput.op, tvm.te.ComputeOp): s[PaddedInput].compute_inline() num_thread = tvm.target.Target.current(allow_none=False).max_num_threads if Pool.op in s.outputs: @@ -117,8 +118,8 @@ def _schedule(PaddedInput, Pool): s[Pool].set_scope("local") fused = s[Out].fuse(*s[Out].op.axis) bx, tx = s[Out].split(fused, factor=num_thread) - s[Out].bind(bx, tvm.thread_axis("blockIdx.x")) - s[Out].bind(tx, tvm.thread_axis("threadIdx.x")) + s[Out].bind(bx, te.thread_axis("blockIdx.x")) + s[Out].bind(tx, te.thread_axis("threadIdx.x")) if Pool.op in s.outputs: s[OL].compute_at(s[Out], tx) else: @@ -133,7 +134,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -163,8 +164,8 @@ def schedule_pool_grad(outs): s: Schedule The computation schedule for pool_grad. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule_pool_grad(op): if op in s.outputs: @@ -174,15 +175,15 @@ def _schedule_pool_grad(op): fused = s[out].fuse(*s[out].op.axis) num_thread = tvm.target.Target.current(allow_none=False).max_num_threads bx, tx = s[out].split(fused, factor=num_thread) - s[out].bind(bx, tvm.thread_axis("blockIdx.x")) - s[out].bind(tx, tvm.thread_axis("threadIdx.x")) + s[out].bind(bx, te.thread_axis("blockIdx.x")) + s[out].bind(tx, te.thread_axis("threadIdx.x")) if tag.COMM_REDUCE_IDX in op.input_tensors[0].op.tag: max_pool_index = op.input_tensors[0] s[max_pool_index].compute_at(s[out], tx) pool_input = max_pool_index.op.input_tensors[0] - if isinstance(pool_input.op, tvm.tensor.ComputeOp): + if isinstance(pool_input.op, tvm.te.ComputeOp): # handle padding s[pool_input].compute_inline() if op not in s.outputs: diff --git a/topi/python/topi/cuda/rcnn/proposal.py b/topi/python/topi/cuda/rcnn/proposal.py index 489c354e6cf3b..03907a50c0af8 100644 --- a/topi/python/topi/cuda/rcnn/proposal.py +++ b/topi/python/topi/cuda/rcnn/proposal.py @@ -18,6 +18,7 @@ """Proposal operator""" import math import tvm +from tvm import te from ...vision.rcnn import generate_anchor, reg_bbox, reg_iou from ...util import get_const_tuple, get_const_int @@ -28,16 +29,16 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r Parameters ---------- - cls_prob_buf : tvm.schedule.Buffer + cls_prob_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred_buf : tvm.schedule.Buffer + bbox_pred_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 4 * num_anchors, height, width] - im_info_buf : tvm.schedule.Buffer + im_info_buf : tvm.te.schedule.Buffer 2-D with shape [batch, 3] - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5] The last dimension is in format of [w_start, h_start, w_end, h_end, score] @@ -67,8 +68,8 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = (batch * height * width) // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") tid = bx * max_threads + tx ib = tvm.ir_builder.create() ib.scope_attr(tx, "thread_extent", nthread_tx) @@ -79,8 +80,8 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r p_im_info = ib.buffer_ptr(im_info_buf) p_out = ib.buffer_ptr(out_buf) - idxm = tvm.indexmod - idxd = tvm.indexdiv + idxm = tvm.tir.indexmod + idxd = tvm.tir.indexdiv with ib.if_scope(tid < batch * height * width): w = idxm(tid, width) @@ -104,10 +105,10 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r regression_func = reg_iou if iou_loss else reg_bbox pred_x1, pred_y1, pred_x2, pred_y2 = regression_func(x1, y1, x2, y2, *delta) - pred_x1 = tvm.max(tvm.min(pred_x1, im_width - 1.0), 0.0) - pred_y1 = tvm.max(tvm.min(pred_y1, im_height - 1.0), 0.0) - pred_x2 = tvm.max(tvm.min(pred_x2, im_width - 1.0), 0.0) - pred_y2 = tvm.max(tvm.min(pred_y2, im_height - 1.0), 0.0) + pred_x1 = tvm.te.max(tvm.te.min(pred_x1, im_width - 1.0), 0.0) + pred_y1 = tvm.te.max(tvm.te.min(pred_y1, im_height - 1.0), 0.0) + pred_x2 = tvm.te.max(tvm.te.min(pred_x2, im_width - 1.0), 0.0) + pred_y2 = tvm.te.max(tvm.te.min(pred_y2, im_height - 1.0), 0.0) real_height = (im_height / feature_stride).astype('int32') real_width = (im_width / feature_stride).astype('int32') @@ -117,15 +118,15 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r min_size = p_im_info[b * 3 + 2] * rpn_min_size pred_score = p_score[((b * num_anchors * 2 + num_anchors + k) * height + h) * width + w] - pred_score = tvm.expr.Select(tvm.any(h >= real_height, w >= real_width), - -1.0, pred_score) + pred_score = tvm.tir.Select(tvm.tir.any(h >= real_height, w >= real_width), + -1.0, pred_score) p_out[out_index * 5 + 0] = pred_x1 p_out[out_index * 5 + 1] = pred_y1 p_out[out_index * 5 + 2] = pred_x2 p_out[out_index * 5 + 3] = pred_y2 p_out[out_index * 5 + 4] = pred_score - with ib.if_scope(tvm.any(bbox_w < min_size, bbox_h < min_size)): + with ib.if_scope(tvm.tir.any(bbox_w < min_size, bbox_h < min_size)): p_out[out_index * 5 + 0] -= min_size / 2.0 p_out[out_index * 5 + 1] -= min_size / 2.0 p_out[out_index * 5 + 2] += min_size / 2.0 @@ -140,10 +141,10 @@ def argsort_ir(data_buf, out_index_buf): Parameters ---------- - data_buf : tvm.schedule.Buffer + data_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox] - out_index_buf : tvm.schedule.Buffer + out_index_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Indices of data in sorted order. Returns @@ -158,15 +159,15 @@ def argsort_ir(data_buf, out_index_buf): index_out = ib.buffer_ptr(out_index_buf) nthread_tx = max_threads nthread_bx = (num_bbox + 1) // 2 // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("vthread") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("vthread") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "virtual_thread", nthread_bx) tid = bx * nthread_tx + tx temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local") temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local") - idxm = tvm.indexmod + idxm = tvm.tir.indexmod with ib.for_range(0, batch, for_type="unroll") as b: start = b * num_bbox @@ -177,16 +178,16 @@ def argsort_ir(data_buf, out_index_buf): with ib.for_range(0, num_bbox) as k: offset = start + 2 * tid + idxm(k, 2) with ib.if_scope( - tvm.all(offset + 1 < num_bbox, p_data[offset] < p_data[offset + 1])): + tvm.tir.all(offset + 1 < num_bbox, p_data[offset] < p_data[offset + 1])): temp_data[0] = p_data[offset] p_data[offset] = p_data[offset + 1] p_data[offset + 1] = temp_data[0] temp_index[0] = index_out[offset] index_out[offset] = index_out[offset + 1] index_out[offset + 1] = temp_index[0] - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -195,11 +196,11 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. The last dimension is in format of [w_start, h_start, w_end, h_end, score]. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. nms_threshold : float @@ -213,10 +214,10 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): def calculate_overlap(out_tensor, box_a_idx, box_b_idx): """Calculate overlap of two boxes. """ - w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) - h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) + w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) + - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) + h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) + - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) i = w * h u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * \ (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + \ @@ -226,8 +227,8 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): batch, num_bbox = get_const_tuple(out_buf.shape) max_threads = int(math.sqrt(tvm.target.Target.current(allow_none=False).max_num_threads)) - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib = tvm.ir_builder.create() p_data = ib.buffer_ptr(sorted_bbox_buf) p_out = ib.buffer_ptr(out_buf) @@ -241,13 +242,13 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): with ib.if_scope(i < num_bbox): p_out[base_idx + i] = False with ib.for_range(0, num_bbox - 1) as l: - with ib.if_scope(tvm.all(i < num_bbox, i > l, p_out[base_idx + l] == False)): + with ib.if_scope(tvm.tir.all(i < num_bbox, i > l, p_out[base_idx + l] == False)): iou = calculate_overlap(p_data, (base_idx + l) * 5, (base_idx + i) * 5) with ib.if_scope(iou > nms_threshold): p_out[base_idx + i] = True - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -256,14 +257,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. The last dimension is in format of [w_start, h_start, w_end, h_end, score]. - remove_mask_buf : tvm.schedule.Buffer + remove_mask_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. @@ -275,7 +276,7 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape) rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch nthread_tx = batch - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") ib = tvm.ir_builder.create() ib.scope_attr(tx, "thread_extent", nthread_tx) i = ib.allocate('int32', (1,), 'i', scope='local') @@ -292,14 +293,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): with ib.if_scope(p_remove[b * num_bbox + j] == False): nkeep[0] += 1 with ib.if_scope(nkeep[0] > 0): - with ib.for_range(0, tvm.ceil( - tvm.const(rpn_post_nms_top_n, 'float32') / nkeep[0]).astype('int32')): + with ib.for_range(0, te.ceil( + tvm.tir.const(rpn_post_nms_top_n, 'float32') / nkeep[0]).astype('int32')): with ib.for_range(0, num_bbox) as j: offset_j = (b * num_bbox + j) * 5 offset_i = (b * rpn_post_nms_top_n + i[0]) * 5 - with ib.if_scope(tvm.all(i[0] < rpn_post_nms_top_n, - p_remove[(b*num_bbox+j)] == False)): - p_out[offset_i] = tvm.expr.Cast('float32', b) + with ib.if_scope(tvm.tir.all(i[0] < rpn_post_nms_top_n, + p_remove[(b*num_bbox+j)] == False)): + p_out[offset_i] = tvm.tir.Cast('float32', b) with ib.for_range(0, 4, for_type='unroll') as k: p_out[offset_i + k + 1] = p_sorted_bbox[offset_j + k] i[0] = i[0] + 1 @@ -314,13 +315,13 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred : tvm.Tensor + bbox_pred : tvm.te.Tensor 4-D with shape [batch, 4 * num_anchors, height, width] - im_info : tvm.Tensor + im_info : tvm.te.Tensor 2-D with shape [batch, 3] scales : list/tuple of float @@ -350,7 +351,7 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 2-D tensor with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. """ @@ -360,20 +361,20 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres num_bbox = height * width * num_anchors rpn_pre_nms_top_n = min(rpn_pre_nms_top_n, num_bbox) if rpn_pre_nms_top_n > 0 else num_bbox - bbox = tvm.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: - predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, - feature_stride, rpn_min_size, iou_loss), - dtype=bbox_pred.dtype) - score = tvm.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') - sorted_index = tvm.extern([score.shape], [score], - lambda ins, outs: argsort_ir(ins[0], outs[0]), - dtype='int32') - sorted_bbox = tvm.compute((batch, rpn_pre_nms_top_n, 5), - lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') - nms_remove_mask = tvm.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], - lambda ins, outs: nms_ir(ins[0], outs[0], threshold), - dtype='bool') - nms_out = tvm.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], - lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), - dtype=sorted_bbox.dtype) + bbox = te.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: + predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, + feature_stride, rpn_min_size, iou_loss), + dtype=bbox_pred.dtype) + score = te.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') + sorted_index = te.extern([score.shape], [score], + lambda ins, outs: argsort_ir(ins[0], outs[0]), + dtype='int32') + sorted_bbox = te.compute((batch, rpn_pre_nms_top_n, 5), + lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') + nms_remove_mask = te.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], + lambda ins, outs: nms_ir(ins[0], outs[0], threshold), + dtype='bool') + nms_out = te.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], + lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), + dtype=sorted_bbox.dtype) return nms_out diff --git a/topi/python/topi/cuda/reduction.py b/topi/python/topi/cuda/reduction.py index 0b9d5885375e8..d885c09e39841 100644 --- a/topi/python/topi/cuda/reduction.py +++ b/topi/python/topi/cuda/reduction.py @@ -18,6 +18,7 @@ """Schedule for reduce operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag from .injective import schedule_injective_from_existing @@ -39,13 +40,13 @@ def _schedule_reduce(op, sch, is_idx_reduce=False): # without it, CL_INVALID_WORK_GROUP_SIZE occurred when running test_topi_reduce.py # don't know why num_thread = 16 - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread), "threadIdx.y") else: all_reduce = True num_thread = tvm.target.Target.current(allow_none=False).max_num_threads - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") # Fuse and refactor the reduce axis fused_reduce = sch[data_out].fuse(*[sch[data_out].op.reduce_axis[i] @@ -79,7 +80,7 @@ def _schedule_reduce(op, sch, is_idx_reduce=False): else: if is_idx_reduce: spatial_axis = sch[real_output].fuse(*(sch[real_output].op.axis)) - sch[real_output].bind(spatial_axis, tvm.thread_axis("blockIdx.x")) + sch[real_output].bind(spatial_axis, te.thread_axis("blockIdx.x")) sch[temp_idx_input].compute_at(sch[real_output], spatial_axis) sch[temp_val_input].compute_at(sch[real_output], @@ -102,13 +103,13 @@ def schedule_reduce(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - sch = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + sch = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse_before_reduce(operator): """Internal traverse function""" - if isinstance(operator, tvm.tensor.PlaceholderOp): + if isinstance(operator, tvm.te.PlaceholderOp): return if tag.is_injective(operator.tag): sch[operator].compute_inline() diff --git a/topi/python/topi/cuda/softmax.py b/topi/python/topi/cuda/softmax.py index afd11ea0e71e1..ded3ff9cfff8d 100644 --- a/topi/python/topi/cuda/softmax.py +++ b/topi/python/topi/cuda/softmax.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, trailing-whitespace """Schedule for softmax operator""" -import tvm +from tvm import te from .injective import schedule_injective_from_existing @@ -34,8 +34,8 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) softmax = outs[0] op_tag = softmax.op.tag @@ -60,8 +60,8 @@ def schedule_softmax(outs): s = schedule_injective_from_existing(s, op.output(0)) else: num_thread = 64 - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") if exp is not None: s[exp].bind(exp.op.axis[0], block_x) diff --git a/topi/python/topi/cuda/sort.py b/topi/python/topi/cuda/sort.py index 88ca9d876abce..c1c622ff1d30a 100644 --- a/topi/python/topi/cuda/sort.py +++ b/topi/python/topi/cuda/sort.py @@ -17,8 +17,8 @@ # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, singleton-comparison, unused-argument """Argsort operator """ import tvm +from tvm import te -from tvm import api from .injective import schedule_injective_from_existing from ..math import identity from ..transform import strided_slice @@ -38,8 +38,8 @@ def _schedule_sort(outs): s: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -94,8 +94,8 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): nthread_tx = max_threads nthread_bx = shape[axis] // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("vthread") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("vthread") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "virtual_thread", nthread_bx) tid = bx * nthread_tx + tx @@ -111,11 +111,11 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): if indices_out is not None: indices_out[base_idx + tid * axis_mul_after] = \ tvm.generic.cast(tid, indices_out.dtype) - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) - idxd = tvm.indexdiv - idxm = tvm.indexmod + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.for_range(0, axis_mul_before) as i: with ib.for_range(0, axis_mul_after) as j: @@ -126,11 +126,11 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): with ib.if_scope(tid < idxd(current_sort_num + 1, 2)): offset = base_idx + (2 * tid + idxm(k, 2)) * axis_mul_after if is_ascend: - cond = tvm.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, - values_out[offset] > values_out[offset + axis_mul_after]) + cond = tvm.tir.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, + values_out[offset] > values_out[offset + axis_mul_after]) else: - cond = tvm.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, - values_out[offset] < values_out[offset + axis_mul_after]) + cond = tvm.tir.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, + values_out[offset] < values_out[offset + axis_mul_after]) with ib.if_scope(cond): temp_data[0] = values_out[offset] values_out[offset] = values_out[offset + axis_mul_after] @@ -139,9 +139,9 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): temp_index[0] = indices_out[offset] indices_out[offset] = indices_out[offset + axis_mul_after] indices_out[offset + axis_mul_after] = temp_index[0] - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -191,17 +191,17 @@ def sort_nms_ir(data, valid_count, output, axis, is_ascend): output = ib.buffer_ptr(output) nthread_tx = max_threads nthread_bx = size // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("vthread") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("vthread") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "virtual_thread", nthread_bx) tid = bx * nthread_tx + tx temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local") temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local") - is_ascend = tvm.make.node("IntImm", dtype="int32", value=is_ascend) + is_ascend = tvm.ir.make_node("IntImm", dtype="int32", value=is_ascend) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.for_range(0, axis_mul_before) as i: with ib.for_range(0, axis_mul_after) as j: @@ -213,27 +213,27 @@ def sort_nms_ir(data, valid_count, output, axis, is_ascend): with ib.for_range(0, current_sort_num) as k: with ib.if_scope(tid < idxd(current_sort_num + 1, 2)): offset = base_idx + (2 * tid + idxm(k, 2)) * axis_mul_after - with ib.if_scope(tvm.all(is_ascend == 1, \ - 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ - data[offset] > data[offset + axis_mul_after])): + with ib.if_scope(tvm.tir.all(is_ascend == 1, \ + 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ + data[offset] > data[offset + axis_mul_after])): temp_data[0] = data[offset] data[offset] = data[offset + axis_mul_after] data[offset + axis_mul_after] = temp_data[0] temp_index[0] = output[offset] output[offset] = output[offset + axis_mul_after] output[offset + axis_mul_after] = temp_index[0] - with ib.if_scope(tvm.all(is_ascend == 0, \ - 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ - data[offset] < data[offset + axis_mul_after])): + with ib.if_scope(tvm.tir.all(is_ascend == 0, \ + 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ + data[offset] < data[offset + axis_mul_after])): temp_data[0] = data[offset] data[offset] = data[offset + axis_mul_after] data[offset + axis_mul_after] = temp_data[0] temp_index[0] = output[offset] output[offset] = output[offset + axis_mul_after] output[offset + axis_mul_after] = temp_index[0] - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -243,10 +243,10 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Parameters ---------- - data: tvm.Tensor + data: tvm.te.Tensor The input array. - valid_count : tvm.Tensor, optional + valid_count : tvm.te.Tensor, optional The number of valid elements to be sorted. axis : int, optional @@ -260,35 +260,35 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor The output of this function. """ if valid_count is not None: sorted_data = identity(data) - sorted_data_buf = api.decl_buffer(data.shape, data.dtype, "sorted_data_buf", - data_alignment=8) - valid_count_buf = api.decl_buffer(valid_count.shape, valid_count.dtype, - "valid_count_buf", data_alignment=4) - out_buf = api.decl_buffer(data.shape, "int32", "out_buf", data_alignment=4) - out = tvm.extern([data.shape], - [sorted_data, valid_count], - lambda ins, outs: sort_nms_ir( - ins[0], ins[1], outs[0], axis, is_ascend), - dtype="int32", - in_buffers=[sorted_data_buf, valid_count_buf], - out_buffers=[out_buf], - name="argsort_nms_gpu", - tag="argsort_nms_gpu") + sorted_data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "sorted_data_buf", + data_alignment=8) + valid_count_buf = tvm.tir.decl_buffer(valid_count.shape, valid_count.dtype, + "valid_count_buf", data_alignment=4) + out_buf = tvm.tir.decl_buffer(data.shape, "int32", "out_buf", data_alignment=4) + out = te.extern([data.shape], + [sorted_data, valid_count], + lambda ins, outs: sort_nms_ir( + ins[0], ins[1], outs[0], axis, is_ascend), + dtype="int32", + in_buffers=[sorted_data_buf, valid_count_buf], + out_buffers=[out_buf], + name="argsort_nms_gpu", + tag="argsort_nms_gpu") else: - value_buf = api.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8) - indices_buf = api.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) - out = tvm.extern([data.shape, data.shape], - [data], - lambda ins, outs: sort_ir( - ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), - out_buffers=[value_buf, indices_buf], - name="argsort_gpu", - tag="argsort_gpu")[1] + value_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8) + indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) + out = te.extern([data.shape, data.shape], + [data], + lambda ins, outs: sort_ir( + ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), + out_buffers=[value_buf, indices_buf], + name="argsort_gpu", + tag="argsort_gpu")[1] return out def schedule_argsort(outs): @@ -312,7 +312,7 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tensor. k : int, optional @@ -335,31 +335,31 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Returns ------- - out : tvm.Tensor or List[tvm.Tensor] + out : tvm.te.Tensor or List[tvm.te.Tensor] The computed result. """ assert ret_type in ["both", "values", "indices"] ndim = len(data.shape) axis = axis + ndim if axis < 0 else axis assert 0 <= axis < ndim - values_buf = api.decl_buffer(data.shape, data.dtype, "values_buf", data_alignment=8) - indices_buf = api.decl_buffer(data.shape, dtype, "indices_buf", data_alignment=8) + values_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "values_buf", data_alignment=8) + indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "indices_buf", data_alignment=8) if ret_type == "values": - output = tvm.extern([data.shape], - [data], - lambda ins, outs: sort_ir( - ins[0], outs[0], axis, is_ascend), - out_buffers=[values_buf], - name="topk_gpu", - tag="topk_gpu") + output = te.extern([data.shape], + [data], + lambda ins, outs: sort_ir( + ins[0], outs[0], axis, is_ascend), + out_buffers=[values_buf], + name="topk_gpu", + tag="topk_gpu") else: - output = tvm.extern([data.shape, data.shape], - [data], - lambda ins, outs: sort_ir( - ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), - out_buffers=[values_buf, indices_buf], - name="topk_gpu", - tag="topk_gpu") + output = te.extern([data.shape, data.shape], + [data], + lambda ins, outs: sort_ir( + ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), + out_buffers=[values_buf, indices_buf], + name="topk_gpu", + tag="topk_gpu") if k < 1: if ret_type == "indices": return output[1] diff --git a/topi/python/topi/cuda/ssd/multibox.py b/topi/python/topi/cuda/ssd/multibox.py index 0b3f50ba0031b..9714194271f05 100644 --- a/topi/python/topi/cuda/ssd/multibox.py +++ b/topi/python/topi/cuda/ssd/multibox.py @@ -16,12 +16,10 @@ # under the License. # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, too-many-function-args """SSD multibox operators""" -from __future__ import absolute_import as _abs import math import tvm - -from tvm import api -from tvm.intrin import if_then_else, exp +from tvm import te +from tvm.tir import if_then_else, exp import topi @@ -58,10 +56,10 @@ def multibox_prior_ir(data, out, sizes, ratios, steps, offsets): """ max_threads = int(math.sqrt( tvm.target.Target.current(allow_none=False).max_num_threads)) - tx = tvm.thread_axis("threadIdx.x") - ty = tvm.thread_axis("threadIdx.y") - bx = tvm.thread_axis("blockIdx.x") - by = tvm.thread_axis("blockIdx.y") + tx = te.thread_axis("threadIdx.x") + ty = te.thread_axis("threadIdx.y") + bx = te.thread_axis("blockIdx.x") + by = te.thread_axis("blockIdx.y") ib = tvm.ir_builder.create() p_out = ib.buffer_ptr(out) in_height = data.shape[2] @@ -115,7 +113,7 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, c_in, h_in, w_in]] sizes : tuple of float @@ -135,17 +133,17 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ num_sizes = len(sizes) num_ratios = len(ratios) oshape = ( 1, data.shape[2] * data.shape[3] * (num_sizes + num_ratios - 1), 4) - out = tvm.extern(oshape, [data], lambda ins, outs: - multibox_prior_ir( - ins[0], outs[0], sizes, ratios, steps, offsets), - tag="multibox_prior") + out = te.extern(oshape, [data], lambda ins, outs: + multibox_prior_ir( + ins[0], outs[0], sizes, ratios, steps, offsets), + tag="multibox_prior") if clip: out = topi.clip(out, 0, 1) return out @@ -190,18 +188,18 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp temp_valid_count = ib.buffer_ptr(temp_valid_count) score = ib.buffer_ptr(temp_score) - threshold = tvm.make.node("FloatImm", dtype="float32", value=threshold) + threshold = tvm.ir.make_node("FloatImm", dtype="float32", value=threshold) max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = (batch_size * num_anchors) // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) @@ -212,8 +210,8 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp with ib.for_range(0, num_classes - 1) as k: temp = cls_prob[i * num_classes * num_anchors + (k + 1) * num_anchors + j] cls_id[tid] = if_then_else(temp > score[tid], k + 1, cls_id[tid]) - score[tid] = tvm.max(temp, score[tid]) - with ib.if_scope(tvm.all(cls_id[tid] > 0, score[tid] < threshold)): + score[tid] = tvm.te.max(temp, score[tid]) + with ib.if_scope(tvm.tir.all(cls_id[tid] > 0, score[tid] < threshold)): cls_id[tid] = 0 with ib.if_scope(cls_id[tid] > 0): temp_valid_count[tid] = 1 @@ -224,7 +222,7 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp with ib.for_range(0, num_anchors) as k: with ib.if_scope(k > 0): temp_valid_count[tid * num_anchors + k] += \ - temp_valid_count[tid * num_anchors + k - 1] + temp_valid_count[tid * num_anchors + k - 1] valid_count[i] = temp_valid_count[tid * num_anchors + num_anchors - 1] return ib.get() @@ -289,10 +287,10 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, oy = py * vy * ah + ay ow = exp(pw * vw) * aw / 2.0 oh = exp(ph * vh) * ah / 2.0 - return tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, ox - ow)), ox - ow), \ - tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, oy - oh)), oy - oh), \ - tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, ox + ow)), ox + ow), \ - tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, oy + oh)), oy + oh) + return tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, ox - ow)), ox - ow), \ + tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, oy - oh)), oy - oh), \ + tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, ox + ow)), ox + ow), \ + tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, oy + oh)), oy + oh) ib = tvm.ir_builder.create() @@ -306,14 +304,14 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = (batch_size * num_anchors) // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) @@ -348,13 +346,13 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, \ Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. clip : boolean @@ -368,12 +366,12 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, \ Returns ------- - ret : tuple of tvm.Tensor composed of + ret : tuple of tvm.te.Tensor composed of - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape (batch_size, num_anchors, 6) - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor with shape (batch_size,), number of valid anchor boxes. """ batch_size = cls_prob.shape[0] @@ -383,40 +381,40 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, \ valid_count_dtype = "int32" out_loc_dtype = loc_pred.dtype - valid_count_buf = api.decl_buffer((batch_size,), valid_count_dtype, - "valid_count_buf", data_alignment=4) - loc_pred_buf = api.decl_buffer(loc_pred.shape, loc_pred.dtype, - "loc_pred_buf", data_alignment=8) - anchor_buf = api.decl_buffer(anchor.shape, anchor.dtype, - "anchor_buf", data_alignment=8) + valid_count_buf = tvm.tir.decl_buffer((batch_size,), valid_count_dtype, + "valid_count_buf", data_alignment=4) + loc_pred_buf = tvm.tir.decl_buffer(loc_pred.shape, loc_pred.dtype, + "loc_pred_buf", data_alignment=8) + anchor_buf = tvm.tir.decl_buffer(anchor.shape, anchor.dtype, + "anchor_buf", data_alignment=8) - temp_valid_count_buf = api.decl_buffer( + temp_valid_count_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), valid_count_dtype, "temp_valid_count", data_alignment=8) - temp_cls_id_buf = api.decl_buffer( + temp_cls_id_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), valid_count_dtype, "temp_cls_id", data_alignment=8) - temp_score_buf = api.decl_buffer( + temp_score_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), cls_prob.dtype, "temp_score", data_alignment=8) valid_count, temp_valid_count, temp_cls_id, temp_score = \ - tvm.extern([(batch_size,), (batch_size, num_anchors,), (batch_size, num_anchors,), \ - (batch_size, num_anchors,)], [cls_prob], - lambda ins, outs: transform_loc_pre( - ins[0], outs[0], outs[1], outs[2], outs[3], threshold), - dtype=[valid_count_dtype, valid_count_dtype, valid_count_dtype, cls_prob.dtype], - out_buffers=[valid_count_buf, temp_valid_count_buf, \ - temp_cls_id_buf, temp_score_buf], - tag="multibox_transform_loc_phase_one") + te.extern([(batch_size,), (batch_size, num_anchors,), (batch_size, num_anchors,), \ + (batch_size, num_anchors,)], [cls_prob], + lambda ins, outs: transform_loc_pre( + ins[0], outs[0], outs[1], outs[2], outs[3], threshold), + dtype=[valid_count_dtype, valid_count_dtype, valid_count_dtype, cls_prob.dtype], + out_buffers=[valid_count_buf, temp_valid_count_buf, \ + temp_cls_id_buf, temp_score_buf], + tag="multibox_transform_loc_phase_one") out_loc = \ - tvm.extern([oshape], - [loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score], - lambda ins, outs: transform_loc_ir( - ins[0], ins[1], ins[2], ins[3], ins[4], outs[0], clip, variances, \ - batch_size, num_anchors), - in_buffers=[loc_pred_buf, anchor_buf, temp_valid_count_buf, \ - temp_cls_id_buf, temp_score_buf], - dtype=[out_loc_dtype], - tag="multibox_transform_loc") + te.extern([oshape], + [loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score], + lambda ins, outs: transform_loc_ir( + ins[0], ins[1], ins[2], ins[3], ins[4], outs[0], clip, variances, \ + batch_size, num_anchors), + in_buffers=[loc_pred_buf, anchor_buf, temp_valid_count_buf, \ + temp_cls_id_buf, temp_score_buf], + dtype=[out_loc_dtype], + tag="multibox_transform_loc") return [out_loc, valid_count] @@ -427,13 +425,13 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. clip : boolean @@ -456,7 +454,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape (batch_size, num_anchors, 6) """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, diff --git a/topi/python/topi/cuda/tensor_intrin.py b/topi/python/topi/cuda/tensor_intrin.py index 8f46d278d9509..214ceec0faf61 100644 --- a/topi/python/topi/cuda/tensor_intrin.py +++ b/topi/python/topi/cuda/tensor_intrin.py @@ -17,6 +17,7 @@ """Tensor intrinsics on CUDA.""" #pylint: disable=invalid-name import tvm +from tvm import te def dp4a(x_scope='local', y_scope='local', z_scope='local'): @@ -39,12 +40,12 @@ def dp4a(x_scope='local', y_scope='local', z_scope='local'): """ n = 4 # dp4a requires operands packed by 4 - x = tvm.placeholder((n,), name='x', dtype='int8') - y = tvm.placeholder((n,), name='y', dtype='int8') + x = te.placeholder((n,), name='x', dtype='int8') + y = te.placeholder((n,), name='y', dtype='int8') - k = tvm.reduce_axis((0, n), name='rc') + k = te.reduce_axis((0, n), name='rc') - z = tvm.compute((1,), lambda i: tvm.sum( + z = te.compute((1,), lambda i: te.sum( x[k].astype('int32') * y[k].astype('int32'), axis=[k])) def _intrin_func(ins, outs): @@ -61,7 +62,7 @@ def _instr(index): vec_y = yy.vload(0, dtype='int8x4') prev_z = 0 if index == 0 else zz.vload(0) - new_z = tvm.call_pure_extern('int32', '__dp4a', vec_x, vec_y, prev_z) + new_z = tvm.tir.call_pure_extern('int32', '__dp4a', vec_x, vec_y, prev_z) ib.emit(zz.vstore(0, new_z)) return ib.get() @@ -70,9 +71,9 @@ def _instr(index): with tvm.build_config(data_alignment=4, offset_factor=1) as cfg: scopes = {x: x_scope, y: y_scope, z: z_scope} - binds = {t: tvm.decl_buffer(t.shape, t.dtype, t.op.name, - data_alignment=cfg.data_alignment, - offset_factor=cfg.offset_factor, - scope=scopes[t]) for t in [x, y, z]} + binds = {t: tvm.tir.decl_buffer(t.shape, t.dtype, t.op.name, + data_alignment=cfg.data_alignment, + offset_factor=cfg.offset_factor, + scope=scopes[t]) for t in [x, y, z]} return tvm.decl_tensor_intrin(z.op, _intrin_func, binds=binds) diff --git a/topi/python/topi/cuda/vision.py b/topi/python/topi/cuda/vision.py index 8666c22774dec..eb49328c3da33 100644 --- a/topi/python/topi/cuda/vision.py +++ b/topi/python/topi/cuda/vision.py @@ -18,6 +18,7 @@ """Schedule for vision operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import cpp from .. import tag from .pooling import schedule_pool @@ -25,8 +26,8 @@ def _default_schedule(outs): """Default schedule for gpu.""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): if tag.is_broadcast(op.tag) or op.tag in ['bbox_score', 'sorted_bbox']: diff --git a/topi/python/topi/generic/conv2d.py b/topi/python/topi/generic/conv2d.py index 08bb06c6f8558..69984a169ac67 100644 --- a/topi/python/topi/generic/conv2d.py +++ b/topi/python/topi/generic/conv2d.py @@ -17,8 +17,7 @@ # pylint: disable=invalid-name, unused-variable, too-many-locals # pylint: disable=unused-argument, redefined-builtin """Generic convolution schedules""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity from ..util import get_const_tuple @@ -123,7 +122,7 @@ def schedule_conv_NCHWc_cpu_common_int8(s, cfg, data_vec, kernel_vec, conv_out, _, _, _, _, oc_bn = get_const_tuple(conv_out.shape) # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, te.tensor.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -136,7 +135,7 @@ def schedule_conv_NCHWc_cpu_common_int8(s, cfg, data_vec, kernel_vec, conv_out, # this part will be folded during Relay fold_constant pass. s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, te.tensor.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. # this should only be used by x86 conv2d_nchw, which is for @@ -213,7 +212,7 @@ def schedule_conv_NCHWc_cpu_1x1_int8(s, cfg, data_vec, kernel_vec, conv_out, _, _, _, _, oc_bn = get_const_tuple(conv_out.shape) # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, te.tensor.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -226,7 +225,7 @@ def schedule_conv_NCHWc_cpu_1x1_int8(s, cfg, data_vec, kernel_vec, conv_out, # this part will be folded during Relay fold_constant pass. s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, te.tensor.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. # this should only be used by x86 conv2d_nchw, which is for diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py index 977c53763a523..3b4feb771876b 100644 --- a/topi/python/topi/generic/extern.py +++ b/topi/python/topi/generic/extern.py @@ -16,8 +16,6 @@ # under the License. # pylint: disable=invalid-name """generic declaration and schedules.""" -from __future__ import absolute_import as _abs - import tvm from .. import cpp diff --git a/topi/python/topi/generic/injective.py b/topi/python/topi/generic/injective.py index 6f1013c06dbd5..50de7988be10a 100644 --- a/topi/python/topi/generic/injective.py +++ b/topi/python/topi/generic/injective.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te def schedule_injective_from_existing(sch, out): """Schedule for injective op from existing schedule. @@ -55,10 +56,10 @@ def schedule_injective(outs): target = tvm.target.Target.current(allow_none=False) if target.target_name != "llvm": raise RuntimeError("schedule_injective not registered for '%s'" % target) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + s = te.create_schedule([x.op for x in outs]) + te.schedule.AutoInlineInjective(s) schedule_injective_from_existing(s, x) return s diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index ba50a8b88cb42..25b5501152000 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -16,19 +16,19 @@ # under the License. # pylint: disable=invalid-name,unused-argument """Generic nn operators""" -from __future__ import absolute_import as _abs import tvm +from tvm import te def _default_schedule(outs, auto_inline): """Default schedule for llvm.""" target = tvm.target.Target.current(allow_none=False) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs if target.target_name not in ("llvm", "c"): raise RuntimeError("schedule not registered for '%s'" % target) - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) if auto_inline: x = outs[0] - tvm.schedule.AutoInlineInjective(s) + te.schedule.AutoInlineInjective(s) s[x].fuse(s[x].op.axis) return s @@ -187,7 +187,7 @@ def schedule_conv2d_winograd_weight_transform(outs): """ # Typically this is computed in PreCompute pass # so we make a schedule here for cpu llvm - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output = outs[0] _, G = s[output].op.input_tensors s[G].compute_inline() @@ -230,7 +230,7 @@ def schedule_conv2d_winograd_nnpack_weight_transform(outs): The computation schedule for the op. """ # Typically this is computed in PreCompute pass - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) return s diff --git a/topi/python/topi/generic/vision.py b/topi/python/topi/generic/vision.py index d6e80df9b89d6..3935250bcbbdb 100644 --- a/topi/python/topi/generic/vision.py +++ b/topi/python/topi/generic/vision.py @@ -18,18 +18,19 @@ """Generic vision operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import cpp def _default_schedule(outs, auto_inline): """Default schedule for llvm.""" target = tvm.target.Target.current(allow_none=False) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs if target.target_name != "llvm": raise RuntimeError("schedule not registered for '%s'" % target) - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) if auto_inline: x = outs[0] - tvm.schedule.AutoInlineInjective(s) + te.schedule.AutoInlineInjective(s) s[x].fuse(s[x].op.axis) return s diff --git a/topi/python/topi/generic_op_impl.py b/topi/python/topi/generic_op_impl.py index b4b719fb35d48..864bd936a268f 100644 --- a/topi/python/topi/generic_op_impl.py +++ b/topi/python/topi/generic_op_impl.py @@ -16,8 +16,8 @@ # under the License. """Implementation of generic operators in the presence of Tensor""" # pylint: disable=invalid-name, too-many-arguments -from __future__ import absolute_import as _abs import tvm +from tvm import te from . import broadcast as _broadcast from . import math as _math @@ -75,11 +75,11 @@ def _tensor_bop_impl(lhs, rhs): Returns ------- - ret : tvm.Tensor (if at least one operand is non-zero-rank Tensor) + ret : tvm.te.Tensor (if at least one operand is non-zero-rank Tensor) tvm.Expr (otherwise) The result of {op} operation. """ - if not isinstance(lhs, tvm.tensor.Tensor) and not isinstance(rhs, tvm.tensor.Tensor): + if not isinstance(lhs, te.tensor.Tensor) and not isinstance(rhs, te.tensor.Tensor): return orig_bop(lhs, rhs) return broadcast_bop(lhs, rhs) _tensor_bop_impl.__doc__ = _tensor_bop_impl.__doc__.format(op=name) diff --git a/topi/python/topi/hls/injective.py b/topi/python/topi/hls/injective.py index d4ccf41ed26da..6d0c6f4928ec8 100644 --- a/topi/python/topi/hls/injective.py +++ b/topi/python/topi/hls/injective.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable, """Schedule for composition of injective operator""" import tvm +from tvm import te def schedule_injective_from_existing(sch, out): """Schedule for injective op from existing schedule. @@ -35,7 +36,7 @@ def schedule_injective_from_existing(sch, out): """ fused = sch[out].fuse(*sch[out].op.axis) px, x = sch[out].split(fused, nparts=1) - sch[out].bind(px, tvm.thread_axis("pipeline")) + sch[out].bind(px, te.thread_axis("pipeline")) return sch def schedule_injective(outs): @@ -52,9 +53,9 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) for out in outs: schedule_injective_from_existing(s, out) return s diff --git a/topi/python/topi/hls/nn.py b/topi/python/topi/hls/nn.py index 06cf3298682dd..3d7ff82085c72 100644 --- a/topi/python/topi/hls/nn.py +++ b/topi/python/topi/hls/nn.py @@ -18,13 +18,14 @@ """HLS nn operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag def _schedule_conv2d(outs): - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -33,7 +34,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule conv2d elif OP.tag.find("conv2d") >= 0: @@ -47,7 +48,7 @@ def traverse(OP): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -199,9 +200,9 @@ def schedule_reduce(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -210,7 +211,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif OP.tag in ["comm_reduce", "comm_reduce_idx"]: if OP.tag == "comm_reduce": @@ -227,7 +228,7 @@ def traverse(OP): fused = s[outs[0]].fuse() px, x = s[outs[0]].split(fused, nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -245,9 +246,9 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) softmax = outs[0] @@ -271,7 +272,7 @@ def schedule_softmax(outs): s[max_elem].compute_at(s[softmax], s[softmax].op.axis[1]) px, x = s[softmax].split(softmax.op.axis[0], nparts=1) - s[softmax].bind(px, tvm.thread_axis("pipeline")) + s[softmax].bind(px, te.thread_axis("pipeline")) return s @@ -289,9 +290,9 @@ def schedule_dense(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -300,7 +301,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule dense elif OP.tag == 'dense': @@ -314,7 +315,7 @@ def traverse(OP): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -332,9 +333,9 @@ def schedule_pool(outs, layout): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -343,7 +344,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -357,7 +358,7 @@ def traverse(OP): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -375,9 +376,9 @@ def schedule_adaptive_pool(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -386,7 +387,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('adaptive_pool'): @@ -400,5 +401,5 @@ def traverse(OP): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s diff --git a/topi/python/topi/image/resize.py b/topi/python/topi/image/resize.py index 0c02867ef54d1..d901babc835bd 100644 --- a/topi/python/topi/image/resize.py +++ b/topi/python/topi/image/resize.py @@ -18,6 +18,7 @@ """TVM operator input resize compute.""" from __future__ import absolute_import import tvm +from tvm import te from topi.util import nchw_pack_layout, nchw_xc_layout from .. import tag @@ -42,8 +43,8 @@ def get_2d_indices(indices, layout='NCHW'): def get_2d_pixel(data, layout, boxes, image_height, image_width, n, c, y, x, cc, ib, ic): """ Get 2d pixel """ if boxes is None: - y = tvm.max(tvm.min(y, image_height - 1), 0) - x = tvm.max(tvm.min(x, image_width - 1), 0) + y = tvm.te.max(tvm.te.min(y, image_height - 1), 0) + x = tvm.te.max(tvm.te.min(x, image_width - 1), 0) if layout == 'NHWC': return data(n, y, x, c).astype('float') if layout == 'NCHW': @@ -70,7 +71,7 @@ def resize_nearest_neighbor(indices, data, image_height, image_width, indices : tuple The indices of input data - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -87,11 +88,11 @@ def resize_nearest_neighbor(indices, data, image_height, image_width, target_width : integer The target resized image width - boxes : tvm.Tensor, optional + boxes : tvm.te.Tensor, optional A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor, optional + box_indices : tvm.te.Tensor, optional A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. @@ -150,29 +151,29 @@ def _cast_output(value, data_dtype="float32", out_dtype=None): in_x = w_scale * x if coordinate_transformation_mode == "align_corners" or boxes is not None: - closest_x_index = tvm.round(in_x).astype("int32") - closest_y_index = tvm.round(in_y).astype("int32") + closest_x_index = te.round(in_x).astype("int32") + closest_y_index = te.round(in_y).astype("int32") else: # Add epsilon to floor to prevent gpu rounding errors. epsilon = 1e-5 - closest_y_index = tvm.floor(in_y + epsilon).astype('int32') - closest_x_index = tvm.floor(in_x + epsilon).astype('int32') + closest_y_index = te.floor(in_y + epsilon).astype('int32') + closest_x_index = te.floor(in_x + epsilon).astype('int32') value = get_2d_pixel(data, layout, boxes, image_height, image_width, box_idx, c, closest_y_index, closest_x_index, cc, inum, ic) if extrapolation_value is not None: - out = tvm.if_then_else(in_y < 0, - extrapolation_value, - tvm.if_then_else(in_y > image_height - 1, - extrapolation_value, - value)) + out = tvm.tir.if_then_else(in_y < 0, + extrapolation_value, + tvm.tir.if_then_else(in_y > image_height - 1, + extrapolation_value, + value)) # use extrapolation_value if in_x is out of boundary - value = tvm.if_then_else(in_x < 0, - extrapolation_value, - tvm.if_then_else(in_x > image_width - 1, - extrapolation_value, - out)) + value = tvm.tir.if_then_else(in_x < 0, + extrapolation_value, + tvm.tir.if_then_else(in_x > image_width - 1, + extrapolation_value, + out)) return _cast_output(value, data.dtype, out_dtype=out_dtype) @@ -191,7 +192,7 @@ def resize_bilinear(indices, data, image_height, image_width, indices : tuple The indices of input data - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -208,11 +209,11 @@ def resize_bilinear(indices, data, image_height, image_width, target_width : integer The target resized image width - boxes : tvm.Tensor, optional + boxes : tvm.te.Tensor, optional A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor, optional + box_indices : tvm.te.Tensor, optional A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. @@ -279,12 +280,12 @@ def _lerp(A, B, t): in_y = h_scale * y in_x = w_scale * x - top_y_index = tvm.floor(in_y).astype('int32') - bottom_y_index = tvm.ceil(in_y).astype('int32') + top_y_index = te.floor(in_y).astype('int32') + bottom_y_index = te.ceil(in_y).astype('int32') y_lerp = in_y - top_y_index - left_x_index = tvm.floor(in_x).astype('int32') - right_x_index = tvm.ceil(in_x).astype('int32') + left_x_index = te.floor(in_x).astype('int32') + right_x_index = te.ceil(in_x).astype('int32') x_lerp = in_x - left_x_index top_left = get_2d_pixel(data, layout, boxes, image_height, image_width, @@ -302,16 +303,16 @@ def _lerp(A, B, t): # use extrapolation_value if in_y/in_x is out of boundary if extrapolation_value is not None: - out = tvm.if_then_else(in_y < 0, - extrapolation_value, - tvm.if_then_else(in_y > image_height - 1, - extrapolation_value, - value)) - value = tvm.if_then_else(in_x < 0, - extrapolation_value, - tvm.if_then_else(in_x > image_width - 1, - extrapolation_value, - out)) + out = tvm.tir.if_then_else(in_y < 0, + extrapolation_value, + tvm.tir.if_then_else(in_y > image_height - 1, + extrapolation_value, + value)) + value = tvm.tir.if_then_else(in_x < 0, + extrapolation_value, + tvm.tir.if_then_else(in_x > image_width - 1, + extrapolation_value, + out)) return _cast_output(value, data.dtype, out_dtype=out_dtype) @@ -329,7 +330,7 @@ def resize_bicubic(indices, data, image_height, image_width, indices : tuple The indices of input data - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -346,11 +347,11 @@ def resize_bicubic(indices, data, image_height, image_width, target_width : integer The target resized image width - boxes : tvm.Tensor, optional + boxes : tvm.te.Tensor, optional A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor, optional + box_indices : tvm.te.Tensor, optional A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. @@ -421,11 +422,11 @@ def _cast_output(value, data_dtype="float32", out_dtype=None): in_y = h_scale * y in_x = w_scale * x - xint = tvm.floor(in_x).astype('int32') - xfract = in_x - tvm.floor(in_x) + xint = te.floor(in_x).astype('int32') + xfract = in_x - te.floor(in_x) - yint = tvm.floor(in_y).astype('int32') - yfract = in_y - tvm.floor(in_y) + yint = te.floor(in_y).astype('int32') + yfract = in_y - te.floor(in_y) # 1st row p00 = _get_pixel(data, layout, boxes, image_height, image_width, @@ -476,16 +477,16 @@ def _cast_output(value, data_dtype="float32", out_dtype=None): # use extrapolation_value if in_y/in_x is out of boundary if extrapolation_value is not None: - out = tvm.if_then_else(in_y < 0, - extrapolation_value, - tvm.if_then_else(in_y > image_height - 1, - extrapolation_value, - value)) - value = tvm.if_then_else(in_x < 0, - extrapolation_value, - tvm.if_then_else(in_x > image_width - 1, - extrapolation_value, - out)) + out = tvm.tir.if_then_else(in_y < 0, + extrapolation_value, + tvm.tir.if_then_else(in_y > image_height - 1, + extrapolation_value, + value)) + value = tvm.tir.if_then_else(in_x < 0, + extrapolation_value, + tvm.tir.if_then_else(in_x > image_width - 1, + extrapolation_value, + out)) return _cast_output(value, data.dtype, out_dtype=out_dtype) @@ -495,7 +496,7 @@ def resize(data, size, layout="NCHW", method="bilinear", Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -520,7 +521,7 @@ def resize(data, size, layout="NCHW", method="bilinear", Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, channel, in_height*scale, in_width*scale] or [batch, in_height*scale, in_width*scale, channel] or 5-D with shape [batch, channel-major, in_height*scale, in_width*scale, channel-minor] @@ -548,21 +549,21 @@ def _nearest_neighbor(*indices): return resize_nearest_neighbor(indices, data, in_h, in_w, size[0], size[1], layout=layout, coordinate_transformation_mode= \ - coordinate_transformation_mode, + coordinate_transformation_mode, out_dtype=out_dtype) def _bilinear(*indices): return resize_bilinear(indices, data, in_h, in_w, size[0], size[1], layout=layout, coordinate_transformation_mode= \ - coordinate_transformation_mode, + coordinate_transformation_mode, out_dtype=out_dtype) def _bicubic(*indices): return resize_bicubic(indices, data, in_h, in_w, size[0], size[1], layout, coordinate_transformation_mode= \ - coordinate_transformation_mode, + coordinate_transformation_mode, out_dtype=out_dtype) # Determine which interpolation method to use then run it. @@ -575,7 +576,7 @@ def _bicubic(*indices): else: raise ValueError('%s method is not supported.' % method) - return tvm.compute(output_shape, compute_func, name='resize', tag=tag.INJECTIVE) + return te.compute(output_shape, compute_func, name='resize', tag=tag.INJECTIVE) def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW", @@ -584,16 +585,16 @@ def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW", Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] - boxes : tvm.Tensor + boxes : tvm.te.Tensor A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor + box_indices : tvm.te.Tensor A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. @@ -614,7 +615,7 @@ def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW", Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_boxes, channel, crop_height, crop_width] or [num_boxes, crop_height, crop_width, channel] """ @@ -656,7 +657,7 @@ def _nearest_neighbor(*indices): else: raise ValueError('%s method is not supported.' % method) - return tvm.compute(output_shape, compute_func, name='crop_and_resize', tag=tag.INJECTIVE) + return te.compute(output_shape, compute_func, name='crop_and_resize', tag=tag.INJECTIVE) @@ -665,7 +666,7 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", """Perform resize operation on the data. Parameters ---------- - inputs: tvm.Tensor + inputs: tvm.te.Tensor inputs is a 5-D tensor with shape [batch, channel, in_depth, in_height, in_width] or [batch, in_depth, in_height, in_width, channel] @@ -684,7 +685,7 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", Type to return. If left None will be same as input type. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, channel, in_depth*scale, in_height*scale, in_width*scale] or [batch, in_depth*scale, in_height*scale, in_width*scale, channel] or 5-D with shape [batch, channel-major, in_depth*scale, in_height*scale, in_width*scale, @@ -716,9 +717,9 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", coordinate_transformation_mode)) def _get_pixel(n, c, z, y, x, cc): - z = tvm.max(tvm.min(z, in_d - 1), 0) - y = tvm.max(tvm.min(y, in_h - 1), 0) - x = tvm.max(tvm.min(x, in_w - 1), 0) + z = tvm.te.max(tvm.te.min(z, in_d - 1), 0) + y = tvm.te.max(tvm.te.min(y, in_h - 1), 0) + x = tvm.te.max(tvm.te.min(x, in_w - 1), 0) if layout == 'NDHWC': return data(n, z, y, x, c).astype('float') if layout == 'NCDHW': @@ -754,15 +755,15 @@ def _nearest_neighbor(*indices): in_x = x_ratio * x if coordinate_transformation_mode == "align_corners": - zint = tvm.round(in_z).astype('int32') - yint = tvm.round(in_y).astype('int32') - xint = tvm.round(in_x).astype('int32') + zint = te.round(in_z).astype('int32') + yint = te.round(in_y).astype('int32') + xint = te.round(in_x).astype('int32') elif coordinate_transformation_mode in ["asymmetric", "half_pixel"]: # Add epsilon to floor to prevent gpu rounding errors. epsilon = 1e-5 - zint = tvm.floor(in_z + epsilon).astype('int32') - yint = tvm.floor(in_y + epsilon).astype('int32') - xint = tvm.floor(in_x + epsilon).astype('int32') + zint = te.floor(in_z + epsilon).astype('int32') + yint = te.floor(in_y + epsilon).astype('int32') + xint = te.floor(in_x + epsilon).astype('int32') else: raise ValueError("Unsupported coordinate_transformation_mode: {}".format( coordinate_transformation_mode)) @@ -785,14 +786,14 @@ def _trilinear(*indices): in_y = y_ratio * y in_x = x_ratio * x - zint = tvm.floor(in_z).astype('int32') - zfract = in_z - tvm.floor(in_z) + zint = te.floor(in_z).astype('int32') + zfract = in_z - te.floor(in_z) - xint = tvm.floor(in_x).astype('int32') - xfract = in_x - tvm.floor(in_x) + xint = te.floor(in_x).astype('int32') + xfract = in_x - te.floor(in_x) - yint = tvm.floor(in_y).astype('int32') - yfract = in_y - tvm.floor(in_y) + yint = te.floor(in_y).astype('int32') + yfract = in_y - te.floor(in_y) p000 = _get_pixel(n, c, zint, yint, xint, cc) p001 = _get_pixel(n, c, zint, yint, xint + 1, cc) @@ -820,4 +821,4 @@ def _trilinear(*indices): else: raise ValueError('%s method is not supported.' % method) - return tvm.compute(output_shape, compute_func, name='resize3d', tag=tag.INJECTIVE) + return te.compute(output_shape, compute_func, name='resize3d', tag=tag.INJECTIVE) diff --git a/topi/python/topi/intel_graphics/conv2d.py b/topi/python/topi/intel_graphics/conv2d.py index 8993063b16e37..e4ea196ac84f6 100644 --- a/topi/python/topi/intel_graphics/conv2d.py +++ b/topi/python/topi/intel_graphics/conv2d.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity @@ -132,14 +133,14 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None xo, xi = s[tensor].split(x, x_factor) s[tensor].reorder(zo, yo, xo, zi, yi, xi) - thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z") - thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y") - thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x") - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) + thread_z = te.thread_axis((0, z_factor), "threadIdx.z") + thread_y = te.thread_axis((0, y_factor), "threadIdx.y") + thread_x = te.thread_axis((0, x_factor), "threadIdx.x") + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) s[tensor].bind(zi, thread_z) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) s[tensor].bind(yi, thread_y) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) s[tensor].bind(xi, thread_x) return xi, thread_z, thread_y, thread_x @@ -151,11 +152,11 @@ def _pack_data(data, kernel, ic_bn, oc_bn): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn), lambda occ, icc, k_h, k_w, icb, ocb: kernel[occ * oc_bn + ocb, @@ -172,10 +173,10 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_height, filter_width, nnum_filter_vec] stride : int or a list/tuple of two ints @@ -189,7 +190,7 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if len(data.shape) == 5: @@ -215,9 +216,9 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, _create_schedule_template(cfg, data_shape, kernel_shape, strides, padding, dilation) if cfg.is_fallback: - _get_default_config(cfg, tvm.placeholder((batch, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width), - dtype=kernel.dtype), + _get_default_config(cfg, te.placeholder((batch, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kernel_height, kernel_width), + dtype=kernel.dtype), strides, padding, out_dtype) ic_bn = cfg["tile_ic"].val if hasattr(cfg["tile_ic"], "val") else cfg["tile_ic"].size[-1] @@ -232,9 +233,9 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, out_width = simplify((iw - kernel_width + pad_left + pad_right) // stride_w + 1) oshape = (batch, out_channel // oc_bn, out_height, out_width, oc_bn) - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_height), name='ry') - rx = tvm.reduce_axis((0, kernel_width), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_height), name='ry') + rx = te.reduce_axis((0, kernel_width), name='rx') block_h = cfg["block_oh"].val block_w = cfg["block_ow"].val @@ -261,17 +262,17 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, else: temp = data - conv = tvm.compute( + conv = te.compute( cshape, lambda nn, ff, yy, xx, ff_v: \ - tvm.sum( - temp[nn, rc//ic_bn, yy * stride_h + ry, xx * stride_w + rx, rc%ic_bn]. \ - astype(out_dtype) * - kernel[ff, rc//ic_bn, ry, rx, rc%ic_bn, ff_v].astype(out_dtype), - axis=[rc, ry, rx]), tag="conv2d_NCHWc", name='conv2d_NCHWc') + te.sum( + temp[nn, rc//ic_bn, yy * stride_h + ry, xx * stride_w + rx, rc%ic_bn]. \ + astype(out_dtype) * + kernel[ff, rc//ic_bn, ry, rx, rc%ic_bn, ff_v].astype(out_dtype), + axis=[rc, ry, rx]), tag="conv2d_NCHWc", name='conv2d_NCHWc') if DOUNPACK: - output = tvm.compute( + output = te.compute( oshape, lambda nn, ff, yy, xx, ff_v: conv[nn][ff][yy][xx][ff_v], @@ -297,8 +298,8 @@ def schedule_conv2d_NCHWc(cfg, outs): s: Schedule The computation schedule for conv2d_nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): """inline all one-to-one-mapping operators except the last stage (output)""" @@ -344,7 +345,7 @@ def _schedule_cl_spatialpack_NCHWc(cfg, s, op): # this part will be folded during Relay fold_constant pass. s[data].pragma(s[data].op.axis[0], "debug_skip_region") s[kernel].pragma(s[kernel].op.axis[0], "debug_skip_region") - elif isinstance(kernel.op, tvm.tensor.ComputeOp) and kernel.name == "kernel_vec": + elif isinstance(kernel.op, tvm.te.ComputeOp) and kernel.name == "kernel_vec": # data and kernel are not pre-computed, schedule layout transform here. # TODO(@Laurawly): Add schedule for data and kernel pack pass @@ -356,9 +357,9 @@ def _schedule_cl_spatialpack_NCHWc(cfg, s, op): z_factor = 1 y_factor = 1 x_factor = 16 - thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z") - thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y") - thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x") + thread_z = te.thread_axis((0, z_factor), "threadIdx.z") + thread_y = te.thread_axis((0, y_factor), "threadIdx.y") + thread_x = te.thread_axis((0, x_factor), "threadIdx.x") _, co, oh, ow, vc = s[conv].op.axis ooh, ioh = s[conv].split(oh, factor=OUTPUT_BLOCK_HEIGHT) oow, iow = s[conv].split(ow, factor=OUTPUT_BLOCK_WIDTH) @@ -371,9 +372,9 @@ def _schedule_cl_spatialpack_NCHWc(cfg, s, op): s[conv].bind(oohi, thread_z) s[conv].bind(oowi, thread_y) s[conv].bind(vci, thread_x) - s[conv].bind(ooho, tvm.thread_axis("blockIdx.z")) - s[conv].bind(oowo, tvm.thread_axis("blockIdx.y")) - s[conv].bind(coi, tvm.thread_axis("blockIdx.x")) + s[conv].bind(ooho, te.thread_axis("blockIdx.z")) + s[conv].bind(oowo, te.thread_axis("blockIdx.y")) + s[conv].bind(coi, te.thread_axis("blockIdx.x")) # schedule conv_L s[conv_L].compute_at(s[conv], vci) @@ -424,9 +425,9 @@ def conv2d_nchw(data, kernel, stride, padding, dilation, out_dtype='float32'): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] stride : int or a list/tuple of two ints stride size, or [stride_height, stride_width] @@ -434,7 +435,7 @@ def conv2d_nchw(data, kernel, stride, padding, dilation, out_dtype='float32'): padding size, or [pad_height, pad_width] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ assert data.shape[0].value == 1, "only support batch size=1 convolution on intel gpu" @@ -456,8 +457,8 @@ def schedule_conv2d_nchw(outs): s: Schedule The computation schedule for conv2d_nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): """inline all one-to-one-mapping operators except the last stage (output)""" @@ -483,9 +484,9 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, out_dtype='float16'): out_width = simplify((in_width - kernel_w + pad_left + pad_right) // stride_w + 1) oshape = (batch, out_channel, out_height, out_width) - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') if stride_h == 2: if num_filter + kernel_h == 515: @@ -529,20 +530,20 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, out_dtype='float16'): cshape = (batch, out_channel // nv, c_h, c_w, nv) kvshape = (num_filter // nv, channel, kernel_h, kernel_w, nv) - kernel_vec = tvm.compute( + kernel_vec = te.compute( kvshape, lambda co, ci, kh, kw, vc: kernel[co*nv + vc][ci][kh][kw], name='kernel_vec') - conv = tvm.compute( + conv = te.compute( cshape, lambda nn, ff, yy, xx, vc: \ - tvm.sum( - temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) * - kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype), - axis=[rc, ry, rx]), name='conv', attrs=attrs) + te.sum( + temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) * + kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype), + axis=[rc, ry, rx]), name='conv', attrs=attrs) - output = tvm.compute( + output = te.compute( oshape, lambda nn, ff, yy, xx: conv[nn][ff//nv][yy][xx][ff%nv], @@ -573,9 +574,9 @@ def _schedule_cl_spatialpack(s, op): z_factor = 1 y_factor = 1 x_factor = 16 - thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z") - thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y") - thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x") + thread_z = te.thread_axis((0, z_factor), "threadIdx.z") + thread_y = te.thread_axis((0, y_factor), "threadIdx.y") + thread_x = te.thread_axis((0, x_factor), "threadIdx.x") _, co, oh, ow, vc = s[conv].op.axis ooh, ioh = s[conv].split(oh, factor=OUTPUT_BLOCK_HEIGHT) oow, iow = s[conv].split(ow, factor=OUTPUT_BLOCK_WIDTH) @@ -588,9 +589,9 @@ def _schedule_cl_spatialpack(s, op): s[conv].bind(oohi, thread_z) s[conv].bind(oowi, thread_y) s[conv].bind(vci, thread_x) - s[conv].bind(ooho, tvm.thread_axis("blockIdx.z")) - s[conv].bind(oowo, tvm.thread_axis("blockIdx.y")) - s[conv].bind(coi, tvm.thread_axis("blockIdx.x")) + s[conv].bind(ooho, te.thread_axis("blockIdx.z")) + s[conv].bind(oowo, te.thread_axis("blockIdx.y")) + s[conv].bind(coi, te.thread_axis("blockIdx.x")) # schedule conv_L s[conv_L].compute_at(s[conv], vci) diff --git a/topi/python/topi/intel_graphics/conv2d_alter_op.py b/topi/python/topi/intel_graphics/conv2d_alter_op.py index e95e59f4c6d7c..bbe5e7f296cf9 100644 --- a/topi/python/topi/intel_graphics/conv2d_alter_op.py +++ b/topi/python/topi/intel_graphics/conv2d_alter_op.py @@ -18,6 +18,7 @@ """Conv2D alter op and legalize functions for x86""" import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -74,10 +75,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel//oc_bn, in_channel//ic_bn, - kh, kw, ic_bn, oc_bn), dtype=kernel_dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn, + kh, kw, ic_bn, oc_bn), dtype=kernel_dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"], new_attrs["out_layout"], out_dtype], "conv2d_NCHWc.intel_graphics") diff --git a/topi/python/topi/intel_graphics/depthwise_conv2d.py b/topi/python/topi/intel_graphics/depthwise_conv2d.py index 17f19435b62f1..618ef50481b9c 100644 --- a/topi/python/topi/intel_graphics/depthwise_conv2d.py +++ b/topi/python/topi/intel_graphics/depthwise_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """Schedule for depthwise_conv2d with auto fusion""" import tvm +from tvm import te from tvm import autotvm from ..util import traverse_inline from .. import tag @@ -44,8 +45,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'depthwise_conv2d_nchw': @@ -75,7 +76,7 @@ def _callback(op): ##### space definition end ##### s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -100,15 +101,15 @@ def _callback(op): kernel_scope, n = s[output].split(n, nparts=1) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -123,9 +124,9 @@ def _callback(op): fused, tx = s[load].split(fused, cfg["tile_x"].size[2]) fused, ty = s[load].split(fused, cfg["tile_y"].size[2]) fused, tz = s[load].split(fused, cfg["tile_f"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) @@ -148,8 +149,8 @@ def schedule_depthwise_conv2d_nhwc(outs): s: Schedule The computation schedule for depthwise_conv2d nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(temp, Filter, DepthwiseConv2d): s[temp].compute_inline() @@ -161,8 +162,8 @@ def _schedule(temp, Filter, DepthwiseConv2d): Output = outs[0].op.output(0) s[DepthwiseConv2d].set_scope("local") - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") b, h, w, c = s[Output].op.axis @@ -206,7 +207,7 @@ def traverse(OP): if OP.tag == 'depthwise_conv2d_nhwc': PaddedInput = OP.input_tensors[0] Filter = OP.input_tensors[1] - if isinstance(Filter.op, tvm.tensor.ComputeOp) and 'dilate' in Filter.op.tag: + if isinstance(Filter.op, tvm.te.ComputeOp) and 'dilate' in Filter.op.tag: s[Filter].compute_inline() DepthwiseConv2d = OP.output(0) _schedule(PaddedInput, Filter, DepthwiseConv2d) @@ -232,14 +233,14 @@ def schedule_depthwise_conv2d_backward_input_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt input with layout nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Padded_out_grad, In_grad): s[Padded_out_grad].compute_inline() - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") _, h, w, c = In_grad.op.axis fused_hwc = s[In_grad].fuse(h, w, c) @@ -277,13 +278,13 @@ def schedule_depthwise_conv2d_backward_weight_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt weight with layout nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Weight_grad): - block_x = tvm.thread_axis("blockIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_y = te.thread_axis("threadIdx.y") + thread_x = te.thread_axis("threadIdx.x") db, dh, dw = Weight_grad.op.reduce_axis diff --git a/topi/python/topi/mali/conv2d.py b/topi/python/topi/mali/conv2d.py index f774e76c0ccd6..d195928570860 100644 --- a/topi/python/topi/mali/conv2d.py +++ b/topi/python/topi/mali/conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return """conv2d schedule on ARM Mali GPU""" import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.autotvm.task.space import get_factors @@ -38,10 +39,10 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] or pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height, filter_width, num_filter_block] @@ -60,7 +61,7 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding, @@ -83,7 +84,7 @@ def schedule_conv2d_nchw_spatial_pack(cfg, outs): s: Schedule The computation schedule for conv2d """ - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): # schedule conv2d @@ -100,7 +101,7 @@ def _callback(op): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec) @@ -121,12 +122,12 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): BW, TW, VW = cfg["tile_ow"].size # schedule padding - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data s[data_pad].compute_inline() # schedule data packing - if isinstance(data_vec.op, tvm.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated': + if isinstance(data_vec.op, tvm.te.ComputeOp) and data_vec.op.name == 'data_vec_undilated': _, h, w, ci, _, _, vh, vw = s[data_vec].op.axis else: _, h, w, ci, vh, vw = s[data_vec].op.axis @@ -136,7 +137,7 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): if vw.dom.extent.value < max_unroll: s[data_vec].unroll(vw) - if isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and kernel_vec.name == 'kernel_vec': + if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec': if autotvm.GLOBAL_SCOPE.in_tuning: # kernel packing will be pre-computed during compilation, so we skip # this part to make tuning records correct @@ -147,8 +148,8 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): fused = s[kernel_vec].fuse(co, ci, kh, kw, vc) fused, vec = s[kernel_vec].split(fused, VC) bb, tt = s[kernel_vec].split(fused, max_threads) - s[kernel_vec].bind(bb, tvm.thread_axis("blockIdx.x")) - s[kernel_vec].bind(tt, tvm.thread_axis("threadIdx.x")) + s[kernel_vec].bind(bb, te.thread_axis("blockIdx.x")) + s[kernel_vec].bind(tt, te.thread_axis("threadIdx.x")) if VC in vec_size: s[kernel_vec].vectorize(vec) @@ -199,7 +200,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.mali") def schedule_conv2d_nchw_winograd(cfg, outs): - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_conv2d_output' in op.tag: @@ -271,54 +272,55 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til assert CO % bna == 0 and P_round % bnb == 0 # pack input tile - input_tile = tvm.compute((CI, P_round // bnb, alpha, alpha, bnb), lambda ci, b, eps, nu, bb: \ - tvm.if_then_else( - b * bnb + bb < P, - data_pad[(b*bnb+bb) // (nH*nW)][ci][(b*bnb+bb) // nW % nH * m + eps] - [(b*bnb+bb) % nW * m + nu], tvm.const(0, data_pad.dtype)), name='d') + input_tile = te.compute( + (CI, P_round // bnb, alpha, alpha, bnb), lambda ci, b, eps, nu, bb: \ + tvm.tir.if_then_else( + b * bnb + bb < P, + data_pad[(b*bnb+bb) // (nH*nW)][ci][(b*bnb+bb) // nW % nH * m + eps] + [(b*bnb+bb) % nW * m + nu], tvm.tir.const(0, data_pad.dtype)), name='d') # transform kernel if pre_computed: U = kernel else: - r_kh = tvm.reduce_axis((0, KH), 'r_kh') - r_kw = tvm.reduce_axis((0, KW), 'r_kw') - U = tvm.compute((alpha, alpha, CO // bna, CI, bna), lambda eps, nu, co, ci, vco: - tvm.sum(kernel[co * bna + vco][ci][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), name='U') + r_kh = te.reduce_axis((0, KH), 'r_kh') + r_kw = te.reduce_axis((0, KW), 'r_kw') + U = te.compute((alpha, alpha, CO // bna, CI, bna), lambda eps, nu, co, ci, vco: + te.sum(kernel[co * bna + vco][ci][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), name='U') # transform image - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_b') - V = tvm.compute((alpha, alpha, P_round // bnb, CI, bnb), lambda eps, nu, p, ci, vp: - tvm.sum(input_tile[ci][p][r_a][r_b][vp] * B[r_a][eps] * B[r_b][nu], - axis=[r_a, r_b]), name='V') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_b') + V = te.compute((alpha, alpha, P_round // bnb, CI, bnb), lambda eps, nu, p, ci, vp: + te.sum(input_tile[ci][p][r_a][r_b][vp] * B[r_a][eps] * B[r_b][nu], + axis=[r_a, r_b]), name='V') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod # batch gemm - ci = tvm.reduce_axis((0, CI), name='c') - M = tvm.compute((alpha, alpha, CO, P_round), lambda eps, nu, co, p: - tvm.sum(U[eps][nu][idxdiv(co, bna)][ci][idxmod(co, bna)] * - V[eps][nu][idxdiv(p, bnb)][ci][idxmod(p, bnb)], axis=ci), name='M') + ci = te.reduce_axis((0, CI), name='c') + M = te.compute((alpha, alpha, CO, P_round), lambda eps, nu, co, p: + te.sum(U[eps][nu][idxdiv(co, bna)][ci][idxmod(co, bna)] * + V[eps][nu][idxdiv(p, bnb)][ci][idxmod(p, bnb)], axis=ci), name='M') - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_b') - Y = tvm.compute((CO, P, m, m), lambda co, p, vh, vw: - tvm.sum(M[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw], - axis=[r_a, r_b]), name='Y') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_b') + Y = te.compute((CO, P, m, m), lambda co, p, vh, vw: + te.sum(M[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw], + axis=[r_a, r_b]), name='Y') # unpack output - output = tvm.compute((N, CO, H, W), lambda n, co, h, w: - Y[co, n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), - idxmod(h, m), idxmod(w, m)] - # The following hack term is used to make the padding in batch gemm ("M") - # effective, otherwise the padding will be eliminated by bound inference. - # Use `tvm.expr.Mul` instead of `*` to avoid issues in const folding. - + tvm.expr.Mul(tvm.const(0, out_dtype), - M[alpha-1][alpha-1][CO-1][P_round-1]), - name='output', tag='winograd_conv2d_output') + output = te.compute((N, CO, H, W), lambda n, co, h, w: + Y[co, n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), + idxmod(h, m), idxmod(w, m)] + # The following hack term is used to make the padding in batch gemm ("M") + # effective, otherwise the padding will be eliminated by bound inference. + # Use `tvm.tir.Mul` instead of `*` to avoid issues in const folding. + + tvm.tir.Mul(tvm.tir.const(0, out_dtype), + M[alpha-1][alpha-1][CO-1][P_round-1]), + name='output', tag='winograd_conv2d_output') # we have to manually assign effective GFLOP for winograd cfg.add_flop(2 * N * CO * H * W * KH * KW * CI) @@ -339,7 +341,7 @@ def _schedule_winograd(cfg, s, op): s[data_pad].compute_inline() # transform kernel - if isinstance(U.op, tvm.tensor.ComputeOp): + if isinstance(U.op, tvm.te.ComputeOp): kernel, G = s[U].op.input_tensors s[G].compute_inline() eps, nu, co, ci, vco, = s[U].op.axis @@ -355,7 +357,7 @@ def _schedule_winograd(cfg, s, op): tile_and_bind(s, U, co, ci, 1, 256) # dilation - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # transform image @@ -370,8 +372,8 @@ def _schedule_winograd(cfg, s, op): fused = s[V].fuse(p, ci) bb, tt = cfg['tile_t1'].apply(s, V, fused) - s[V].bind(bb, tvm.thread_axis('blockIdx.x')) - s[V].bind(tt, tvm.thread_axis('threadIdx.x')) + s[V].bind(bb, te.thread_axis('blockIdx.x')) + s[V].bind(tt, te.thread_axis('threadIdx.x')) eps, nu, p, ci, vp = s[VL].op.axis r_a, r_b = s[VL].op.reduce_axis @@ -416,8 +418,8 @@ def _schedule_winograd(cfg, s, op): s[output].unroll(wi) fused = s[output].fuse(n, co, h, w) bb, tt = cfg['tile_t2'].apply(s, output, fused) - s[output].bind(bb, tvm.thread_axis('blockIdx.x')) - s[output].bind(tt, tvm.thread_axis('threadIdx.x')) + s[output].bind(bb, te.thread_axis('blockIdx.x')) + s[output].bind(tt, te.thread_axis('threadIdx.x')) s[Y].compute_at(s[output], tt) @@ -451,7 +453,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): data, kernel = tinfos out_dtype = out_type.dtype - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv if topi_tmpl == "conv2d_nchw_spatial_pack.mali": assert data_layout == "NCHW" and kernel_layout == "OIHW" @@ -462,7 +464,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OIHW%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "conv2d_nchw_spatial_pack.mali") @@ -488,10 +490,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['tile_size'] = tile_size new_data = data - new_kernel = tvm.placeholder((KH + tile_size - 1, - KW + tile_size -1, - idxd(CO, VC), CI, VC), - kernel.dtype) + new_kernel = te.placeholder((KH + tile_size - 1, + KW + tile_size -1, + idxd(CO, VC), CI, VC), + kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], 'conv2d_nchw_winograd.mali') @@ -508,10 +510,10 @@ def tile_and_bind(s, tensor, y, x, y_factor, x_factor=None): """ tile and bind to GPU threads """ x_factor = x_factor or y_factor yo, xo, yi, xi = s[tensor].tile(y, x, y_factor, x_factor) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) return yo, xo, yi, xi @@ -522,11 +524,11 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) s[tensor].reorder(zo, yo, xo, zi, yi, xi) return zo, yo, xo, zi, yi, xi diff --git a/topi/python/topi/mali/dense.py b/topi/python/topi/mali/dense.py index 3b233e92ba8ae..8ec5d19c9fa0b 100644 --- a/topi/python/topi/mali/dense.py +++ b/topi/python/topi/mali/dense.py @@ -16,10 +16,7 @@ # under the License. # pylint: disable=invalid-name,unused-variable """dense schedule on ARM Mali GPU""" - -from __future__ import absolute_import as _abs - -import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -50,8 +47,8 @@ def schedule_dense(cfg, outs): s: Schedule The computation schedule for dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -82,10 +79,10 @@ def _callback(op): by, ty, yi = cfg['tile_y'].apply(s, output, y) bx, tx, xi = cfg['tile_x'].apply(s, output, x) - s[output].bind(by, tvm.thread_axis('blockIdx.y')) - s[output].bind(bx, tvm.thread_axis('blockIdx.x')) - s[output].bind(ty, tvm.thread_axis('threadIdx.y')) - s[output].bind(tx, tvm.thread_axis('threadIdx.x')) + s[output].bind(by, te.thread_axis('blockIdx.y')) + s[output].bind(bx, te.thread_axis('blockIdx.x')) + s[output].bind(ty, te.thread_axis('threadIdx.y')) + s[output].bind(tx, te.thread_axis('threadIdx.x')) if cfg['tile_y'].size[-1] < max_unroll: s[output].unroll(yi) @@ -113,6 +110,6 @@ def fuse_and_bind(s, tensor, axis=None, num_thread=None): axis = axis or s[tensor].op.axis fused = s[tensor].fuse(*axis) bx, tx = s[tensor].split(fused, num_thread) - s[tensor].bind(bx, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(tx, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(bx, te.thread_axis("blockIdx.x")) + s[tensor].bind(tx, te.thread_axis("threadIdx.x")) return bx, tx diff --git a/topi/python/topi/mali/depthwise_conv2d.py b/topi/python/topi/mali/depthwise_conv2d.py index 4ff17e534febc..785128c84dfa4 100644 --- a/topi/python/topi/mali/depthwise_conv2d.py +++ b/topi/python/topi/mali/depthwise_conv2d.py @@ -18,6 +18,7 @@ """depthwise_conv2d schedule on ARM Mali GPU""" import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -47,8 +48,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(pad_data, kernel, conv): """schedule depthwise_conv2d""" @@ -75,7 +76,7 @@ def _schedule(pad_data, kernel, conv): tile_and_bind3d(s, pad_data, c, y, x, cfg["tile_c"].size[1], 1, 1) # schedule dilation - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # schedule conv @@ -93,12 +94,12 @@ def _schedule(pad_data, kernel, conv): bx, tx, xi = cfg['tile_x'].apply(s, output, x) bc = s[output].fuse(n, bc) - s[output].bind(bc, tvm.thread_axis("blockIdx.z")) - s[output].bind(tc, tvm.thread_axis("threadIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bc, te.thread_axis("blockIdx.z")) + s[output].bind(tc, te.thread_axis("threadIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) di, dj = s[OL].op.reduce_axis s[OL].unroll(di) @@ -134,10 +135,10 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) return zo, zi, yo, yi, xo, xi diff --git a/topi/python/topi/math.py b/topi/python/topi/math.py index 148d53a54cfe0..0fceaadfc09c3 100644 --- a/topi/python/topi/math.py +++ b/topi/python/topi/math.py @@ -16,8 +16,8 @@ # under the License. """Elementwise operators""" # pylint: disable=redefined-builtin -from __future__ import absolute_import as _abs import tvm +from tvm import te from . import tag from . import cpp @@ -28,16 +28,16 @@ def identity(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ # pylint: disable=unnecessary-lambda - return tvm.compute(x.shape, lambda *i: x(*i)) + return te.compute(x.shape, lambda *i: x(*i)) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -46,16 +46,16 @@ def negative(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ # pylint: disable=unnecessary-lambda - return tvm.compute(x.shape, lambda *i: -x(*i)) + return te.compute(x.shape, lambda *i: -x(*i)) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -64,15 +64,15 @@ def exp(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.exp(x(*i))) + return te.compute(x.shape, lambda *i: te.exp(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -81,15 +81,15 @@ def erf(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.erf(x(*i))) + return te.compute(x.shape, lambda *i: te.erf(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -98,15 +98,15 @@ def tanh(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.tanh(x(*i))) + return te.compute(x.shape, lambda *i: te.tanh(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -115,15 +115,15 @@ def cos(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.cos(x(*i))) + return te.compute(x.shape, lambda *i: te.cos(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -132,15 +132,15 @@ def sin(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.sin(x(*i))) + return te.compute(x.shape, lambda *i: te.sin(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -149,15 +149,15 @@ def atan(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.atan(x(*i))) + return te.compute(x.shape, lambda *i: te.atan(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) def floor(x): @@ -165,15 +165,15 @@ def floor(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.floor(x(*i))) + return te.compute(x.shape, lambda *i: te.floor(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -182,15 +182,15 @@ def ceil(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.ceil(x(*i))) + return te.compute(x.shape, lambda *i: te.ceil(x(*i))) def sign(x): @@ -198,12 +198,12 @@ def sign(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.sign(x) @@ -215,15 +215,15 @@ def trunc(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.trunc(x(*i))) + return te.compute(x.shape, lambda *i: te.trunc(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -232,15 +232,15 @@ def abs(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.abs(x(*i))) + return te.compute(x.shape, lambda *i: te.abs(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -249,15 +249,15 @@ def isnan(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.isnan(x(*i))) + return te.compute(x.shape, lambda *i: te.isnan(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -266,15 +266,15 @@ def round(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.round(x(*i))) + return te.compute(x.shape, lambda *i: te.round(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -283,15 +283,15 @@ def log(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.log(x(*i))) + return te.compute(x.shape, lambda *i: te.log(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -300,15 +300,15 @@ def sqrt(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.sqrt(x(*i))) + return te.compute(x.shape, lambda *i: te.sqrt(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -317,15 +317,15 @@ def rsqrt(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.rsqrt(x(*i))) + return te.compute(x.shape, lambda *i: te.rsqrt(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -334,15 +334,15 @@ def sigmoid(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.sigmoid(x(*i))) + return te.compute(x.shape, lambda *i: te.sigmoid(x(*i))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -351,17 +351,17 @@ def left_shift(x, n): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. n : int Number of bits. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: x(*i) << n) + return te.compute(x.shape, lambda *i: x(*i) << n) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -370,17 +370,17 @@ def right_shift(x, n): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. n : int Number of bits. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: x(*i) >> n) + return te.compute(x.shape, lambda *i: x(*i) >> n) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -390,7 +390,7 @@ def clip(x, a_min, a_max): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. a_min : int or float Minimum value. @@ -399,15 +399,15 @@ def clip(x, a_min, a_max): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ def _compute(*indices): value = x(*indices) - const_min = tvm.const(a_min, value.dtype) - const_max = tvm.const(a_max, value.dtype) - return tvm.max(tvm.min(value, const_max), const_min) - return tvm.compute(x.shape, _compute) + const_min = tvm.tir.const(a_min, value.dtype) + const_max = tvm.tir.const(a_max, value.dtype) + return tvm.te.max(tvm.te.min(value, const_max), const_min) + return te.compute(x.shape, _compute) def cast(x, dtype): @@ -415,7 +415,7 @@ def cast(x, dtype): Parameters ---------- - x : tvm.Tensor or Expr + x : tvm.te.Tensor or Expr Input argument. dtype : str @@ -423,11 +423,11 @@ def cast(x, dtype): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - if isinstance(x, tvm.tensor.Tensor): - return tvm.compute( + if isinstance(x, te.tensor.Tensor): + return te.compute( x.shape, lambda *i: x(*i).astype(dtype), tag=tag.ELEMWISE) # pylint: disable=import-outside-toplevel from tvm.tir import _ffi_api @@ -439,7 +439,7 @@ def reinterpret(x, dtype): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. dtype : str @@ -447,7 +447,7 @@ def reinterpret(x, dtype): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.reinterpret(x, dtype) @@ -458,12 +458,12 @@ def fast_exp(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.fast_exp(x, x.dtype, tag.ELEMWISE) diff --git a/topi/python/topi/nn/batch_matmul.py b/topi/python/topi/nn/batch_matmul.py index d69562c4daf6c..0d9f3510d097e 100644 --- a/topi/python/topi/nn/batch_matmul.py +++ b/topi/python/topi/nn/batch_matmul.py @@ -16,8 +16,7 @@ # under the License. """Binary Neural Network (BNN) Operators""" # pylint: disable=invalid-name -from __future__ import absolute_import as _abs -import tvm +from tvm import te from ..util import get_const_tuple def batch_matmul(x, y): @@ -26,15 +25,15 @@ def batch_matmul(x, y): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, M, N] """ assert len(x.shape) == 3 and len(y.shape) == 3, "only support 3-dim batch_matmul" @@ -44,7 +43,7 @@ def batch_matmul(x, y): assert x_shape[2] == y_shape[2], "shapes of x and y is inconsistant" batch, M, K = x.shape N = y.shape[1] - k = tvm.reduce_axis((0, K), name='k') - return tvm.compute((batch, M, N), - lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k), - tag='batch_matmul') + k = te.reduce_axis((0, K), name='k') + return te.compute((batch, M, N), + lambda b, i, j: te.sum(x[b, i, k] * y[b, j, k], axis=k), + tag='batch_matmul') diff --git a/topi/python/topi/nn/bitserial_conv2d.py b/topi/python/topi/nn/bitserial_conv2d.py index f18a5aae7eedf..e1a7697ca4deb 100644 --- a/topi/python/topi/nn/bitserial_conv2d.py +++ b/topi/python/topi/nn/bitserial_conv2d.py @@ -17,8 +17,8 @@ # pylint: disable=invalid-name, too-many-locals, too-many-arguments # pylint: disable=unused-argument, redefined-builtin """Bitserial Conv2D operators""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from .pad import pad from .util import get_pad_tuple from .bitserial_util import bitpack @@ -30,10 +30,10 @@ def bitserial_conv2d_nchw(data, kernel, stride, padding, activation_bits, weight Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] stride : int or a list/tuple of two ints @@ -59,7 +59,7 @@ def bitserial_conv2d_nchw(data, kernel, stride, padding, activation_bits, weight Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ assert isinstance(stride, int) or len(stride) == 2 @@ -88,32 +88,32 @@ def bitserial_conv2d_nchw(data, kernel, stride, padding, activation_bits, weight out_height = (in_height - kernel_h + TPAD + DPAD) // stride_h + 1 out_width = (in_width - kernel_w + LPAD + RPAD) // stride_w + 1 - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - b1 = tvm.reduce_axis((0, activation_bits), name='b1') - b2 = tvm.reduce_axis((0, weight_bits), name='b2') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + b1 = te.reduce_axis((0, activation_bits), name='b1') + b2 = te.reduce_axis((0, weight_bits), name='b2') if unipolar: def _conv(nn, ff, yy, xx): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum( - ((tvm.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & - Filter_q[ff, rc, ry, rx, b2]) - - tvm.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & - ~Filter_q[ff, rc, ry, rx, b2])) + return te.sum( + ((tvm.tir.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & + Filter_q[ff, rc, ry, rx, b2]) - + tvm.tir.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & + ~Filter_q[ff, rc, ry, rx, b2])) << (b1b2)).astype(out_dtype), axis=[rc, ry, rx, b2, b1]).astype(out_dtype) else: def _conv(nn, ff, yy, xx): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & Filter_q[ff, rc, ry, rx, b2])<< (b1b2)).astype(out_dtype), - axis=[rc, ry, rx, b2, b1]).astype(out_dtype) + axis=[rc, ry, rx, b2, b1]).astype(out_dtype) - return tvm.compute((batch, out_channel, out_height, out_width), _conv, - name="Conv2dOutput", tag="bitserial_conv2d_nchw") + return te.compute((batch, out_channel, out_height, out_width), _conv, + name="Conv2dOutput", tag="bitserial_conv2d_nchw") def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight_bits, pack_dtype='uint32', out_dtype='int16', unipolar=True): @@ -121,10 +121,10 @@ def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of two ints @@ -150,7 +150,7 @@ def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] """ assert isinstance(stride, int) or len(stride) == 2 @@ -180,33 +180,33 @@ def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight out_width = (in_width - kernel_w + LPAD + RPAD) // stride_w + 1 PadInput_q = pad(Input_q, pad_before, pad_after, name="PaddedInput") - rc = tvm.reduce_axis((0, in_channel_q), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - b1 = tvm.reduce_axis((0, activation_bits), name='b1') - b2 = tvm.reduce_axis((0, weight_bits), name='b2') + rc = te.reduce_axis((0, in_channel_q), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + b1 = te.reduce_axis((0, activation_bits), name='b1') + b2 = te.reduce_axis((0, weight_bits), name='b2') if unipolar: def _conv(nn, yy, xx, ff): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum( - ((tvm.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & - Filter_q[ry, rx, rc, ff, b2]) - - tvm.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & - ~Filter_q[ry, rx, rc, ff, b2])) + return te.sum( + ((tvm.tir.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & + Filter_q[ry, rx, rc, ff, b2]) - + tvm.tir.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & + ~Filter_q[ry, rx, rc, ff, b2])) << b1b2).astype(out_dtype), axis=[rc, ry, rx, b2, b1]) else: def _conv(nn, yy, xx, ff): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & Filter_q[ry, rx, rc, ff, b2]) << b1b2).astype(out_dtype), - axis=[rc, ry, rx, b2, b1]) + axis=[rc, ry, rx, b2, b1]) - conv = tvm.compute((batch, out_height, out_width, out_channel), _conv, - name="Conv2dOutput", tag="bitserial_conv2d_nhwc") + conv = te.compute((batch, out_height, out_width, out_channel), _conv, + name="Conv2dOutput", tag="bitserial_conv2d_nhwc") return conv diff --git a/topi/python/topi/nn/bitserial_dense.py b/topi/python/topi/nn/bitserial_dense.py index fa1b5df7d066e..10635d8e9f2ce 100644 --- a/topi/python/topi/nn/bitserial_dense.py +++ b/topi/python/topi/nn/bitserial_dense.py @@ -18,6 +18,7 @@ """Bitserial Dense operator.""" from __future__ import absolute_import import tvm +from tvm import te from topi.util import get_const_tuple from .bitserial_util import bitpack @@ -27,14 +28,14 @@ def bitserial_dense(data, weight, data_bits, weight_bits, pack_dtype='uint32', Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] or 3-D with shape [out_dim, weight_bits, in_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype) @@ -46,18 +47,18 @@ def bitserial_dense(data, weight, data_bits, weight_bits, pack_dtype='uint32', X, WB, _ = get_const_tuple(weight_packed.shape) oshape = (Y, X) - k = tvm.reduce_axis((0, K), name='k') - db = tvm.reduce_axis((0, DB), name='db') - wb = tvm.reduce_axis((0, WB), name='wb') + k = te.reduce_axis((0, K), name='k') + db = te.reduce_axis((0, DB), name='db') + wb = te.reduce_axis((0, WB), name='wb') - matmul_unipolar = tvm.compute(oshape, lambda i, j: tvm.sum( - (tvm.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]) - - tvm.popcount(~weight_packed[j, wb, k] & data_packed[i, db, k])).astype(out_dtype) + matmul_unipolar = te.compute(oshape, lambda i, j: te.sum( + (tvm.tir.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]) - + tvm.tir.popcount(~weight_packed[j, wb, k] & data_packed[i, db, k])).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), - tag='bitserial_dense_unipolar') + tag='bitserial_dense_unipolar') - matmul = tvm.compute(oshape, lambda i, j: tvm.sum( - tvm.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]).astype(out_dtype) + matmul = te.compute(oshape, lambda i, j: te.sum( + tvm.tir.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense') diff --git a/topi/python/topi/nn/bitserial_util.py b/topi/python/topi/nn/bitserial_util.py index def5b5e2e1935..a25aa91198d8e 100644 --- a/topi/python/topi/nn/bitserial_util.py +++ b/topi/python/topi/nn/bitserial_util.py @@ -18,6 +18,7 @@ """Utility functions for bitserial operators""" import numpy as np import tvm +from tvm import te from topi.transform import concatenate from ..util import get_const_int @@ -52,7 +53,7 @@ def bitpack(data, bits, pack_axis, bit_axis, pack_type, name="QuantizeInput"): pack_axis += 1 def _bitpack(*indices): - packed_data = [tvm.const(0, pack_type)] * bits + packed_data = [tvm.tir.const(0, pack_type)] * bits for k in range(data_width): # Translate indices for packed data back to original idx = [0] * n @@ -68,7 +69,8 @@ def _bitpack(*indices): element = data(*idx) for b in range(bits): - extracted_bit = ((element & tvm.const(masks[b], "int32")) >> b).astype(pack_type) + extracted_bit = ( + (element & tvm.tir.const(masks[b], "int32")) >> b).astype(pack_type) packed_data[b] = (packed_data[b] | extracted_bit) if k < data_width - 1: packed_data[b] = packed_data[b] << 1 @@ -77,7 +79,7 @@ def _bitpack(*indices): return tuple(packed_data) return tuple(packed_data) - output_tuple = tvm.compute(bitserial_oshape, _bitpack, name=name, tag='bitpack') + output_tuple = te.compute(bitserial_oshape, _bitpack, name=name, tag='bitpack') if bits > 1: return concatenate(output_tuple, axis=bit_axis) diff --git a/topi/python/topi/nn/bnn.py b/topi/python/topi/nn/bnn.py index e3b841e0b673e..d7355fb0c2978 100644 --- a/topi/python/topi/nn/bnn.py +++ b/topi/python/topi/nn/bnn.py @@ -17,6 +17,7 @@ """Binary Neural Network (BNN) Operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag from ..util import simplify, get_const_int @@ -26,7 +27,7 @@ def binarize_pack(data, axis=None, name="PackedInput"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D input, can be any layout. axis : None or int @@ -38,7 +39,7 @@ def binarize_pack(data, axis=None, name="PackedInput"): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D, the same layout as input, dtype is uint32. """ ishape = data.shape @@ -47,11 +48,11 @@ def binarize_pack(data, axis=None, name="PackedInput"): assert get_const_int(ishape[axis]) % 32 == 0 n = len(ishape) oshape = tuple(simplify(ishape[i] // 32) if i == axis \ - else ishape[i] for i in range(n)) + else ishape[i] for i in range(n)) def _binarize_pack(*indices): start_idx = [indices[i] * 32 if i == axis else indices[i] for i in range(n)] - packed = tvm.const(0, 'uint32') + packed = tvm.tir.const(0, 'uint32') for j in range(32): idx = [start_idx[i] + j if i == axis else start_idx[i] for i in range(n)] sign = (data(*idx) >= 0).astype("uint32") @@ -61,7 +62,7 @@ def _binarize_pack(*indices): packed = packed << 1 raise RuntimeError("not resach") - return tvm.compute(oshape, _binarize_pack, name=name, tag='binarize_pack') + return te.compute(oshape, _binarize_pack, name=name, tag='binarize_pack') def binary_dense(data, weight): @@ -69,15 +70,15 @@ def binary_dense(data, weight): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim], dtype is uint32. - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim], dtype is uint32. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim], dtype is float32. """ assert data.dtype == 'uint32' and weight.dtype == 'uint32', \ @@ -86,11 +87,11 @@ def binary_dense(data, weight): "only support 2-dim binary dense" batch, in_dim = data.shape out_dim, _ = weight.shape - k = tvm.reduce_axis((0, in_dim), name='k') - matmul = tvm.compute((batch, out_dim), lambda i, j: \ - tvm.sum(tvm.popcount(data[i, k] ^ weight[j, k]), axis=k), \ - tag='binary_dense') + k = te.reduce_axis((0, in_dim), name='k') + matmul = te.compute((batch, out_dim), lambda i, j: \ + te.sum(tvm.tir.popcount(data[i, k] ^ weight[j, k]), axis=k), \ + tag='binary_dense') - return tvm.compute((batch, out_dim), lambda i, j: \ - 32 * in_dim - 2. * matmul(i, j), \ - tag=tag.ELEMWISE) + return te.compute((batch, out_dim), lambda i, j: \ + 32 * in_dim - 2. * matmul(i, j), \ + tag=tag.ELEMWISE) diff --git a/topi/python/topi/nn/conv1d.py b/topi/python/topi/nn/conv1d.py index 4565fd2f5a461..8049dff01ffad 100644 --- a/topi/python/topi/nn/conv1d.py +++ b/topi/python/topi/nn/conv1d.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """1D convolution operators.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .pad import pad from ..util import simplify from .util import get_pad_tuple1d @@ -34,11 +33,11 @@ def conv1d(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D input shape [batch, in_channel, in_width] for layout == 'NCW' and [batch, in_width, in_channel] for layout == 'NWC' - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D kernel with shape [num_filter, in_channel, filter_size] for layout == 'NCW' and [filter_size, in_channel, num_filter] for layout == 'NWC' @@ -81,10 +80,10 @@ def conv1d_ncw(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D with shape [batch, in_channel, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D with shape [num_filter, in_channel, filter_size] strides : int or tuple @@ -123,12 +122,12 @@ def conv1d_ncw(data, temp = pad(data, pad_before, pad_after, name='pad_temp') # Compute graph - rc = tvm.reduce_axis((0, in_channels), name='rc') - rw = tvm.reduce_axis((0, kernel_size), name='rw') + rc = te.reduce_axis((0, in_channels), name='rc') + rw = te.reduce_axis((0, kernel_size), name='rw') - return tvm.compute( + return te.compute( (batch, out_channels, out_width), - lambda b, c, w: tvm.sum( + lambda b, c, w: te.sum( temp[b, rc, w * strides + rw * dilation].astype(out_dtype) * kernel[c, rc, rw].astype(out_dtype), axis=[rc, rw]), @@ -145,10 +144,10 @@ def conv1d_nwc(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D with shape [batch, in_width, in_channel] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D with shape [filter_size, in_channel, num_filter] strides : int or tuple @@ -187,12 +186,12 @@ def conv1d_nwc(data, temp = pad(data, pad_before, pad_after, name='pad_temp') # Compute graph - rc = tvm.reduce_axis((0, in_channels), name='rc') - rw = tvm.reduce_axis((0, kernel_size), name='rw') + rc = te.reduce_axis((0, in_channels), name='rc') + rw = te.reduce_axis((0, kernel_size), name='rw') - return tvm.compute( + return te.compute( (batch, out_width, out_channels), - lambda b, w, c: tvm.sum( + lambda b, w, c: te.sum( temp[b, w * strides + rw * dilation, rc].astype(out_dtype) * kernel[rw, rc, c].astype(out_dtype), axis=[rc, rw]), diff --git a/topi/python/topi/nn/conv1d_transpose.py b/topi/python/topi/nn/conv1d_transpose.py index 8d224247db011..1895b1f04a1f4 100644 --- a/topi/python/topi/nn/conv1d_transpose.py +++ b/topi/python/topi/nn/conv1d_transpose.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Transposed 1D convolution operators (sometimes called Deconvolution).""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .dilate import dilate from .pad import pad from ..util import simplify @@ -29,10 +28,10 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D with shape [batch, in_channel, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D with shape [in_channel, num_filter, filter_width] stride : ints @@ -46,7 +45,7 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, out_channel, out_width] """ @@ -63,18 +62,18 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype): data = pad(data, [0, 0, pad_left], [0, 0, pad_right], name='data_pad') # transpose kernel, switch kernel layout to IOW - kernel = tvm.compute((channels_out, channels_in, kernel_width), \ - lambda o, i, w: kernel[i][o][kernel_width-1-w],\ - name='kernel') + kernel = te.compute((channels_out, channels_in, kernel_width), \ + lambda o, i, w: kernel[i][o][kernel_width-1-w],\ + name='kernel') # convolution _, _, data_width = data.shape out_w = simplify(data_width - kernel_width + 1) - dc = tvm.reduce_axis((0, channels_in), name='dc') - dw = tvm.reduce_axis((0, kernel_width), name='dw') - output = tvm.compute( + dc = te.reduce_axis((0, channels_in), name='dc') + dw = te.reduce_axis((0, kernel_width), name='dw') + output = te.compute( (batch, channels_out, out_w), - lambda b, c, w: tvm.sum( + lambda b, c, w: te.sum( data[b, dc, w+dw].astype(out_dtype) * kernel[c, dc, dw].astype(out_dtype), axis=[dc, dw]), tag="conv1d_transpose_ncw") diff --git a/topi/python/topi/nn/conv2d.py b/topi/python/topi/nn/conv2d.py index a7a75ed0ef0ca..4c7941b49692c 100644 --- a/topi/python/topi/nn/conv2d.py +++ b/topi/python/topi/nn/conv2d.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs from collections import namedtuple import tvm +from tvm import te from .pad import pad from .util import get_pad_tuple @@ -36,10 +37,10 @@ def conv2d(input, filter, strides, padding, dilation, layout='NCHW', out_dtype=N Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] strides : int or a list/tuple of two ints @@ -58,7 +59,7 @@ def conv2d(input, filter, strides, padding, dilation, layout='NCHW', out_dtype=N Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ # search platform specific declaration first @@ -170,10 +171,10 @@ def conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] stride : int or a list/tuple of two ints @@ -189,7 +190,7 @@ def conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if out_dtype is None: @@ -220,12 +221,12 @@ def conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None): pad_before = [0, 0, pad_top, pad_left] pad_after = [0, 0, pad_down, pad_right] temp = pad(Input, pad_before, pad_after, name="pad_temp") - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - return tvm.compute( + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + return te.compute( (batch, out_channel, out_height, out_width), - lambda nn, ff, yy, xx: tvm.sum( + lambda nn, ff, yy, xx: te.sum( temp[nn, rc, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w].astype(out_dtype) * Filter[ff, rc, ry, rx].astype(out_dtype), @@ -237,10 +238,10 @@ def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [in_height, in_width, in_channel, batch] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of two ints @@ -256,7 +257,7 @@ def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [out_height, out_width, out_channel, batch] """ if out_dtype is None: @@ -287,12 +288,12 @@ def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): pad_before = [pad_top, pad_left, 0, 0] pad_after = [pad_down, pad_right, 0, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - Output = tvm.compute( + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + Output = te.compute( (out_height, out_width, out_channel, batch), - lambda yy, xx, ff, nn: tvm.sum( + lambda yy, xx, ff, nn: te.sum( PaddedInput[yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w, rc, nn].astype(out_dtype) * Filter[ry, rx, rc, ff].astype(out_dtype), axis=[ry, rx, rc]), @@ -305,10 +306,10 @@ def conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of two ints @@ -324,7 +325,7 @@ def conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] """ assert isinstance(stride, int) or len(stride) == 2 @@ -353,12 +354,12 @@ def conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): pad_before = [0, pad_top, pad_left, 0] pad_after = [0, pad_down, pad_right, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - Output = tvm.compute( + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + Output = te.compute( (batch, out_height, out_width, out_channel), - lambda nn, yy, xx, ff: tvm.sum( + lambda nn, yy, xx, ff: te.sum( PaddedInput[nn, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w, rc].astype(out_dtype) * Filter[ry, rx, rc, ff].astype(out_dtype), axis=[ry, rx, rc]), @@ -371,10 +372,10 @@ def conv2d_NCHWc(data, kernel, stride, padding, dilation, layout, out_layout, ou Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 6-D with shape [num_filter_chunk, in_channel_chunk, filter_height, filter_width, in_channel_block, num_filter_block] @@ -401,7 +402,7 @@ def conv2d_NCHWc(data, kernel, stride, padding, dilation, layout, out_layout, ou Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block] """ @@ -441,27 +442,27 @@ def conv2d_NCHWc(data, kernel, stride, padding, dilation, layout, out_layout, ou else: data_pad = data - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - - return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - tvm.sum(data_pad[n, - idxdiv(ic, ic_bn), - oh * HSTR + kh * dilation_h, - ow * WSTR + kw * dilation_w, - idxmod(ic, ic_bn)].astype(out_dtype) - * kernel[oc_chunk, - idxdiv(ic, ic_bn), - kh, - kw, - idxmod(ic, ic_bn), - oc_block], - axis=[ic, kh, kw]), - name='conv2d_NCHWc', tag="conv2d_NCHWc") + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + + return te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: + te.sum(data_pad[n, + idxdiv(ic, ic_bn), + oh * HSTR + kh * dilation_h, + ow * WSTR + kw * dilation_w, + idxmod(ic, ic_bn)].astype(out_dtype) + * kernel[oc_chunk, + idxdiv(ic, ic_bn), + kh, + kw, + idxmod(ic, ic_bn), + oc_block], + axis=[ic, kh, kw]), + name='conv2d_NCHWc', tag="conv2d_NCHWc") def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layout, @@ -470,10 +471,10 @@ def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layou Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 7-D with shape [num_filter_chunk, in_channel_chunk, filter_height, filter_width, in_channel_block/4, num_filter_block, 4] @@ -500,7 +501,7 @@ def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layou Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block] """ @@ -540,53 +541,53 @@ def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layou else: data_pad = data - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') if groups == 1: n_elems = 4 - ic_outer = tvm.reduce_axis((0, in_channel//ic_bn), name='ic_outer') - ic_f_inner = tvm.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') - ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner') - return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - tvm.sum(data_pad[n, - ic_outer, - oh * HSTR + kh * dilation_h, - ow * WSTR + kw * dilation_w, - ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) - * kernel[oc_chunk, - ic_outer, - kh, - kw, - ic_f_inner, - oc_block, - ic_s_inner].astype(out_dtype), - axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]), - name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") + ic_outer = te.reduce_axis((0, in_channel//ic_bn), name='ic_outer') + ic_f_inner = te.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') + ic_s_inner = te.reduce_axis((0, n_elems), name='ic_s_inner') + return te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: + te.sum(data_pad[n, + ic_outer, + oh * HSTR + kh * dilation_h, + ow * WSTR + kw * dilation_w, + ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) + * kernel[oc_chunk, + ic_outer, + kh, + kw, + ic_f_inner, + oc_block, + ic_s_inner].astype(out_dtype), + axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]), + name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") # for int8 group conv support n_elems = 4 ic_chunk = in_channel//ic_bn - ic_outer = tvm.reduce_axis((0, ic_chunk//groups), name='ic_outer') - ic_f_inner = tvm.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') - ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner') + ic_outer = te.reduce_axis((0, ic_chunk//groups), name='ic_outer') + ic_f_inner = te.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') + ic_s_inner = te.reduce_axis((0, n_elems), name='ic_s_inner') oshape = (n, oc_chunk, out_height, out_width, oc_bn) - return tvm.compute(oshape, lambda n, occ, oh, ow, oc_block: - tvm.sum(data_pad[n, - (occ * oc_bn // (oc_chunk * oc_bn // groups)) - * (ic_chunk // groups) + ic_outer, - oh * HSTR + kh, - ow * WSTR + kw, - ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) - * kernel[occ, - ic_outer, - kh, - kw, - ic_f_inner, - oc_block, - ic_s_inner].astype(out_dtype), - axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]), - name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") + return te.compute(oshape, lambda n, occ, oh, ow, oc_block: + te.sum(data_pad[n, + (occ * oc_bn // (oc_chunk * oc_bn // groups)) + * (ic_chunk // groups) + ic_outer, + oh * HSTR + kh, + ow * WSTR + kw, + ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) + * kernel[occ, + ic_outer, + kh, + kw, + ic_f_inner, + oc_block, + ic_s_inner].astype(out_dtype), + axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]), + name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") def conv2d_winograd_weight_transform(kernel, tile_size): @@ -601,7 +602,7 @@ def conv2d_winograd_weight_transform(kernel, tile_size): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [alpha, alpha, CO, CI] """ shape = get_const_tuple(kernel.shape) @@ -613,12 +614,12 @@ def conv2d_winograd_weight_transform(kernel, tile_size): _, _, G = winograd_transform_matrices(tile_size, K, kernel.dtype) - r_kh = tvm.reduce_axis((0, K), name='r_kh') - r_kw = tvm.reduce_axis((0, K), name='r_kw') - return tvm.compute(shape, lambda eps, nu, co, ci: - tvm.sum(kernel[co][ci][r_kh][r_kw] * - G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), name='transform_weight') + r_kh = te.reduce_axis((0, K), name='r_kh') + r_kw = te.reduce_axis((0, K), name='r_kw') + return te.compute(shape, lambda eps, nu, co, ci: + te.sum(kernel[co][ci][r_kh][r_kw] * + G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), name='transform_weight') def conv2d_winograd_nnpack_weight_transform(kernel, convolution_algorithm, out_dtype): @@ -631,7 +632,7 @@ def conv2d_winograd_nnpack_weight_transform(kernel, convolution_algorithm, out_d The convolution algorithm for Winograd NNPACK. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [alpha, alpha, CO, CI] """ # pylint: disable=import-outside-toplevel @@ -645,10 +646,10 @@ def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtyp Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel // groups, filter_height, filter_width] stride : int or a list/tuple of two ints @@ -670,7 +671,7 @@ def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtyp Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if out_dtype is None: @@ -705,12 +706,12 @@ def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtyp pad_before = [0, 0, pad_top, pad_left] pad_after = [0, 0, pad_down, pad_right] temp = pad(Input, pad_before, pad_after, name="pad_temp") - rc = tvm.reduce_axis((0, in_channel // groups), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - return tvm.compute( + rc = te.reduce_axis((0, in_channel // groups), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + return te.compute( (batch, out_channel, out_height, out_width), - lambda nn, ff, yy, xx: tvm.sum( + lambda nn, ff, yy, xx: te.sum( temp[nn, ff // (num_filter//groups) * (in_channel//groups) + rc, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w].astype(out_dtype) * @@ -723,7 +724,7 @@ def unpack_NCHWc_to_nchw(packed_out, out_dtype): Parameters ----------- - packed_out : tvm.Tensor + packed_out : tvm.te.Tensor The output tensor of conv2d_NCHWc. out_dtype : str @@ -731,20 +732,20 @@ def unpack_NCHWc_to_nchw(packed_out, out_dtype): Returns ------- - unpacked_out : tvm.Tensor + unpacked_out : tvm.te.Tensor The unpacked output tensor in NCHW layout. """ n, oc_chunk, oh, ow, oc_bn = get_const_tuple(packed_out.shape) - idxmod = tvm.indexmod - idxdiv = tvm.indexdiv + idxmod = tvm.tir.indexmod + idxdiv = tvm.tir.indexdiv oshape = (n, oc_chunk * oc_bn, oh, ow) unpacked_out = \ - tvm.compute(oshape, - lambda n, c, h, w: - packed_out[n, idxdiv(c, oc_bn), h, w, idxmod(c, oc_bn)] - .astype(out_dtype), - name='output_unpack', - tag=tag.INJECTIVE+",unpack_nchwc") + te.compute(oshape, + lambda n, c, h, w: + packed_out[n, idxdiv(c, oc_bn), h, w, idxmod(c, oc_bn)] + .astype(out_dtype), + name='output_unpack', + tag=tag.INJECTIVE+",unpack_nchwc") return unpacked_out diff --git a/topi/python/topi/nn/conv2d_transpose.py b/topi/python/topi/nn/conv2d_transpose.py index db132fc81f132..3563112ed2446 100644 --- a/topi/python/topi/nn/conv2d_transpose.py +++ b/topi/python/topi/nn/conv2d_transpose.py @@ -16,8 +16,8 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Transposed 2D convolution operators (sometimes called Deconvolution).""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import relay from .dilate import dilate from .pad import pad @@ -30,10 +30,10 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, num_filter, filter_height, filter_width] strides : tuple of two ints @@ -47,7 +47,7 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype) @@ -72,9 +72,9 @@ def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype): [0, 0, bpad_bottom, bpad_right], \ name='data_pad') # transform kernel layout from IOHW to OIHW, and rotate kernel by 180 degrees - kernel_transform = tvm.compute((out_c, in_c, filter_h, filter_w), \ - lambda o, i, h, w: kernel[i][o][filter_h-1-h][filter_w-1-w], \ - name='kernel_transform') + kernel_transform = te.compute((out_c, in_c, filter_h, filter_w), \ + lambda o, i, h, w: kernel[i][o][filter_h-1-h][filter_w-1-w], \ + name='kernel_transform') return data_pad, kernel_transform @@ -90,13 +90,13 @@ def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype) out_c = simplify(out_c) out_h = simplify(in_h - filter_h + 1) out_w = simplify(in_w - filter_w + 1) - dc = tvm.reduce_axis((0, in_c), name='dc') - dh = tvm.reduce_axis((0, filter_h), name='dh') - dw = tvm.reduce_axis((0, filter_w), name='dw') + dc = te.reduce_axis((0, in_c), name='dc') + dh = te.reduce_axis((0, filter_h), name='dh') + dw = te.reduce_axis((0, filter_w), name='dw') - Output = tvm.compute( + Output = te.compute( (batch, out_c, out_h, out_w), - lambda b, c, h, w: tvm.sum( + lambda b, c, h, w: te.sum( data_pad[b, dc, h+dh, w+dw].astype(out_dtype) * kernel_transform[c, dc, dh, dw].astype(out_dtype), axis=[dc, dh, dw]), tag="conv2d_transpose_nchw") diff --git a/topi/python/topi/nn/conv3d.py b/topi/python/topi/nn/conv3d.py index 88c7c6a3ed90d..d6bd6424a9477 100644 --- a/topi/python/topi/nn/conv3d.py +++ b/topi/python/topi/nn/conv3d.py @@ -17,8 +17,7 @@ # pylint: disable=invalid-name, unused-variable, too-many-locals # pylint: disable=unused-argument, redefined-builtin, no-else-return """Conv3D operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .pad import pad from .util import get_pad_tuple3d @@ -30,10 +29,10 @@ def conv3d_ncdhw(Input, Filter, stride, padding, dilation, out_dtype=None): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_channel, in_depth, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width] stride : int or a list/tuple of three ints @@ -47,7 +46,7 @@ def conv3d_ncdhw(Input, Filter, stride, padding, dilation, out_dtype=None): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_depth, out_height, out_width] """ if out_dtype is None: @@ -80,14 +79,14 @@ def conv3d_ncdhw(Input, Filter, stride, padding, dilation, out_dtype=None): pad_before = [0, 0, pad_front, pad_top, pad_left] pad_after = [0, 0, pad_back, pad_down, pad_right] temp = pad(Input, pad_before, pad_after, name="pad_temp") - rc = tvm.reduce_axis((0, in_channel), name='rc') - rz = tvm.reduce_axis((0, kernel_d), name='rz') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + rz = te.reduce_axis((0, kernel_d), name='rz') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') - return tvm.compute( + return te.compute( (batch, out_channel, out_depth, out_height, out_width), - lambda nn, ff, zz, yy, xx: tvm.sum( + lambda nn, ff, zz, yy, xx: te.sum( temp[nn, rc, zz * stride_d + rz * dilation_d, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w].astype(out_dtype) * Filter[ff, rc, rz, ry, rx].astype(out_dtype), @@ -99,10 +98,10 @@ def conv3d_ndhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_depth, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of three ints @@ -116,7 +115,7 @@ def conv3d_ndhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_depth, out_height, out_width, out_channel] """ assert isinstance(stride, int) or len(stride) == 3 @@ -148,13 +147,13 @@ def conv3d_ndhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): pad_before = [0, pad_front, pad_top, pad_left, 0] pad_after = [0, pad_back, pad_down, pad_right, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") - rd = tvm.reduce_axis((0, kernel_d), name='rd') - rh = tvm.reduce_axis((0, kernel_h), name='rh') - rw = tvm.reduce_axis((0, kernel_w), name='rw') - rc = tvm.reduce_axis((0, in_channel), name='rc') - Output = tvm.compute( + rd = te.reduce_axis((0, kernel_d), name='rd') + rh = te.reduce_axis((0, kernel_h), name='rh') + rw = te.reduce_axis((0, kernel_w), name='rw') + rc = te.reduce_axis((0, in_channel), name='rc') + Output = te.compute( (batch, out_depth, out_height, out_width, out_channel), - lambda nn, dd, hh, ww, cc: tvm.sum( + lambda nn, dd, hh, ww, cc: te.sum( PaddedInput[nn, dd * stride_d + rd * dilation_d, hh * stride_h + rh * dilation_h, ww * stride_w + rw * dilation_w, rc].astype(out_dtype) * Filter[rd, rh, rw, rc, cc].astype(out_dtype), axis=[rd, rh, rw, rc]), diff --git a/topi/python/topi/nn/deformable_conv2d.py b/topi/python/topi/nn/deformable_conv2d.py index 251f68aa8c258..9f95fd1ae790c 100644 --- a/topi/python/topi/nn/deformable_conv2d.py +++ b/topi/python/topi/nn/deformable_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, too-many-locals, too-many-arguments """Deformable Conv2D operators""" import tvm +from tvm import te from .util import get_pad_tuple from ..util import get_const_tuple @@ -30,14 +31,14 @@ def deformable_conv2d_nchw(data, offset, kernel, strides, padding, dilation, def Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - offset : tvm.Tensor + offset : tvm.te.Tensor 4-D with shape [batch, deformable_groups * filter_height * filter_width * 2, out_height, out_width]. - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] strides : int or a list/tuple of two ints @@ -57,7 +58,7 @@ def deformable_conv2d_nchw(data, offset, kernel, strides, padding, dilation, def Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if out_dtype is None: @@ -85,30 +86,30 @@ def deformable_conv2d_nchw(data, offset, kernel, strides, padding, dilation, def dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 pad_top, pad_left, _, _ = get_pad_tuple( padding, (dilated_kernel_h, dilated_kernel_w)) - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') - zero = tvm.const(0.0, data.dtype) + zero = tvm.tir.const(0.0, data.dtype) def _bilinear(n, c, h, w): - outside = tvm.any(h < 0, w < 0, h >= in_height, w >= in_width) + outside = tvm.tir.any(h < 0, w < 0, h >= in_height, w >= in_width) val = bilinear_sample_nchw(data, (n, c, h, w), in_height - 1, in_width - 1) - return tvm.if_then_else(outside, zero, val) + return tvm.tir.if_then_else(outside, zero, val) data_deform = \ - tvm.compute((batch, in_channel, kernel_h, kernel_w, out_height, out_width), - lambda n, c, kh, kw, y, x: - _bilinear(n, c, - y * stride_h - pad_top + kh * dilation_h + - offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) + - (kh * kernel_w + kw) * 2, y, x], - x * stride_w - pad_left + kw * dilation_w + - offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) + - (kh * kernel_w + kw) * 2 + 1, y, x])) - return tvm.compute( + te.compute((batch, in_channel, kernel_h, kernel_w, out_height, out_width), + lambda n, c, kh, kw, y, x: + _bilinear(n, c, + y * stride_h - pad_top + kh * dilation_h + + offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) + + (kh * kernel_w + kw) * 2, y, x], + x * stride_w - pad_left + kw * dilation_w + + offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) + + (kh * kernel_w + kw) * 2 + 1, y, x])) + return te.compute( (batch, out_channel, out_height, out_width), - lambda n, f, y, x: tvm.sum( + lambda n, f, y, x: te.sum( data_deform[n, rc, ry, rx, y, x].astype(out_dtype) * kernel[f, rc, ry, rx].astype(out_dtype), axis=[rc, ry, rx]), tag="deformable_conv2d_nchw") diff --git a/topi/python/topi/nn/dense.py b/topi/python/topi/nn/dense.py index fe21e7417bdad..7d7ef6c23d56b 100644 --- a/topi/python/topi/nn/dense.py +++ b/topi/python/topi/nn/dense.py @@ -15,8 +15,7 @@ # specific language governing permissions and limitations # under the License. """TVM operator fully connected compute.""" -from __future__ import absolute_import -import tvm +from tvm import te from .. import tag def dense(data, weight, bias=None, out_dtype=None): @@ -24,13 +23,13 @@ def dense(data, weight, bias=None, out_dtype=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [out_dim] out_dtype : str @@ -38,7 +37,7 @@ def dense(data, weight, bias=None, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ assert len(data.shape) == 2 and len(weight.shape) == 2, \ @@ -49,13 +48,13 @@ def dense(data, weight, bias=None, out_dtype=None): out_dtype = data.dtype batch, in_dim = data.shape out_dim, _ = weight.shape - k = tvm.reduce_axis((0, in_dim), name='k') - matmul = tvm.compute((batch, out_dim), \ - lambda i, j: tvm.sum(data[i, k].astype(out_dtype) * \ - weight[j, k].astype(out_dtype), axis=k), \ - name='T_dense', tag='dense') + k = te.reduce_axis((0, in_dim), name='k') + matmul = te.compute((batch, out_dim), \ + lambda i, j: te.sum(data[i, k].astype(out_dtype) * \ + weight[j, k].astype(out_dtype), axis=k), \ + name='T_dense', tag='dense') if bias is not None: - matmul = tvm.compute((batch, out_dim), \ - lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), \ - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), \ + lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), \ + tag=tag.BROADCAST) return matmul diff --git a/topi/python/topi/nn/depth_to_space.py b/topi/python/topi/nn/depth_to_space.py index d847c08daf277..a9fbfea186cbc 100644 --- a/topi/python/topi/nn/depth_to_space.py +++ b/topi/python/topi/nn/depth_to_space.py @@ -18,6 +18,7 @@ """TVM operator depth_to_space compute.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag @@ -26,7 +27,7 @@ def depth_to_space(data, block_size, layout='NCHW', mode='DCR'): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D tensor in either NCHW or NHWC layout. block_size : int @@ -42,17 +43,17 @@ def depth_to_space(data, block_size, layout='NCHW', mode='DCR'): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor Output of shape [N, C / block_size**2, H * block_size, W * block_size] """ if layout == 'NCHW': in_n, in_c, in_h, in_w = data.shape - channel_factor = tvm.truncdiv(in_c, (block_size * block_size)) + channel_factor = tvm.tir.truncdiv(in_c, (block_size * block_size)) output_shape = [in_n, channel_factor, in_h * block_size, in_w * block_size] elif layout == 'NHWC': in_n, in_h, in_w, in_c = data.shape - channel_factor = tvm.truncdiv(in_c, (block_size * block_size)) + channel_factor = tvm.tir.truncdiv(in_c, (block_size * block_size)) output_shape = [in_n, in_h * block_size, in_w * block_size, channel_factor] else: @@ -66,10 +67,10 @@ def _get_indices(*indices): return n, c, y, x def _get_pixel(n, c, y, x): - block_x = tvm.truncdiv(x, block_size) - block_y = tvm.truncdiv(y, block_size) - idx_x = tvm.truncmod(x, block_size) - idx_y = tvm.truncmod(y, block_size) + block_x = tvm.tir.truncdiv(x, block_size) + block_y = tvm.tir.truncdiv(y, block_size) + idx_x = tvm.tir.truncmod(x, block_size) + idx_y = tvm.tir.truncmod(y, block_size) if mode == "DCR": channel_idx = channel_factor * ((block_size * idx_y) + idx_x) + c else: @@ -85,4 +86,4 @@ def _compute(*indices): n, c, y, x = _get_indices(*indices) return _get_pixel(n, c, y, x) - return tvm.compute(output_shape, _compute, name='depth_to_space', tag=tag.INJECTIVE) + return te.compute(output_shape, _compute, name='depth_to_space', tag=tag.INJECTIVE) diff --git a/topi/python/topi/nn/depthwise_conv2d.py b/topi/python/topi/nn/depthwise_conv2d.py index 49aaace0f833b..32a92585c8cbe 100644 --- a/topi/python/topi/nn/depthwise_conv2d.py +++ b/topi/python/topi/nn/depthwise_conv2d.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs from collections import namedtuple import tvm +from tvm import te from .dilate import dilate from .pad import pad @@ -52,10 +53,10 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=No Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, channel_multiplier, filter_height, filter_width] stride : tuple of two ints @@ -72,7 +73,7 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=No Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ out_dtype = Input.dtype if out_dtype is None else out_dtype @@ -104,13 +105,13 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=No pad_after = [0, 0, pad_down, pad_right] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") # depthconv stage - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - di = tvm.reduce_axis((0, filter_height), name='di') - dj = tvm.reduce_axis((0, filter_width), name='dj') - Output = tvm.compute( + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + di = te.reduce_axis((0, filter_height), name='di') + dj = te.reduce_axis((0, filter_width), name='dj') + Output = te.compute( (batch, out_channel, out_height, out_width), - lambda b, c, i, j: tvm.sum( + lambda b, c, i, j: te.sum( (PaddedInput[b, idxdiv(c, channel_multiplier), i*stride_h+di*dilation_h, j*stride_w+dj*dilation_w].astype(out_dtype) * Filter[idxdiv(c, channel_multiplier), @@ -125,10 +126,10 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype=No Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, channel_multiplier] stride : tuple of two ints @@ -145,7 +146,7 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype=No Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] """ out_dtype = Input.dtype if out_dtype is None else out_dtype @@ -177,14 +178,14 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype=No pad_after = [0, pad_down, pad_right, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") # depthconv stage - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - di = tvm.reduce_axis((0, filter_height), name='di') - dj = tvm.reduce_axis((0, filter_width), name='dj') - Output = tvm.compute( + di = te.reduce_axis((0, filter_height), name='di') + dj = te.reduce_axis((0, filter_width), name='dj') + Output = te.compute( (batch, out_height, out_width, out_channel), - lambda b, i, j, c: tvm.sum( + lambda b, i, j, c: te.sum( (PaddedInput[b, i*stride_h + di*dilation_h, j*stride_w + dj*dilation_w, idxdiv(c, channel_multiplier)].astype(out_dtype) * Filter[di, dj, @@ -199,10 +200,10 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid Parameters ---------- - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, channel_multiplier] - Out_grad : tvm.Tensor + Out_grad : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] stride : tuple of two ints @@ -213,7 +214,7 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] """ batch, in_h, in_w, in_c = ishape @@ -235,19 +236,19 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid bpad_right = (filter_w - 1 - fpad_right) + (stride_w - 1) padded_out_grad = pad(dilated_out_grad, \ - [0, bpad_top, bpad_left, 0], \ - [0, bpad_bottom, bpad_right, 0], \ - name='padded_out_grad') + [0, bpad_top, bpad_left, 0], \ + [0, bpad_bottom, bpad_right, 0], \ + name='padded_out_grad') - dh = tvm.reduce_axis((0, filter_h), name='dh') - dw = tvm.reduce_axis((0, filter_w), name='dw') - dc = tvm.reduce_axis((0, channel_multiplier), name='dc') + dh = te.reduce_axis((0, filter_h), name='dh') + dw = te.reduce_axis((0, filter_w), name='dw') + dc = te.reduce_axis((0, channel_multiplier), name='dc') - In_grad = tvm.compute( + In_grad = te.compute( (batch, in_h, in_w, in_c), - lambda b, h, w, c: tvm.sum(padded_out_grad[b, h+dh, w+dw, c*channel_multiplier + dc] * \ - Filter[filter_h-1-dh, filter_w-1-dw, c, dc], - axis=[dh, dw, dc]), tag='depthwise_conv2d_backward_input_nhwc') + lambda b, h, w, c: te.sum(padded_out_grad[b, h+dh, w+dw, c*channel_multiplier + dc] * \ + Filter[filter_h-1-dh, filter_w-1-dw, c, dc], + axis=[dh, dw, dc]), tag='depthwise_conv2d_backward_input_nhwc') return In_grad @@ -257,10 +258,10 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - Out_grad : tvm.Tensor + Out_grad : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] stride : tuple of two ints @@ -271,7 +272,7 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, channel_multiplier] """ batch, out_h, out_w, out_c = oshape @@ -285,19 +286,19 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (filter_h, filter_w)) padded_in = pad(Input, \ - [0, pad_top, pad_left, 0], \ - [0, pad_bottom, pad_right, 0], \ - name='padded_in') + [0, pad_top, pad_left, 0], \ + [0, pad_bottom, pad_right, 0], \ + name='padded_in') - dh = tvm.reduce_axis((0, Out_grad.shape[1].value), name='dh') - dw = tvm.reduce_axis((0, Out_grad.shape[2].value), name='dw') - db = tvm.reduce_axis((0, batch), name='db') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + dh = te.reduce_axis((0, Out_grad.shape[1].value), name='dh') + dw = te.reduce_axis((0, Out_grad.shape[2].value), name='dw') + db = te.reduce_axis((0, batch), name='db') + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - Weight_grad = tvm.compute( + Weight_grad = te.compute( (filter_h, filter_w, in_c, channel_multiplier), - lambda fh, fw, c, m: tvm.sum( + lambda fh, fw, c, m: te.sum( Out_grad[db, dh, dw, c*channel_multiplier+idxmod(m, channel_multiplier)] * padded_in[db, fh+dh*stride_h, fw+dw*stride_w, c], axis=[db, dh, dw]), tag='depthwise_conv2d_backward_weight_nhwc') @@ -311,10 +312,10 @@ def depthwise_conv2d_NCHWc(Input, Filter, stride, padding, dilation, Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 6-D with shape [out_channel_chunk, 1, filter_height, filter_width, 1, out_channel_block] In NCHWc depthwise convolution, we group kernel's in_channel and channel_multiplier together then do the tiling. @@ -339,7 +340,7 @@ def depthwise_conv2d_NCHWc(Input, Filter, stride, padding, dilation, Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block] """ raise ValueError("missing register for topi.nn.depthwise_conv2d_NCHWc") diff --git a/topi/python/topi/nn/dilate.py b/topi/python/topi/nn/dilate.py index d95245395a116..eab612df9ca14 100644 --- a/topi/python/topi/nn/dilate.py +++ b/topi/python/topi/nn/dilate.py @@ -16,18 +16,18 @@ # under the License. # pylint: disable=invalid-name """Dilation operators""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import util from .. import tag -@tvm.tag_scope(tag=tag.INJECTIVE+",dilate") +@te.tag_scope(tag=tag.INJECTIVE+",dilate") def dilate(data, strides, name="DilatedInput"): """Dilate data with zeros. Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D, can be any layout. strides : list / tuple of n ints @@ -38,7 +38,7 @@ def dilate(data, strides, name="DilatedInput"): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor n-D, the same layout as data. """ n = len(data.shape) @@ -52,8 +52,8 @@ def dilate(data, strides, name="DilatedInput"): def _dilate(*indices): not_zero = [] index_tuple = [] - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod for i in range(n): if not util.equal_const_int(strides[i], 1): index_tuple.append(idxdiv(indices[i], strides[i])) @@ -61,8 +61,9 @@ def _dilate(*indices): else: index_tuple.append(indices[i]) if not_zero: - not_zero = tvm.all(*not_zero) - return tvm.if_then_else(not_zero, data(*index_tuple), tvm.const(0.0, data.dtype)) + not_zero = tvm.tir.all(*not_zero) + return tvm.tir.if_then_else( + not_zero, data(*index_tuple), tvm.tir.const(0.0, data.dtype)) return data(*index_tuple) - return tvm.compute(out_shape, _dilate, name=name) + return te.compute(out_shape, _dilate, name=name) diff --git a/topi/python/topi/nn/elemwise.py b/topi/python/topi/nn/elemwise.py index e9f301942aa38..292dbca71b4d6 100644 --- a/topi/python/topi/nn/elemwise.py +++ b/topi/python/topi/nn/elemwise.py @@ -17,6 +17,7 @@ """Elementwise operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag from ..util import get_const_int @@ -26,15 +27,15 @@ def relu(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.max(x(*i), tvm.const(0, x.dtype))) + return te.compute(x.shape, lambda *i: tvm.te.max(x(*i), tvm.tir.const(0, x.dtype))) @tvm.tag_scope(tag=tag.ELEMWISE) @@ -43,7 +44,7 @@ def leaky_relu(x, alpha): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. alpha : float @@ -51,14 +52,14 @@ def leaky_relu(x, alpha): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ def _compute(*indices): value = x(*indices) - calpha = tvm.const(alpha, value.dtype) - return tvm.expr.Select(value > 0, value, value * calpha) - return tvm.compute(x.shape, _compute) + calpha = tvm.tir.const(alpha, value.dtype) + return tvm.tir.Select(value > 0, value, value * calpha) + return te.compute(x.shape, _compute) @tvm.tag_scope(tag=tag.BROADCAST) def prelu(x, slope, axis=1): @@ -68,17 +69,17 @@ def prelu(x, slope, axis=1): where :math:`*` is an elementwise multiplication for each sample in the batch. Arguments: - x : tvm.Tensor + x : tvm.te.Tensor Input argument. - slope : tvm.Tensor + slope : tvm.te.Tensor Channelised slope tensor for prelu axis : int The axis where the channel data needs to be applied Returns: - y : tvm.Tensor + y : tvm.te.Tensor The result. Links: @@ -91,5 +92,5 @@ def prelu(x, slope, axis=1): def _compute_channelwise(*indices): xval = x(*indices) - return tvm.expr.Select(xval > 0, xval, xval * slope(indices[axis])) - return tvm.compute(x.shape, _compute_channelwise) + return tvm.tir.Select(xval > 0, xval, xval * slope(indices[axis])) + return te.compute(x.shape, _compute_channelwise) diff --git a/topi/python/topi/nn/fifo_buffer.py b/topi/python/topi/nn/fifo_buffer.py index 946b8d1e31807..32008855bd129 100644 --- a/topi/python/topi/nn/fifo_buffer.py +++ b/topi/python/topi/nn/fifo_buffer.py @@ -18,6 +18,7 @@ """FIFO buffer op""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag from ..transform import concatenate, strided_slice @@ -42,16 +43,16 @@ def fifo_buffer(data, buffer, axis): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input data - buffer : tvm.Tensor + buffer : tvm.te.Tensor Previous value of the FIFO buffer axis : int Specify which axis should be used for buffering Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor Updated value for the buffer """ assert len(data.shape) == len(buffer.shape), \ @@ -70,80 +71,80 @@ def fifo_buffer(data, buffer, axis): # Explicitly write out formula up to 4D, and then use concat+slice combo for 5D and higher if len(buffer.shape) == 1: - return tvm.compute(buffer.shape, - lambda i: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size], - data[i - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size], + data[i - buflen + data_size]), + name='new_buffer') if len(buffer.shape) == 2: if axis == 0: - return tvm.compute(buffer.shape, - lambda i, j: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size, j], - data[i - buflen + data_size, j]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size, j], + data[i - buflen + data_size, j]), + name='new_buffer') if axis == 1: - return tvm.compute(buffer.shape, - lambda i, j: - tvm.if_then_else(j < buflen - data_size, - buffer[i, j + data_size], - data[i, j - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j: + tvm.tir.if_then_else(j < buflen - data_size, + buffer[i, j + data_size], + data[i, j - buflen + data_size]), + name='new_buffer') assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape)) elif len(buffer.shape) == 3: if axis == 0: - return tvm.compute(buffer.shape, - lambda i, j, k: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size, j, k], - data[i - buflen + data_size, j, k]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size, j, k], + data[i - buflen + data_size, j, k]), + name='new_buffer') if axis == 1: - return tvm.compute(buffer.shape, - lambda i, j, k: - tvm.if_then_else(j < buflen - data_size, - buffer[i, j + data_size, k], - data[i, j - buflen + data_size, k]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k: + tvm.tir.if_then_else(j < buflen - data_size, + buffer[i, j + data_size, k], + data[i, j - buflen + data_size, k]), + name='new_buffer') if axis == 2: - return tvm.compute(buffer.shape, - lambda i, j, k: - tvm.if_then_else(k < buflen - data_size, - buffer[i, j, k + data_size], - data[i, j, k - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k: + tvm.tir.if_then_else(k < buflen - data_size, + buffer[i, j, k + data_size], + data[i, j, k - buflen + data_size]), + name='new_buffer') assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape)) elif len(buffer.shape) == 4: if axis == 0: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size, j, k, l], - data[i - buflen + data_size, j, k, l]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size, j, k, l], + data[i - buflen + data_size, j, k, l]), + name='new_buffer') if axis == 1: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(j < buflen - data_size, - buffer[i, j + data_size, k, l], - data[i, j - buflen + data_size, k, l]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(j < buflen - data_size, + buffer[i, j + data_size, k, l], + data[i, j - buflen + data_size, k, l]), + name='new_buffer') if axis == 2: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(k < buflen - data_size, - buffer[i, j, k + data_size, l], - data[i, j, k - buflen + data_size, l]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(k < buflen - data_size, + buffer[i, j, k + data_size, l], + data[i, j, k - buflen + data_size, l]), + name='new_buffer') if axis == 3: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(l < buflen - data_size, - buffer[i, j, k, l + data_size], - data[i, j, k, l - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(l < buflen - data_size, + buffer[i, j, k, l + data_size], + data[i, j, k, l - buflen + data_size]), + name='new_buffer') assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape)) else: # Implement FIFO buffer as combination of concat and slice diff --git a/topi/python/topi/nn/flatten.py b/topi/python/topi/nn/flatten.py index dba9b7cd60050..63636b98c138e 100644 --- a/topi/python/topi/nn/flatten.py +++ b/topi/python/topi/nn/flatten.py @@ -17,6 +17,7 @@ """TVM operator flatten compute.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag @tvm.tag_scope(tag=tag.INJECTIVE) @@ -25,12 +26,12 @@ def flatten(data): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor Input array. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D array with collapsed higher dimensions. """ ishape = data.shape @@ -38,8 +39,8 @@ def flatten(data): for i in range(1, len(ishape)): dim = dim * ishape[i] oshape = [ishape[0], dim] - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod def unwrap(idx, shape): index = [] @@ -48,4 +49,4 @@ def unwrap(idx, shape): idx = idxdiv(idx, s) return list(reversed(index)) - return tvm.compute(oshape, lambda i, j: data(i, *unwrap(j, ishape[1:]))) + return te.compute(oshape, lambda i, j: data(i, *unwrap(j, ishape[1:]))) diff --git a/topi/python/topi/nn/local_response_norm.py b/topi/python/topi/nn/local_response_norm.py index 1b41c7dbfb5e3..35c76d2b0a6ae 100644 --- a/topi/python/topi/nn/local_response_norm.py +++ b/topi/python/topi/nn/local_response_norm.py @@ -31,7 +31,7 @@ def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] size : int @@ -52,7 +52,7 @@ def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D output with same shape """ return cpp.nn.lrn(data, size, axis, alpha, beta, bias) diff --git a/topi/python/topi/nn/mapping.py b/topi/python/topi/nn/mapping.py index b2222bdeb87dd..101bbd80087c2 100644 --- a/topi/python/topi/nn/mapping.py +++ b/topi/python/topi/nn/mapping.py @@ -18,6 +18,7 @@ """Operators of one-to-one-mapping on the first input""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag @tvm.tag_scope(tag=tag.BROADCAST) @@ -26,21 +27,21 @@ def scale_shift_nchw(Input, Scale, Shift): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor Input tensor, layout is NCHW - Scale : tvm.Tensor + Scale : tvm.te.Tensor Scale tensor, 1-D of size channel number - Shift : tvm.Tensor + Shift : tvm.te.Tensor Shift tensor, 1-D of size channel number Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor Output tensor, layout is NCHW """ - return tvm.compute(Input.shape, lambda b, c, i, j: Input[b, c, i, j] * Scale[c] + Shift[c], name='ScaleShift') + return te.compute(Input.shape, lambda b, c, i, j: Input[b, c, i, j] * Scale[c] + Shift[c], name='ScaleShift') @tvm.tag_scope(tag=tag.BROADCAST) @@ -49,18 +50,18 @@ def scale_shift_nhwc(Input, Scale, Shift): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor Input tensor, layout is NHWC - Scale : tvm.Tensor + Scale : tvm.te.Tensor Scale tensor, 1-D of size channel number - Shift : tvm.Tensor + Shift : tvm.te.Tensor Shift tensor, 1-D of size channel number Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor Output tensor, layout is NHWC """ - return tvm.compute(Input.shape, lambda b, i, j, c: Input[b, i, j, c] * Scale[c] + Shift[c], name='ScaleShift') + return te.compute(Input.shape, lambda b, i, j, c: Input[b, i, j, c] * Scale[c] + Shift[c], name='ScaleShift') diff --git a/topi/python/topi/nn/pad.py b/topi/python/topi/nn/pad.py index 13f8e720288b6..6617c3aa8237a 100644 --- a/topi/python/topi/nn/pad.py +++ b/topi/python/topi/nn/pad.py @@ -17,6 +17,7 @@ """Pad the data by constant value """ from __future__ import absolute_import as _abs import tvm +from tvm import te from ..util import equal_const_int from .. import tag @@ -26,7 +27,7 @@ def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D input, can be any layout. pad_before : list / tuple of n ints @@ -43,7 +44,7 @@ def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor n-D, the same layout as Input. """ n = len(data.shape) @@ -58,7 +59,7 @@ def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"): tvm.ir_pass.Simplify( (data.shape[i] + pad_before[i] + pad_after[i])) for i in range(n)) pad_value = (pad_value if isinstance(pad_value, tvm.expr.PrimExpr) - else tvm.const(pad_value, data.dtype)) + else tvm.tir.const(pad_value, data.dtype)) def _pad(*indices): not_zero = [] index_tuple = [] @@ -70,10 +71,10 @@ def _pad(*indices): not_zero.append(indices[i] >= pad_before[i]) not_zero.append(indices[i] < data.shape[i] + pad_before[i]) if not_zero: - not_zero = tvm.all(*not_zero) - return tvm.if_then_else(not_zero, data(*index_tuple), pad_value) + not_zero = tvm.tir.all(*not_zero) + return tvm.tir.if_then_else(not_zero, data(*index_tuple), pad_value) return data(*index_tuple) - return tvm.compute(out_shape, _pad, name=name) + return te.compute(out_shape, _pad, name=name) @tvm.tag_scope(tag=tag.INJECTIVE + ",pad") @@ -86,7 +87,7 @@ def mirror_pad(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D input, can be any layout. pad_before : list / tuple of n ints @@ -103,7 +104,7 @@ def mirror_pad(data, Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor n-D, the same layout as Input. """ n = len(data.shape) @@ -136,10 +137,10 @@ def _pad(*indices): below.append(indices[i] < pad_before[i]) mapped_tuple = [] for i, axis in enumerate(index_tuple): - mapped_axis = tvm.if_then_else(below[i], -axis - mode, axis) - mapped_axis = tvm.if_then_else( + mapped_axis = tvm.tir.if_then_else(below[i], -axis - mode, axis) + mapped_axis = tvm.tir.if_then_else( above[i], (2 * (data.shape[i] - 1)) - axis + mode, mapped_axis) mapped_tuple.append(mapped_axis) return data(*mapped_tuple) - return tvm.compute(out_shape, _pad, name=name) + return te.compute(out_shape, _pad, name=name) diff --git a/topi/python/topi/nn/pooling.py b/topi/python/topi/nn/pooling.py index 5fd2dedf9619e..e3d57ce8ed780 100644 --- a/topi/python/topi/nn/pooling.py +++ b/topi/python/topi/nn/pooling.py @@ -34,7 +34,7 @@ def global_pool(data, pool_type, layout="NCHW"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout pool_type : str @@ -51,7 +51,7 @@ def global_pool(data, pool_type, layout="NCHW"): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in same layout with height and width dimension size of 1. e.g., for NCHW, the output shape will be [batch, channel, 1, 1] """ @@ -76,7 +76,7 @@ def pool(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of two ints @@ -108,7 +108,7 @@ def pool(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.pool(data, kernel, stride, padding, @@ -133,10 +133,10 @@ def pool_grad(grads, Parameters ---------- - grads : tvm.Tensor + grads : tvm.te.Tensor n-D with shape of layout - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of two ints @@ -168,7 +168,7 @@ def pool_grad(grads, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.pool_grad(grads, data, kernel, @@ -192,7 +192,7 @@ def adaptive_pool(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout output_size : tuple of int @@ -212,7 +212,7 @@ def adaptive_pool(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.adaptive_pool(data, output_size, POOL_TYPE_CODE[pool_type], layout) @@ -236,7 +236,7 @@ def pool1d(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of one int or int @@ -268,7 +268,7 @@ def pool1d(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ if isinstance(kernel, int): @@ -297,7 +297,7 @@ def pool3d(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of three ints @@ -329,7 +329,7 @@ def pool3d(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.pool3d(data, kernel, stride, padding, diff --git a/topi/python/topi/nn/softmax.py b/topi/python/topi/nn/softmax.py index 16ffd797aafc2..8765a3558c366 100644 --- a/topi/python/topi/nn/softmax.py +++ b/topi/python/topi/nn/softmax.py @@ -18,6 +18,7 @@ """TVM operator for softmax and log_softmax compute.""" from __future__ import absolute_import import tvm +from tvm import te @tvm.tag_scope(tag='softmax_output') def softmax(x, axis=-1): @@ -25,7 +26,7 @@ def softmax(x, axis=-1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor can be any dimension axis : int @@ -33,7 +34,7 @@ def softmax(x, axis=-1): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor output shape is the same as input """ shape = x.shape @@ -42,8 +43,8 @@ def softmax(x, axis=-1): if axis >= len(shape): ValueError("axis parameter should be less than input dim") - k1 = tvm.reduce_axis((0, shape[axis]), name='k') - k2 = tvm.reduce_axis((0, shape[axis]), name='k') + k1 = te.reduce_axis((0, shape[axis]), name='k') + k2 = te.reduce_axis((0, shape[axis]), name='k') def insert_reduce_index(indices, reduce_index): return indices[:axis] + (reduce_index,) + indices[axis:] @@ -53,28 +54,28 @@ def get_non_reduce_indices(indices): def _compute_max(*indices): eval_range = insert_reduce_index(indices, k1) - return tvm.max(x[eval_range], axis=k1) + return tvm.te.max(x[eval_range], axis=k1) def _compute_exp(max_elem, *indices): non_reduce_indices = get_non_reduce_indices(indices) - return tvm.exp(x[indices] - max_elem[non_reduce_indices]) + return te.exp(x[indices] - max_elem[non_reduce_indices]) def _compute_expsum(exp, *indices): eval_range = insert_reduce_index(indices, k2) - return tvm.sum(exp[eval_range], axis=k2) + return te.sum(exp[eval_range], axis=k2) def _normalize(exp, expsum, *indices): non_reduce_indices = get_non_reduce_indices(indices) return exp[indices] / expsum[non_reduce_indices] reduced_shape = tuple([dim for (i, dim) in enumerate(shape) if i != axis]) - max_elem = tvm.compute(reduced_shape, _compute_max, name='T_softmax_maxelem') - exp = tvm.compute(shape, lambda *indices: _compute_exp(max_elem, *indices), - name='T_softmax_exp') - expsum = tvm.compute(reduced_shape, lambda *indices: _compute_expsum(exp, *indices), - name='T_softmax_expsum') - return tvm.compute(shape, lambda *indices: _normalize(exp, expsum, *indices), - name='T_softmax_norm', attrs={"axis" : axis}) + max_elem = te.compute(reduced_shape, _compute_max, name='T_softmax_maxelem') + exp = te.compute(shape, lambda *indices: _compute_exp(max_elem, *indices), + name='T_softmax_exp') + expsum = te.compute(reduced_shape, lambda *indices: _compute_expsum(exp, *indices), + name='T_softmax_expsum') + return te.compute(shape, lambda *indices: _normalize(exp, expsum, *indices), + name='T_softmax_norm', attrs={"axis" : axis}) @tvm.tag_scope(tag='log_softmax_output') @@ -83,21 +84,21 @@ def log_softmax(x): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D input data Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D output with same shape """ assert len(x.shape) == 2, "only support 2-dim log softmax" m, n = x.shape - k = tvm.reduce_axis((0, n), name='k') - max_elem = tvm.compute((m, ), lambda i: tvm.max(x[i, k], axis=k)) - k = tvm.reduce_axis((0, n), name='k') - expsum = tvm.compute( - (m, ), lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k)) - return tvm.compute( - x.shape, lambda i, j: x[i, j] - max_elem[i] - tvm.log(expsum[i])) + k = te.reduce_axis((0, n), name='k') + max_elem = te.compute((m, ), lambda i: tvm.te.max(x[i, k], axis=k)) + k = te.reduce_axis((0, n), name='k') + expsum = te.compute( + (m, ), lambda i: te.sum(te.exp(x[i, k] - max_elem[i]), axis=k)) + return te.compute( + x.shape, lambda i, j: x[i, j] - max_elem[i] - te.log(expsum[i])) diff --git a/topi/python/topi/nn/space_to_depth.py b/topi/python/topi/nn/space_to_depth.py index 6ed7cd64a4489..b90bd118287ba 100644 --- a/topi/python/topi/nn/space_to_depth.py +++ b/topi/python/topi/nn/space_to_depth.py @@ -18,6 +18,7 @@ """TVM operator space_to_depth compute.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag @@ -26,7 +27,7 @@ def space_to_depth(data, block_size, layout='NCHW'): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D tensor in either NCHW or NHWC layout. block_size : int @@ -37,17 +38,17 @@ def space_to_depth(data, block_size, layout='NCHW'): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor Output of shape [N, C * block_size**2, H / block_size, W / block_size] """ if layout == 'NCHW': in_n, in_c, in_h, in_w = data.shape output_shape = [in_n, in_c * block_size * block_size, - tvm.truncdiv(in_h, block_size), tvm.truncdiv(in_w, block_size)] + tvm.tir.truncdiv(in_h, block_size), tvm.tir.truncdiv(in_w, block_size)] elif layout == 'NHWC': in_n, in_h, in_w, in_c = data.shape - output_shape = [in_n, tvm.truncdiv(in_h, block_size), tvm.truncdiv( + output_shape = [in_n, tvm.tir.truncdiv(in_h, block_size), tvm.tir.truncdiv( in_w, block_size), in_c * block_size * block_size] else: raise ValueError("Only NCHW and NHWC layouts are currently supported.") @@ -60,10 +61,10 @@ def _get_indices(*indices): return n, c, y, x def _get_pixel(n, c, y, x): - block_offset = tvm.truncdiv(c, in_c) - channel_idx = tvm.truncmod(c, in_c) - x_idx = tvm.truncmod(block_offset, block_size) - y_idx = tvm.truncdiv(block_offset, block_size) + block_offset = tvm.tir.truncdiv(c, in_c) + channel_idx = tvm.tir.truncmod(c, in_c) + x_idx = tvm.tir.truncmod(block_offset, block_size) + y_idx = tvm.tir.truncdiv(block_offset, block_size) if layout == 'NCHW': output = data(n, channel_idx, y_idx + @@ -77,4 +78,4 @@ def _compute(*indices): n, c, y, x = _get_indices(*indices) return _get_pixel(n, c, y, x) - return tvm.compute(output_shape, _compute, name='space_to_depth', tag=tag.INJECTIVE) + return te.compute(output_shape, _compute, name='space_to_depth', tag=tag.INJECTIVE) diff --git a/topi/python/topi/nn/sparse.py b/topi/python/topi/nn/sparse.py index 6974ff4a13abd..c4f686e08d7df 100644 --- a/topi/python/topi/nn/sparse.py +++ b/topi/python/topi/nn/sparse.py @@ -18,6 +18,7 @@ """Sparse operators""" from __future__ import absolute_import import tvm +from tvm import te from ..util import get_const_tuple @@ -29,24 +30,24 @@ def sparse_dense(data, weight_data, weight_indices, weight_indptr): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor 2-D with shape [M, K], float32 - weight_data : tvm.Tensor + weight_data : tvm.te.Tensor 1-D with shape [nnz] (CSR) or 3-D with shape [num_blocks, bs_r, bs_c] (BSR) - weight_indices : tvm.Tensor + weight_indices : tvm.te.Tensor 1-D with shape [nnz] (CSR) or 1-D with shape [num_blocks] (BSR) - weight_indptr : tvm.Tensor + weight_indptr : tvm.te.Tensor 1-D with shape [N + 1] (CSR) or 1-D with shape [(N + 1) // bs_r] (BSR) Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [M, N] """ assert len(weight_data.shape) in (1, 3) @@ -66,12 +67,12 @@ def f(i, row): row_start = weight_indptr[row] row_end = weight_indptr[row + 1] row_elems = row_end - row_start - elem_idx = tvm.reduce_axis((0, row_elems), name="elem_idx") + elem_idx = te.reduce_axis((0, row_elems), name="elem_idx") elem = row_start + elem_idx a_val = weight_data[elem] weight_val = data[i, weight_indices[elem]] - return tvm.sum(a_val * weight_val, axis=elem_idx) - return tvm.compute(oshape, f, tag="sparse_dense_csrmm") + return te.sum(a_val * weight_val, axis=elem_idx) + return te.compute(oshape, f, tag="sparse_dense_csrmm") def _sparse_dense_bsrmm(data, weight_data, weight_indices, weight_indptr): @@ -84,22 +85,22 @@ def _compute_block(i, nb_j, j): row_start = weight_indptr[nb_j] row_end = weight_indptr[nb_j + 1] row_elems = row_end - row_start - elem_idx = tvm.reduce_axis( + elem_idx = te.reduce_axis( (0, row_elems), name="elem_idx") block_offset = row_start + elem_idx - c = tvm.reduce_axis((0, bs_c), name="c") + c = te.reduce_axis((0, bs_c), name="c") block_j = weight_indices[block_offset] block_ij_val = weight_data[block_offset][j][c] x_val = data[i, bs_c * block_j + c] - return tvm.sum(block_ij_val * x_val, axis=[elem_idx, c]) + return te.sum(block_ij_val * x_val, axis=[elem_idx, c]) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod - bsrmm_block = tvm.compute( + bsrmm_block = te.compute( (m, num_blocks, bs_r), _compute_block, tag="sparse_dense_bsrmm_block") - return tvm.compute( + return te.compute( (m, num_blocks * bs_r), lambda m, n: bsrmm_block[m, idxd(n, bs_r), idxm(n, bs_r)], tag="sparse_dense_bsrmm") @@ -113,24 +114,24 @@ def sparse_transpose(sparse_data, sparse_indices, sparse_indptr): Parameters ---------- - sparse_data : tvm.Tensor + sparse_data : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'float32' - sparse_indices : tvm.Tensor + sparse_indices : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'int32' - sparse_indptr : tvm.Tensor + sparse_indptr : tvm.te.Tensor 1-D with shape [n+1], dtype of 'int32' Returns ------- - out_data : tvm.Tensor + out_data : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'float32' - out_indices : tvm.Tensor + out_indices : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'int32' - out_indptr : tvm.Tensor + out_indptr : tvm.te.Tensor 1-D with shape [n+1], dtype of 'int32' """ assert len(sparse_data.shape) == 1, "error in data dimension" @@ -143,7 +144,7 @@ def sparse_transpose(sparse_data, sparse_indices, sparse_indptr): # TODO: Add BSR transpose support - output_data, output_indices, output_indptr = tvm.extern( + output_data, output_indices, output_indptr = te.extern( shape=output_shape, inputs=[sparse_data, sparse_indices, sparse_indptr], fcompute=lambda ins, outs: diff --git a/topi/python/topi/nn/upsampling.py b/topi/python/topi/nn/upsampling.py index c816bbb3c04ed..008e52e337ae5 100644 --- a/topi/python/topi/nn/upsampling.py +++ b/topi/python/topi/nn/upsampling.py @@ -15,9 +15,8 @@ # specific language governing permissions and limitations # under the License. """TVM operator upsampling compute.""" -from __future__ import absolute_import import topi -import tvm +from tvm import te from ..util import simplify @@ -28,7 +27,7 @@ def upsampling(data, scale_h, scale_w, layout="NCHW", method='nearest_neighbor', Parameters ---------- - inputs : tvm.Tensor + inputs : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -47,17 +46,17 @@ def upsampling(data, scale_h, scale_w, layout="NCHW", method='nearest_neighbor', Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, channel, in_height*scale_h, in_width*scale_w] or [batch, in_height*scale, in_width*scale, channel] """ base_layout = layout[0:4] if base_layout == "NCHW": - out_shape = (simplify(topi.cast(tvm.round(data.shape[2] * scale_h), data.shape[2].dtype)), - simplify(topi.cast(tvm.round(data.shape[3] * scale_w), data.shape[3].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[2] * scale_h), data.shape[2].dtype)), + simplify(topi.cast(te.round(data.shape[3] * scale_w), data.shape[3].dtype))) elif layout == "NHWC": - out_shape = (simplify(topi.cast(tvm.round(data.shape[1] * scale_h), data.shape[1].dtype)), - simplify(topi.cast(tvm.round(data.shape[2] * scale_w), data.shape[2].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[1] * scale_h), data.shape[1].dtype)), + simplify(topi.cast(te.round(data.shape[2] * scale_w), data.shape[2].dtype))) else: raise ValueError("not support this layout {} yet".format(layout)) @@ -73,7 +72,7 @@ def upsampling3d(data, scale_d, scale_h, scale_w, layout="NCDHW", method='neares Parameters ---------- - inputs : tvm.Tensor + inputs : tvm.te.Tensor inputs is a 5-D tensor with shape [batch, channel, in_depth, in_height, in_width] or [batch, in_depth, in_height, in_width, channel] @@ -101,19 +100,19 @@ def upsampling3d(data, scale_d, scale_h, scale_w, layout="NCDHW", method='neares Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, channel, in_depth*scale, in_height*scale, in_width*scale] or [batch, in_depth*scale, in_height*scale, in_width*scale, channel] """ base_layout = layout[0:5] if base_layout == "NCDHW": - out_shape = (simplify(topi.cast(tvm.round(data.shape[2] * scale_d), data.shape[2].dtype)), - simplify(topi.cast(tvm.round(data.shape[3] * scale_h), data.shape[3].dtype)), - simplify(topi.cast(tvm.round(data.shape[4] * scale_w), data.shape[4].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[2] * scale_d), data.shape[2].dtype)), + simplify(topi.cast(te.round(data.shape[3] * scale_h), data.shape[3].dtype)), + simplify(topi.cast(te.round(data.shape[4] * scale_w), data.shape[4].dtype))) elif layout == "NDHWC": - out_shape = (simplify(topi.cast(tvm.round(data.shape[1] * scale_d), data.shape[1].dtype)), - simplify(topi.cast(tvm.round(data.shape[2] * scale_h), data.shape[2].dtype)), - simplify(topi.cast(tvm.round(data.shape[3] * scale_w), data.shape[3].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[1] * scale_d), data.shape[1].dtype)), + simplify(topi.cast(te.round(data.shape[2] * scale_h), data.shape[2].dtype)), + simplify(topi.cast(te.round(data.shape[3] * scale_w), data.shape[3].dtype))) else: raise ValueError("not support this layout {} yet".format(layout)) diff --git a/topi/python/topi/nn/util.py b/topi/python/topi/nn/util.py index f0cdd9a0d3c26..5a9b49e3eceb2 100644 --- a/topi/python/topi/nn/util.py +++ b/topi/python/topi/nn/util.py @@ -107,8 +107,8 @@ def infer_stride(data, kernel, out): _, _, IH, IW = data.shape _, _, KH, KW = kernel.shape _, _, OH, OW = out.shape - hstride = (IH - KH) // tvm.make.Max(OH - 1, 1) + tvm.expr.Select(OH == 1, 1, 0) - wstride = (IW - KW) // tvm.make.Max(OW - 1, 1) + tvm.expr.Select(OW == 1, 1, 0) + hstride = (IH - KH) // tvm.te.max(OH - 1, 1) + tvm.tir.Select(OH == 1, 1, 0) + wstride = (IW - KW) // tvm.te.max(OW - 1, 1) + tvm.tir.Select(OW == 1, 1, 0) return get_const_int(hstride), get_const_int(wstride) diff --git a/topi/python/topi/nn/winograd_util.py b/topi/python/topi/nn/winograd_util.py index 464b63301b409..d967431719fff 100644 --- a/topi/python/topi/nn/winograd_util.py +++ b/topi/python/topi/nn/winograd_util.py @@ -55,7 +55,7 @@ def _B_m(a, n): f = lambda j, i: reduce(mul, ((a[i]-a[k] if k != i else 1) for k in range(0, n-1)), 1) Ff = np.fromfunction(np.vectorize(f), (1, n-1), dtype=int) f = lambda i, nth: (reduce(mul, [(np.poly1d([1, -a[k]]) if k != i else 1) \ - for k in range(0, n-1)], 1)).coef[n-1-nth-1]/Ff[0, i] + for k in range(0, n-1)], 1)).coef[n-1-nth-1]/Ff[0, i] F = np.fromfunction(np.vectorize(f), (n-1, n-1), dtype=int) f = lambda i, j: -a[i]**(n-1) t = np.fromfunction(np.vectorize(f), (n-1, 1), dtype=int) diff --git a/topi/python/topi/opengl/conv2d_nchw.py b/topi/python/topi/opengl/conv2d_nchw.py index 52ed11972e6fa..c93bcc25daefe 100644 --- a/topi/python/topi/opengl/conv2d_nchw.py +++ b/topi/python/topi/opengl/conv2d_nchw.py @@ -17,6 +17,7 @@ #pylint: disable=invalid-name, no-member, too-many-locals, too-many-statements, too-many-arguments, too-many-branches, line-too-long """Schedule for conv2d_nchw with auto fusion""" import tvm +from tvm import te from .. import tag def schedule_conv2d_nchw(outs): @@ -33,8 +34,8 @@ def schedule_conv2d_nchw(outs): s: Schedule The computation schedule for conv2d_nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(conv2d, data): @@ -53,14 +54,14 @@ def traverse(OP): if OP not in s.outputs: s[OP].opengl() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, tvm.te.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule conv2d_nchw elif OP.tag.startswith('conv2d_nchw'): conv2d = OP.output(0) data = OP.input_tensors[0] kernel = OP.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule(conv2d, data) else: diff --git a/topi/python/topi/opengl/dense.py b/topi/python/topi/opengl/dense.py index db2c4a6779044..715f713d56d65 100644 --- a/topi/python/topi/opengl/dense.py +++ b/topi/python/topi/opengl/dense.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable """Schedule for dense operator""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .. import tag def schedule_dense(outs): @@ -34,8 +33,8 @@ def schedule_dense(outs): s: Schedule The computation schedule for dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(Dense): @@ -53,7 +52,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule dense elif OP.tag == 'dense': diff --git a/topi/python/topi/opengl/injective.py b/topi/python/topi/opengl/injective.py index 28dc87d1a5fb9..3d45247413d29 100644 --- a/topi/python/topi/opengl/injective.py +++ b/topi/python/topi/opengl/injective.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, """Schedule for composition of injective operator""" -import tvm +from tvm import te def schedule_injective_from_existing(sch, out): """Schedule for injective op from existing schedule. @@ -50,10 +50,10 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + te.schedule.AutoInlineInjective(s) for out in outs: schedule_injective_from_existing(s, out) return s diff --git a/topi/python/topi/opengl/pooling.py b/topi/python/topi/opengl/pooling.py index 3226422048e5f..c30389c7b72c6 100644 --- a/topi/python/topi/opengl/pooling.py +++ b/topi/python/topi/opengl/pooling.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for pooling operators""" -import tvm +from tvm import te from .. import tag def schedule_adaptive_pool(outs): @@ -33,8 +33,8 @@ def schedule_adaptive_pool(outs): s: Schedule The computation schedule for adaptive pool. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(Pool): @@ -52,7 +52,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].opengl() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('adaptive_pool'): @@ -84,12 +84,12 @@ def schedule_pool(outs, layout): s: Schedule The computation schedule for pool. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(PaddedInput, Pool): - if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): + if isinstance(PaddedInput.op, te.tensor.ComputeOp): s[PaddedInput].opengl() if Pool.op in s.outputs: Out = Pool @@ -105,7 +105,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op not in scheduled_ops and isinstance(tensor.op, tvm.tensor.ComputeOp): + if tensor.op not in scheduled_ops and isinstance(tensor.op, te.tensor.ComputeOp): traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): diff --git a/topi/python/topi/opengl/softmax.py b/topi/python/topi/opengl/softmax.py index ff218d13c2b16..e725134494fcd 100644 --- a/topi/python/topi/opengl/softmax.py +++ b/topi/python/topi/opengl/softmax.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, trailing-whitespace """Schedule for softmax operator""" -import tvm +from tvm import te def schedule_softmax(outs): """Schedule for softmax op. @@ -32,8 +32,8 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) softmax = outs[0] op_tag = softmax.op.tag diff --git a/topi/python/topi/reduction.py b/topi/python/topi/reduction.py index 7c4e059d8334e..74ba688483531 100644 --- a/topi/python/topi/reduction.py +++ b/topi/python/topi/reduction.py @@ -45,7 +45,7 @@ def sum(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -60,7 +60,7 @@ def sum(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.sum(data, axis, keepdims) @@ -70,7 +70,7 @@ def all(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm boolean tensor axis : None or int or tuple of int @@ -85,7 +85,7 @@ def all(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.all(data, axis, keepdims) @@ -95,7 +95,7 @@ def any(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm boolean tensor axis : None or int or tuple of int @@ -110,7 +110,7 @@ def any(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.any(data, axis, keepdims) @@ -120,7 +120,7 @@ def max(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -135,7 +135,7 @@ def max(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.max(data, axis, keepdims) @@ -145,7 +145,7 @@ def min(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -160,7 +160,7 @@ def min(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.min(data, axis, keepdims) @@ -170,7 +170,7 @@ def argmax(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -185,7 +185,7 @@ def argmax(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.argmax(data, axis, keepdims) @@ -195,7 +195,7 @@ def argmin(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -210,7 +210,7 @@ def argmin(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.argmin(data, axis, keepdims) @@ -220,7 +220,7 @@ def prod(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -235,6 +235,6 @@ def prod(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.prod(data, axis, keepdims) diff --git a/topi/python/topi/rocm/conv2d.py b/topi/python/topi/rocm/conv2d.py index ce56dc4e08477..713647e4ca8ae 100644 --- a/topi/python/topi/rocm/conv2d.py +++ b/topi/python/topi/rocm/conv2d.py @@ -32,10 +32,10 @@ def conv2d_nchw_miopen(cfg, data, kernel, strides, padding, dilation, out_dtype= cfg: ConfigEntity The config for this template - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] strides : int or a list/tuple of two ints @@ -51,7 +51,7 @@ def conv2d_nchw_miopen(cfg, data, kernel, strides, padding, dilation, out_dtype= Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ @@ -67,7 +67,7 @@ def conv2d_nchw_miopen(cfg, data, kernel, strides, padding, dilation, out_dtype= OH = (H + 2 * pad_h - KH) // stride_h + 1 OW = (W + 2 * pad_w - KW) // stride_w + 1 cfg.add_flop(2 * N * OH * OW * CO * CI * ((KH - 1) * dilation_h + 1) *\ - ((KW - 1) * dilation_w + 1)) + ((KW - 1) * dilation_w + 1)) return miopen.conv2d_forward(data, kernel, diff --git a/topi/python/topi/rocm/dense.py b/topi/python/topi/rocm/dense.py index 8729a62bd677f..097120da88d65 100644 --- a/topi/python/topi/rocm/dense.py +++ b/topi/python/topi/rocm/dense.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for dense operator""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm import autotvm from tvm.contrib import rocblas from .. import generic, nn @@ -30,13 +29,13 @@ def dense(cfg, data, weight, bias=None, out_dtype=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [out_dim] out_dtype : str @@ -44,7 +43,7 @@ def dense(cfg, data, weight, bias=None, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ assert len(data.shape) == 2 and len(weight.shape) == 2, \ @@ -71,8 +70,8 @@ def schedule_dense(cfg, outs): s: Schedule The computation schedule for dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -87,11 +86,11 @@ def _callback(op): else: Out = outs[0].op.output(0) s[Dense].compute_at(s[Out], s[Out].op.axis[1]) - s[Out].bind(s[Out].op.axis[0], tvm.thread_axis("blockIdx.y")) - s[Out].bind(s[Out].op.axis[1], tvm.thread_axis("blockIdx.x")) + s[Out].bind(s[Out].op.axis[0], te.thread_axis("blockIdx.y")) + s[Out].bind(s[Out].op.axis[1], te.thread_axis("blockIdx.x")) tx = s[Dense].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[Dense].bind(tx, thread_x) s[DenseF].compute_at(s[Dense], tx) s[Dense].set_store_predicate(thread_x.var.equal(0)) @@ -107,13 +106,13 @@ def dense_rocblas(cfg, data, weight, bias=None, out_dtype=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [out_dim] out_dtype : str @@ -121,7 +120,7 @@ def dense_rocblas(cfg, data, weight, bias=None, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ assert out_dtype == data.dtype, "Mixed precision not supported." @@ -130,9 +129,9 @@ def dense_rocblas(cfg, data, weight, bias=None, out_dtype=None): out_dim, _ = weight.shape cfg.add_flop(batch * in_dim * out_dim * 2) if bias is not None: - matmul = tvm.compute((batch, out_dim), - lambda i, j: matmul[i, j] + bias[j], - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), + lambda i, j: matmul[i, j] + bias[j], + tag=tag.BROADCAST) return matmul diff --git a/topi/python/topi/sort.py b/topi/python/topi/sort.py index 96a088923d2dc..744da622adc20 100644 --- a/topi/python/topi/sort.py +++ b/topi/python/topi/sort.py @@ -17,7 +17,7 @@ # pylint: disable=too-many-arguments """Argsort operator""" import tvm -from tvm import api +from tvm import te from .util import get_const_tuple def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): @@ -27,14 +27,14 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tensor. - valid_count : tvm.Tensor, optional + valid_count : tvm.te.Tensor, optional 1-D tensor for valid number of boxes only for ssd. axis : int, optional - Axis along which to sort the input tensor. + Axis along which to sort the input tensor. By default the flattened array is used. is_ascend : boolean, optional @@ -45,7 +45,7 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Sorted index tensor. Example @@ -54,7 +54,7 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): # An example to use argsort dshape = (1, 5, 6) - data = tvm.placeholder(dshape, name="data") + data = te.placeholder(dshape, name="data") axis = 0 is_ascend = False out = argsort(data, axis=axis, is_ascend=is_ascend) @@ -66,35 +66,36 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx) f(tvm_data, tvm_out) """ - data_buf = api.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) + data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) if valid_count is not None: - valid_count_buf = api.decl_buffer(valid_count.shape, valid_count.dtype, - "valid_count_buf", data_alignment=4) - out_buf = api.decl_buffer(data.shape, "int32", "out_buf", data_alignment=8) + valid_count_buf = tvm.tir.decl_buffer( + valid_count.shape, valid_count.dtype, + "valid_count_buf", data_alignment=4) + out_buf = tvm.tir.decl_buffer(data.shape, "int32", "out_buf", data_alignment=8) out = \ - tvm.extern(data.shape, - [data, valid_count], - lambda ins, outs: tvm.call_packed( - "tvm.contrib.sort.argsort_nms", ins[0], ins[1], - outs[0], axis, is_ascend), - dtype="int32", - in_buffers=[data_buf, valid_count_buf], - out_buffers=out_buf, - name="argsort_nms_cpu", - tag="argsort_nms_cpu") + te.extern(data.shape, + [data, valid_count], + lambda ins, outs: tvm.tir.call_packed( + "tvm.contrib.sort.argsort_nms", ins[0], ins[1], + outs[0], axis, is_ascend), + dtype="int32", + in_buffers=[data_buf, valid_count_buf], + out_buffers=out_buf, + name="argsort_nms_cpu", + tag="argsort_nms_cpu") else: - out_buf = api.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) + out_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) out = \ - tvm.extern(data.shape, - [data], - lambda ins, outs: tvm.call_packed( - "tvm.contrib.sort.argsort", ins[0], - outs[0], axis, is_ascend), - dtype=dtype, - in_buffers=[data_buf], - out_buffers=out_buf, - name="argsort_cpu", - tag="argsort_cpu") + te.extern(data.shape, + [data], + lambda ins, outs: tvm.tir.call_packed( + "tvm.contrib.sort.argsort", ins[0], + outs[0], axis, is_ascend), + dtype=dtype, + in_buffers=[data_buf], + out_buffers=out_buf, + name="argsort_cpu", + tag="argsort_cpu") return out @@ -103,7 +104,7 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tensor. k : int, optional @@ -126,27 +127,27 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Returns ------- - out : tvm.Tensor or List[tvm.Tensor] + out : tvm.te.Tensor or List[tvm.te.Tensor] The computed result. """ assert ret_type in ["both", "values", "indices"] - data_buf = api.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) + data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) out_shape = list(get_const_tuple(data.shape)) if k >= 1: out_shape[axis] = k out_bufs = [] if ret_type in ["both", "values"]: - out_bufs.append(api.decl_buffer(out_shape, data.dtype, "value_buf", data_alignment=8)) + out_bufs.append(tvm.tir.decl_buffer(out_shape, data.dtype, "value_buf", data_alignment=8)) if ret_type in ["both", "indices"]: - out_bufs.append(api.decl_buffer(out_shape, dtype, "indices_buf", data_alignment=8)) + out_bufs.append(tvm.tir.decl_buffer(out_shape, dtype, "indices_buf", data_alignment=8)) out_shapes = [out_shape] * len(out_bufs) - out = tvm.extern(out_shapes, - [data], - lambda ins, outs: tvm.call_packed( - "tvm.contrib.sort.topk", ins[0], *outs, k, axis, ret_type, is_ascend), - in_buffers=[data_buf], - out_buffers=out_bufs, - name="topk_cpu", - tag="topk_cpu") + out = te.extern(out_shapes, + [data], + lambda ins, outs: tvm.tir.call_packed( + "tvm.contrib.sort.topk", ins[0], *outs, k, axis, ret_type, is_ascend), + in_buffers=[data_buf], + out_buffers=out_bufs, + name="topk_cpu", + tag="topk_cpu") return out diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py index 29f9cb4dbaa60..dc7afec4b2cac 100644 --- a/topi/python/topi/sparse/csrmm.py +++ b/topi/python/topi/sparse/csrmm.py @@ -17,6 +17,7 @@ """TVM operator compute SpMM in CSR format.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag from ..util import simplify @@ -26,30 +27,30 @@ def csrmm_default(data, indices, indptr, weight, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 1-D with shape [nonzeros] - indices : tvm.Tensor + indices : tvm.te.Tensor 1-D with shape [nonzeros] - indptr : tvm.Tensor + indptr : tvm.te.Tensor 1-D with shape [m+1] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [k, n] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [m] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ and len(weight.shape) == 2, "only support 2-dim csrmm" - assert isinstance(weight, tvm.tensor.Tensor), \ - "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + assert isinstance(weight, te.tensor.Tensor), \ + "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 M = simplify(indptr.shape[0]-1) @@ -78,12 +79,12 @@ def csrmm_default_ir(data, indices, indptr, weight, out): out_ptr[row*N+n] += dot[0] return irb.get() oshape = (M, N) - matmul = tvm.extern(oshape, [data, indices, indptr, weight], - lambda ins, outs: csrmm_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="csrmm", dtype='float32', name='out') + matmul = te.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: csrmm_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="csrmm", dtype='float32', name='out') if bias is not None: - matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ - tag=tag.BROADCAST) + matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ + tag=tag.BROADCAST) return matmul @@ -96,15 +97,15 @@ def csrmm(a, b, c=None): a : tvm.contrib.sparse.CSRNDArray 2-D sparse matrix with shape [m, k] - b : tvm.Tensor + b : tvm.te.Tensor 2-D dense matrix with shape [k, n] - c : tvm.Tensor, optional + c : tvm.te.Tensor, optional 1-D dense vector with shape [n] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ return csrmm_default(a.data, a.indices, a.indptr, b, c) diff --git a/topi/python/topi/sparse/csrmv.py b/topi/python/topi/sparse/csrmv.py index 8a21f0db6d96a..82cc0c2fa14cb 100644 --- a/topi/python/topi/sparse/csrmv.py +++ b/topi/python/topi/sparse/csrmv.py @@ -17,6 +17,7 @@ """TVM operator compute SpMV in CSR format.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag def csrmv_default(data, indices, indptr, weight, bias=None): @@ -24,30 +25,30 @@ def csrmv_default(data, indices, indptr, weight, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 1-D with shape [nonzeros] - indices : tvm.Tensor + indices : tvm.te.Tensor 1-D with shape [nonzeros] - indptr : tvm.Tensor + indptr : tvm.te.Tensor 1-D with shape [m+1] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [k, 1] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [1] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, 1] """ assert len(data.shape) == 1 and len(weight.shape) == 2, \ "only support 2-dim csrmv" - assert isinstance(weight, tvm.tensor.Tensor), \ - "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + assert isinstance(weight, te.tensor.Tensor), \ + "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 batch = indptr.shape[0]-1 @@ -73,12 +74,12 @@ def csrmv_default_ir(data, indices, indptr, weight, out): out_ptr[row] += dot[0] return irb.get() oshape = (batch, 1) - matmul = tvm.extern(oshape, [data, indices, indptr, weight], - lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="csrmv", dtype='float32', name='csrmv') + matmul = te.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="csrmv", dtype='float32', name='csrmv') if bias is not None: - matmul = tvm.compute((batch, 1), lambda i, j: matmul[i, 0] + bias[i], \ - tag=tag.BROADCAST) + matmul = te.compute((batch, 1), lambda i, j: matmul[i, 0] + bias[i], \ + tag=tag.BROADCAST) return matmul @@ -91,15 +92,15 @@ def csrmv(a, x, y=None): a : tvm.contrib.sparse.CSRNDArray 2-D sparse matrix with shape [m, k] - x : tvm.Tensor + x : tvm.te.Tensor 2-D dense matrix with shape [k, 1] - y : tvm.Tensor, optional + y : tvm.te.Tensor, optional 1-D dense vector with shape [1] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D dense matrix with shape [m, 1] """ return csrmv_default(a.data, a.indices, a.indptr, x, y) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index fe21e2fdf90ef..c3099b7df367b 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -17,6 +17,7 @@ """TVM operator compute Dense in CSR format.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag from ..util import simplify @@ -26,30 +27,30 @@ def dense_si(data, indices, indptr, weight, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 1-D with shape [num_nonzeros] - indices : tvm.Tensor + indices : tvm.te.Tensor 1-D with shape [num_nonzeros] - indptr : tvm.Tensor + indptr : tvm.te.Tensor 1-D with shape [m+1] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [k, n] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [m] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ and len(weight.shape) == 2, "only support 2-dim dense" - assert isinstance(weight, tvm.tensor.Tensor), \ - "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + assert isinstance(weight, te.tensor.Tensor), \ + "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 dtype = data.dtype @@ -69,8 +70,8 @@ def dense_default_ir(data, indices, indptr, weight, out): with irb.for_range(0, N, for_type="vectorize", name='n') as n: with irb.for_range(0, M, for_type="parallel", name='m') as m: dot = irb.allocate(dtype, (1,), name='dot', scope='local') - out_ptr[m*N+n] = tvm.const(0, dtype) - dot[0] = tvm.const(0, dtype) + out_ptr[m*N+n] = tvm.tir.const(0, dtype) + dot[0] = tvm.tir.const(0, dtype) row_start = indptr_ptr[m] row_elems = indptr_ptr[m+1]-row_start with irb.for_range(0, row_elems, name='k') as k: @@ -79,12 +80,12 @@ def dense_default_ir(data, indices, indptr, weight, out): out_ptr[m*N+n] += dot[0] return irb.get() oshape = (M, N) - matmul = tvm.extern(oshape, [data, indices, indptr, weight], - lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="dense", dtype=dtype, name='out') + matmul = te.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="dense", dtype=dtype, name='out') if bias is not None: - matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ - tag=tag.BROADCAST) + matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ + tag=tag.BROADCAST) return matmul @@ -94,30 +95,30 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [m, k] - w_data : tvm.Tensor + w_data : tvm.te.Tensor 1-D with shape [nonzeros] - w_indices : tvm.Tensor + w_indices : tvm.te.Tensor 1-D with shape [nonzeros] - w_indptr : tvm.Tensor + w_indptr : tvm.te.Tensor 1-D with shape [n+1] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [n] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ assert len(w_data.shape) == 1 and len(w_indices.shape) == 1 and len(w_indptr.shape) == 1 \ and len(data.shape) == 2, "only support 2-dim dense" - assert isinstance(data, tvm.tensor.Tensor), \ - "data matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(data)) + assert isinstance(data, te.tensor.Tensor), \ + "data matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(data)) if bias is not None: assert len(bias.shape) == 1 dtype = data.dtype @@ -137,8 +138,8 @@ def dense_default_ir(data, w_data, w_indices, w_indptr, out): with irb.for_range(0, M, for_type="vectorize", name='m') as m: with irb.for_range(0, N, for_type="parallel", name='n') as n: dot = irb.allocate(dtype, (1,), name='dot', scope='local') - out_ptr[m*N+n] = tvm.const(0, dtype) - dot[0] = tvm.const(0, dtype) + out_ptr[m*N+n] = tvm.tir.const(0, dtype) + dot[0] = tvm.tir.const(0, dtype) row_start = w_indptr_ptr[n] row_elems = w_indptr_ptr[n+1]-row_start with irb.for_range(0, row_elems, name='k') as k: @@ -147,12 +148,12 @@ def dense_default_ir(data, w_data, w_indices, w_indptr, out): out_ptr[m*N+n] += dot[0] return irb.get() oshape = (M, N) - matmul = tvm.extern(oshape, [data, w_data, w_indices, w_indptr], - lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="dense", dtype=dtype, name='out') + matmul = te.extern(oshape, [data, w_data, w_indices, w_indptr], + lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="dense", dtype=dtype, name='out') if bias is not None: - matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ - tag=tag.BROADCAST) + matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ + tag=tag.BROADCAST) return matmul @@ -162,26 +163,26 @@ def dense(data, weight, bias=None): Parameters ---------- - data : tvm.contrib.sparse.CSRNDArray or tvm.tensor.Tensor + data : tvm.contrib.sparse.CSRNDArray or te.tensor.Tensor 2-D with shape [batch, in_dim] - weight : tvm.tensor.Tensor or tvm.contrib.sparse.CSRNDArray + weight : te.tensor.Tensor or tvm.contrib.sparse.CSRNDArray 2-D with shape [out_dim, in_dim] - bias : tvm.tensor.Tensor, optional + bias : te.tensor.Tensor, optional 1-D with shape [out_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ ret = None if isinstance(data, tvm.contrib.sparse.CSRPlaceholderOp) and \ - isinstance(weight, tvm.tensor.Tensor): + isinstance(weight, te.tensor.Tensor): ret = dense_si(data.data, data.indices, data.indptr, weight, bias) - elif isinstance(data, tvm.tensor.Tensor) and \ - isinstance(weight, tvm.contrib.sparse.CSRPlaceholderOp): + elif isinstance(data, te.tensor.Tensor) and \ + isinstance(weight, tvm.contrib.sparse.CSRPlaceholderOp): ret = dense_sw(data, weight.data, weight.indices, weight.indptr, bias) else: raise NotImplementedError("implementation for %s as data and %s as weights, " diff --git a/topi/python/topi/tensor.py b/topi/python/topi/tensor.py index 0231efcca2728..00712420ee075 100644 --- a/topi/python/topi/tensor.py +++ b/topi/python/topi/tensor.py @@ -24,12 +24,12 @@ def elemwise_sum(xs): Parameters ---------- - xs : list of tvm.Tensor + xs : list of tvm.te.Tensor Input arguments. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.elemwise_sum(xs) @@ -49,7 +49,7 @@ def full(shape, dtype, fill_value): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.full(shape, dtype, fill_value) @@ -61,14 +61,14 @@ def full_like(x, fill_value): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. fill_value : float Value to be filled Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.full_like(x, fill_value) diff --git a/topi/python/topi/testing/conv2d_transpose_python.py b/topi/python/topi/testing/conv2d_transpose_python.py index 50c43eb70e3e0..c789feca617f4 100644 --- a/topi/python/topi/testing/conv2d_transpose_python.py +++ b/topi/python/topi/testing/conv2d_transpose_python.py @@ -59,9 +59,9 @@ def conv2d_transpose_nchw_python(a_np, w_np, stride, padding): bpad_left = filter_w - 1 - fpad_left bpad_right = filter_w - 1 - fpad_right padded_a_np = np.zeros((batch, in_c, dilated_a_np.shape[2]+bpad_top+bpad_bottom, \ - dilated_a_np.shape[3]+bpad_left+bpad_right)) + dilated_a_np.shape[3]+bpad_left+bpad_right)) padded_a_np[:, :, bpad_top:dilated_a_np.shape[2]+bpad_top, \ - bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np + bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np # convolution stage out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w diff --git a/topi/python/topi/testing/conv3d_ncdhw_python.py b/topi/python/topi/testing/conv3d_ncdhw_python.py index 825ec622a1ecd..063c07d941330 100644 --- a/topi/python/topi/testing/conv3d_ncdhw_python.py +++ b/topi/python/topi/testing/conv3d_ncdhw_python.py @@ -48,7 +48,7 @@ def _conv3d_ncdhw_python(a_np, w_np, stride, padding): if pad_d > 0 or pad_h > 0 or pad_w > 0: apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w)) apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\ - pad_left:pad_left + in_width] = a_np[n, c] + pad_left:pad_left + in_width] = a_np[n, c] else: apad = a_np[n, c] out = scipy.signal.convolve( diff --git a/topi/python/topi/testing/conv3d_ndhwc_python.py b/topi/python/topi/testing/conv3d_ndhwc_python.py index 2810f72b094f6..85b991f3ec5fb 100644 --- a/topi/python/topi/testing/conv3d_ndhwc_python.py +++ b/topi/python/topi/testing/conv3d_ndhwc_python.py @@ -73,7 +73,7 @@ def conv3d_ndhwc_python(a_np, w_np, stride, padding): if pad_d > 0 or pad_h > 0 or pad_w > 0: apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w)) apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\ - pad_left:pad_left + in_width] = at[n, c] + pad_left:pad_left + in_width] = at[n, c] else: apad = at[n, c] out = scipy.signal.convolve( diff --git a/topi/python/topi/testing/depthwise_conv2d_python.py b/topi/python/topi/testing/depthwise_conv2d_python.py index 566bb93f42a78..5addc7578d10b 100644 --- a/topi/python/topi/testing/depthwise_conv2d_python.py +++ b/topi/python/topi/testing/depthwise_conv2d_python.py @@ -57,8 +57,8 @@ def depthwise_conv2d_python_nchw(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, j, :, :] = signal.convolve2d(input_np[i, j//channel_multiplier, :, :], \ - np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \ - mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] + np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \ + mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] if padding == 'SAME': out_channel = in_channel * channel_multiplier out_height = np.int(np.ceil(float(in_height) / float(stride_h))) @@ -75,8 +75,8 @@ def depthwise_conv2d_python_nchw(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, j, :, :] = signal.convolve2d(input_np[i, j//channel_multiplier, :, :], \ - np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \ - mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] + np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \ + mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] return output_np @@ -118,8 +118,8 @@ def depthwise_conv2d_python_nhwc(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, :, :, j] = signal.convolve2d(input_np[i, :, :, j//channel_multiplier], \ - np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ - mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] + np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ + mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] if padding == 'SAME': out_channel = in_channel * channel_multiplier out_height = np.int(np.ceil(float(in_height) / float(stride_h))) @@ -136,7 +136,7 @@ def depthwise_conv2d_python_nhwc(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, :, :, j] = signal.convolve2d(input_np[i, :, :, j//channel_multiplier], \ - np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ - mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] + np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ + mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] return output_np diff --git a/topi/python/topi/testing/pool3d_python.py b/topi/python/topi/testing/pool3d_python.py index 631a995e7c129..2606650b33cf1 100644 --- a/topi/python/topi/testing/pool3d_python.py +++ b/topi/python/topi/testing/pool3d_python.py @@ -40,9 +40,9 @@ def pool3d_ncdhw_python(np_data, kernel, assert out_shape[3] == int(math.floor(float(in_shape[3] - k_h + pt + pb) / s_h) + 1) assert out_shape[4] == int(math.floor(float(in_shape[4] - k_w + pl + pr) / s_w) + 1) - fill_value = tvm.const(0.0, dtype).value + fill_value = tvm.tir.const(0.0, dtype).value if not(count_include_pad) and pool_type == 'max': - fill_value = tvm.min_value(dtype).value + fill_value = tvm.te.min_value(dtype).value pad_np = np.full(shape=(in_n, in_c, in_d + pf + pk, diff --git a/topi/python/topi/testing/pool_grad_python.py b/topi/python/topi/testing/pool_grad_python.py index f1e51f0c957ee..ee671c2c49d23 100644 --- a/topi/python/topi/testing/pool_grad_python.py +++ b/topi/python/topi/testing/pool_grad_python.py @@ -53,7 +53,7 @@ def pool_grad_nchw(a_np, out_grad_np, # take the first element, as they are the same across batch and channel pad_count = pad_count.ravel()[0] pad_pool_grad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] += \ - out_grad_np[:, :, i, j].reshape(n, ic, 1, 1) / np.maximum(pad_count, 1) + out_grad_np[:, :, i, j].reshape(n, ic, 1, 1) / np.maximum(pad_count, 1) elif pool_type == 'max': for i in range(oh): for j in range(ow): diff --git a/topi/python/topi/testing/roi_align_python.py b/topi/python/topi/testing/roi_align_python.py index 6ba2061abd759..d3285490d4c8f 100644 --- a/topi/python/topi/testing/roi_align_python.py +++ b/topi/python/topi/testing/roi_align_python.py @@ -45,8 +45,8 @@ def _bilinear(b, c, y, x): lx = x - x_low return (1 - ly) * (1 - lx) * a_np[b, c, y_low, x_low] + \ (1 - ly) * lx * a_np[b, c, y_low, x_high] + \ - ly * (1 - lx) * a_np[b, c, y_high, x_low] + \ - ly * lx * a_np[b, c, y_high, x_high] + ly * (1 - lx) * a_np[b, c, y_high, x_low] + \ + ly * lx * a_np[b, c, y_high, x_high] for i in range(num_roi): roi = rois_np[i] diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py index bdeb22304b076..49c71bd3320d1 100644 --- a/topi/python/topi/transform.py +++ b/topi/python/topi/transform.py @@ -18,6 +18,7 @@ """Injective transformation operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te import topi from . import cpp from . import tag @@ -29,7 +30,7 @@ def expand_dims(a, axis, num_newaxis=1): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. num_newaxis: int, optional @@ -37,7 +38,7 @@ def expand_dims(a, axis, num_newaxis=1): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.expand_dims(a, axis, num_newaxis) @@ -63,21 +64,21 @@ def expand_like(a, shape_like, axis): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. - shape_like : tvm.Tensor + shape_like : tvm.te.Tensor The tensor to with target shape. axis: list of int axis to be expanded on Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ odim = len(axis) + len(a.shape) if odim != len(shape_like.shape): if len(a.shape) == 1 and len(axis) == len(shape_like.shape): # A special case: `a` is a scalar represented as a 1-dim tensor - return tvm.compute(shape_like.shape, lambda *idxs: a(0)) + return te.compute(shape_like.shape, lambda *idxs: a(0)) raise ValueError("shape inconsistent when expand_like ({}, {}, {})".format( len(axis), len(a.shape), len(shape_like.shape))) @@ -92,7 +93,7 @@ def _compute(*idxs): indices.append(idxs[i]) axis_index += 1 return a(*indices) - return tvm.compute(shape_like.shape, _compute) + return te.compute(shape_like.shape, _compute) def transpose(a, axes=None): @@ -100,7 +101,7 @@ def transpose(a, axes=None): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. axes: tuple of ints, optional @@ -108,7 +109,7 @@ def transpose(a, axes=None): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.transpose(a, axes) @@ -118,7 +119,7 @@ def flip(a, axis=0): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. axis : int, optional @@ -126,7 +127,7 @@ def flip(a, axis=0): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.flip(a, axis) @@ -135,7 +136,7 @@ def strided_slice(a, begin, end, strides=None): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be sliced. begin: list of int @@ -151,7 +152,7 @@ def strided_slice(a, begin, end, strides=None): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ if strides is None: strides = [] @@ -163,26 +164,26 @@ def strided_set(a, v, begin, end, strides=None): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be sliced. - v : tvm.Tensor + v : tvm.te.Tensor The values to set - begin: tvm.Tensor + begin: tvm.te.Tensor The indices to begin with in the slicing. - end: tvm.Tensor + end: tvm.te.Tensor Indicies indicating end of the slice. - strides: tvm.Tensor, optional + strides: tvm.te.Tensor, optional Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ n = len(a.shape) @@ -201,38 +202,38 @@ def strided_set(a, v, begin, end, strides=None): raise TypeError("strides should be int32") def _max(a, b): - return tvm.expr.Select(a > b, a, b) + return tvm.tir.Select(a > b, a, b) if strides is None: - strides = [tvm.const(1, 'int32')] * n + strides = [tvm.tir.const(1, 'int32')] * n else: - strides = [tvm.if_then_else(strides.shape[0] > i, - strides[i], - tvm.const(1, 'int32')) + strides = [tvm.tir.if_then_else(strides.shape[0] > i, + strides[i], + tvm.tir.const(1, 'int32')) for i in range(n)] - begin = [tvm.if_then_else(begin.shape[0] > i, - begin[i], - tvm.expr.Select(strides[i] > 0, - tvm.const(0, 'int32'), - a.shape[i])) + begin = [tvm.tir.if_then_else(begin.shape[0] > i, + begin[i], + tvm.tir.Select(strides[i] > 0, + tvm.tir.const(0, 'int32'), + a.shape[i])) for i in range(n)] - end = [tvm.if_then_else(end.shape[0] > i, - end[i], - tvm.expr.Select(strides[i] > 0, - a.shape[i] + 1, - -(a.shape[i] + 1))) + end = [tvm.tir.if_then_else(end.shape[0] > i, + end[i], + tvm.tir.Select(strides[i] > 0, + a.shape[i] + 1, + -(a.shape[i] + 1))) for i in range(n)] # Convert negative indexes for i in range(n): - begin[i] = tvm.if_then_else(begin[i] < 0, - begin[i] + a.shape[i], - begin[i]) - end[i] = tvm.if_then_else(end[i] < 0, - end[i] + a.shape[i], - end[i]) + begin[i] = tvm.tir.if_then_else(begin[i] < 0, + begin[i] + a.shape[i], + begin[i]) + end[i] = tvm.tir.if_then_else(end[i] < 0, + end[i] + a.shape[i], + end[i]) def _select(*indices): from_val = [] @@ -241,9 +242,9 @@ def _select(*indices): from_val.append(within_index(begin[i], end[i], strides[i], indices[i])) index_tuple.append( make_idx(begin[i], end[i], strides[i], a.shape[i], indices[i])) - return tvm.if_then_else(tvm.all(*from_val), v(*index_tuple), a(*indices)) + return tvm.tir.if_then_else(tvm.tir.all(*from_val), v(*index_tuple), a(*indices)) - return tvm.compute(a.shape, _select, name="strided_set") + return te.compute(a.shape, _select, name="strided_set") def reshape(a, newshape): @@ -251,14 +252,14 @@ def reshape(a, newshape): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be reshaped newshape : tuple of ints The new shape Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.reshape(a, newshape) @@ -268,7 +269,7 @@ def squeeze(a, axis=None): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor axis : None or int or tuple of ints, optional Selects a subset of the single-dimensional entries in the shape. @@ -276,7 +277,7 @@ def squeeze(a, axis=None): Returns ------- - squeezed : tvm.Tensor + squeezed : tvm.te.Tensor """ return cpp.squeeze(a, axis) @@ -286,7 +287,7 @@ def concatenate(a_tuple, axis=0): Parameters ---------- - a_tuple : tuple of tvm.Tensor + a_tuple : tuple of tvm.te.Tensor The arrays to concatenate axis : int, optional @@ -294,7 +295,7 @@ def concatenate(a_tuple, axis=0): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.concatenate(a_tuple, axis) @@ -304,7 +305,7 @@ def stack(a, axis): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be stacked. axis : int, optional @@ -313,7 +314,7 @@ def stack(a, axis): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.stack(a, axis) @@ -323,7 +324,7 @@ def split(ary, indices_or_sections, axis=0): Parameters ---------- - ary : tvm.Tensor + ary : tvm.te.Tensor indices_or_sections : int or 1-D array @@ -331,7 +332,7 @@ def split(ary, indices_or_sections, axis=0): Returns ------- - ret : tuple of tvm.Tensor + ret : tuple of tvm.te.Tensor """ return cpp.split(ary, indices_or_sections, axis) @@ -341,10 +342,10 @@ def take(a, indices, axis=None, mode="clip"): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The source array. - indices : tvm.Tensor + indices : tvm.te.Tensor The indices of the values to extract. axis : int, optional @@ -359,7 +360,7 @@ def take(a, indices, axis=None, mode="clip"): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ if axis is None: return cpp.take(a, indices, mode) @@ -371,15 +372,15 @@ def gather_nd(a, indices): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The source array. - indices : tvm.Tensor + indices : tvm.te.Tensor The indices of the values to extract. Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.gather_nd(a, indices) @@ -444,7 +445,7 @@ def arange(start, stop=None, step=1, dtype="float32"): Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor The resulting tensor. """ if stop is None: @@ -458,7 +459,7 @@ def repeat(a, repeats, axis): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be repeated. repeats: int, required @@ -469,7 +470,7 @@ def repeat(a, repeats, axis): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.repeat(a, repeats, axis) @@ -479,7 +480,7 @@ def tile(a, reps): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be tiled. reps: tuple of ints, required @@ -487,7 +488,7 @@ def tile(a, reps): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.tile(a, reps) @@ -497,7 +498,7 @@ def layout_transform(array, src_layout, dst_layout): Parameters ---------- - array : tvm.Tensor + array : tvm.te.Tensor The source array. src_layout : str @@ -514,7 +515,7 @@ def shape(array, dtype="int32"): Parameters ---------- - array : tvm.Tensor + array : tvm.te.Tensor The source tensor. dtype : str, optional @@ -522,7 +523,7 @@ def shape(array, dtype="int32"): Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor The resulting tensor. """ return cpp.shape(array, dtype) @@ -543,11 +544,11 @@ def sequence_mask(data, valid_length, mask_value=0, axis=0): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor N-D with shape [MAX_LENGTH, batch_size, ...] or [batch_size, MAX_LENGTH, ...] depending on the value of `axis`. - valid_length : tvm.Tensor + valid_length : tvm.te.Tensor 1-D with shape [batch_size,] mask_value : float, optional @@ -558,7 +559,7 @@ def sequence_mask(data, valid_length, mask_value=0, axis=0): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor N-D with shape [MAX_LENGTH, batch_size, ...] or [batch_size, MAX_LENGTH, ...] depending on the value of `axis`. """ @@ -574,7 +575,7 @@ def ndarray_size(array, dtype="int32"): Parameters ---------- - array : tvm.Tensor + array : tvm.te.Tensor The source tensor. dtype : str, optional @@ -582,7 +583,7 @@ def ndarray_size(array, dtype="int32"): Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor The resulting tensor. """ return cpp.ndarray_size(array, dtype) @@ -593,18 +594,18 @@ def where(condition, x, y): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor The condition array. - x : tvm.Tensor + x : tvm.te.Tensor First array to be selected. - y : tvm.Tensor + y : tvm.te.Tensor Second array to be selected. Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor A Tensor selected from x or y depending on condition. """ return cpp.where(condition, x, y) @@ -617,13 +618,13 @@ def one_hot(indices, on_value, off_value, depth, axis, dtype): Parameters ---------- - indices : tvm.Tensor + indices : tvm.te.Tensor Locations to set to on_value. - on_value : tvm.Tensor + on_value : tvm.te.Tensor Value to fill at indices. - off_value : tvm.Tensor + off_value : tvm.te.Tensor Value to fill at all other positions besides indices. depth : int diff --git a/topi/python/topi/util.py b/topi/python/topi/util.py index c4c3ee6dd37c6..94c6068c7b216 100644 --- a/topi/python/topi/util.py +++ b/topi/python/topi/util.py @@ -20,7 +20,8 @@ from numbers import Integral import tvm -from tvm.api import layout, bijective_layout +from tvm import te +from tvm.tir import layout, bijective_layout from . import tag, cpp class InvalidShapeError(ValueError): @@ -56,7 +57,7 @@ def _traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): _traverse(tensor.op) callback(op) @@ -77,7 +78,7 @@ def prod(x): The result value """ if not x: - return tvm.const(1, "int32") + return tvm.tir.const(1, "int32") res = x[0] for i in range(1, len(x)): res = res * x[i] @@ -249,8 +250,8 @@ def unravel_index(idx, shape): indices : tuple of int or tvm.expr.IntImm Corresponding coordinate of the 1D index """ - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod indices = [] for i in range(len(shape) - 1, -1, -1): indices.append(idxm(idx, shape[i])) @@ -276,18 +277,18 @@ def const_matrix(matrix, name="const_matrix"): """ row, col = matrix.shape dtype = str(matrix.dtype) - idxm = tvm.indexmod + idxm = tvm.tir.indexmod def select_array(i, j): - now = tvm.const(0.0, dtype) + now = tvm.tir.const(0.0, dtype) for ii in range(row): for jj in range(col): - now = tvm.expr.Select(tvm.all(idxm(i, row) == ii, idxm(j, col) == jj), - tvm.const(matrix[ii][jj], dtype), + now = tvm.expr.Select(tvm.tir.all(idxm(i, row) == ii, idxm(j, col) == jj), + tvm.tir.const(matrix[ii][jj], dtype), now) return now - return tvm.compute(matrix.shape, select_array, name=name) + return te.compute(matrix.shape, select_array, name=name) def get_max_power2_factor(n, max_value=None): @@ -349,7 +350,7 @@ def get_shape(src_shape, src_layout, dst_layout): layout_mapping = bijective_layout(src_layout, dst_layout) dst_indices = layout_mapping.forward_index( - tvm.convert(list(range(len(src_layout))))) + tvm.runtime.convert(list(range(len(src_layout))))) return get_const_tuple(tuple([src_shape[i.value] for i in dst_indices])) @@ -379,10 +380,10 @@ def within_index(b, e, s, i): """ bc = tvm.expr.Select(s < 0, i <= e, i < b) ec = tvm.expr.Select(s < 0, i > b, i >= e) - ss = tvm.if_then_else(s < 0, - ((i - e) + (e % tvm.abs(s)) + 1) % tvm.abs(s), - (i - b) % s) - return tvm.expr.Select(tvm.expr.Or(bc, ec), tvm.const(False), ss.equal(0)) + ss = te.if_then_else(s < 0, + ((i - e) + (e % te.abs(s)) + 1) % te.abs(s), + (i - b) % s) + return tvm.expr.Select(tvm.expr.Or(bc, ec), tvm.tir.const(False), ss.equal(0)) def make_idx(b, e, s, z, i): @@ -420,10 +421,10 @@ def make_idx(b, e, s, z, i): # Clamp to array size b = tvm.expr.Select(z < b, z - 1, b) - ss = tvm.if_then_else(s < 0, - (b - i) // tvm.abs(s), - (i - b) // s) - return tvm.if_then_else(tvm.expr.Or(bc, ec), 88, ss) + ss = tvm.tir.if_then_else(s < 0, + (b - i) // te.abs(s), + (i - b) // s) + return tvm.tir.if_then_else(tvm.expr.Or(bc, ec), 88, ss) def is_empty_shape(shape): diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index c171f8ca5fe34..d95ca75a2d1bb 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -17,6 +17,7 @@ # pylint: disable=import-error, invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches, too-many-statements, too-many-function-args """Non-maximum suppression operator""" import tvm +from tvm import te from tvm import hybrid from ..sort import argsort @@ -28,16 +29,16 @@ def hybrid_rearrange_out(data, one): Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. - one: tvm.const + one: tvm.tir.const Constant one with the same dtype as data. Returns ------- - output : tvm.Tensor or numpy NDArray + output : tvm.te.Tensor or numpy NDArray Transformed NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. """ @@ -70,28 +71,28 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray Input data. 3-D tensor with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. - score_threshold : tvm.const + score_threshold : tvm.tir.const Lower limit of score for valid bounding boxes. - id_index : tvm.const + id_index : tvm.tir.const index of the class categories, -1 to disable. - score_index: tvm.const + score_index: tvm.tir.const Index of the scores/confidence of boxes. - one: tvm.const + one: tvm.tir.const Constant one with the same dtype as data. Returns ------- - out_tensor : tvm.Tensor or numpy NDArray + out_tensor : tvm.te.Tensor or numpy NDArray Rearranged data tensor. - valid_count : tvm.Tensor or numpy NDArray + valid_count : tvm.te.Tensor or numpy NDArray 1-D tensor for valid number of boxes. """ batch_size = data.shape[0] @@ -123,7 +124,7 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor Input data. 3-D tensor with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. @@ -138,18 +139,18 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Returns ------- - out_tensor : tvm.Tensor + out_tensor : tvm.te.Tensor Rearranged data tensor. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. """ - score_threshold_const = tvm.const(score_threshold, data.dtype) - id_index_const = tvm.const(id_index, "int32") - score_index_const = tvm.const(score_index, "int32") + score_threshold_const = tvm.tir.const(score_threshold, data.dtype) + id_index_const = tvm.tir.const(id_index, "int32") + score_index_const = tvm.tir.const(score_index, "int32") return hybrid_get_valid_counts(data, score_threshold_const, id_index_const, score_index_const, - tvm.const(1, data.dtype)) + tvm.tir.const(1, data.dtype)) @hybrid.script @@ -160,51 +161,51 @@ def hybrid_nms(data, sorted_index, valid_count, Parameters ---------- - data: tvm.Tensor or numpy NDArray + data: tvm.te.Tensor or numpy NDArray Bounding boxes with class and score. 3-D tensor with shape [batch_size, num_anchors, 6]. - sorted_index : tvm.Tensor or numpy NDArray + sorted_index : tvm.te.Tensor or numpy NDArray Bounding box indexes sorted by score, with shape [batch_size, num_anchors]. - valid_count : tvm.Tensor or numpy NDArray + valid_count : tvm.te.Tensor or numpy NDArray 1-D tensor for valid number of boxes. - max_output_size : tvm.const + max_output_size : tvm.tir.const Max number of output valid boxes for each instance. By default all valid boxes are returned. - iou_threshold : tvm.const + iou_threshold : tvm.tir.const Overlapping(IoU) threshold to suppress object with smaller score. - force_suppress : tvm.const + force_suppress : tvm.tir.const Whether to suppress all detections regardless of class_id. - top_k : tvm.const + top_k : tvm.tir.const Keep maximum top k detections before nms, -1 for no limit. - coord_start : tvm.const + coord_start : tvm.tir.const Start index of the consecutive 4 coordinates. - id_index : tvm.const + id_index : tvm.tir.const index of the class categories, -1 to disable. - score_index: tvm.const + score_index: tvm.tir.const Index of the scores/confidence of boxes. - zero: tvm.const + zero: tvm.tir.const Constant zero with the same dtype as data. - one: tvm.const + one: tvm.tir.const Constant one with the same dtype as data. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, 6]. - box_indices: tvm.Tensor + box_indices: tvm.te.Tensor 2-D tensor with shape [batch_size, num_anchors]. """ batch_size = data.shape[0] @@ -297,10 +298,10 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. max_output_size : optional, int @@ -333,7 +334,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, 6]. Example @@ -342,8 +343,8 @@ def non_max_suppression(data, valid_count, max_output_size=-1, # An example to use non_max_suppression dshape = (1, 5, 6) - data = tvm.placeholder(dshape, name="data") - valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") + data = te.placeholder(dshape, name="data") + valid_count = te.placeholder((dshape[0],), dtype="int32", name="valid_count") iou_threshold = 0.7 force_suppress = True top_k = -1 @@ -363,19 +364,19 @@ def non_max_suppression(data, valid_count, max_output_size=-1, num_anchors = data.shape[1] score_axis = score_index score_shape = (batch_size, num_anchors) - score_tensor = tvm.compute(score_shape, lambda i, j: data[i, j, score_axis]) + score_tensor = te.compute(score_shape, lambda i, j: data[i, j, score_axis]) sort_tensor = argsort(score_tensor, valid_count=valid_count, axis=1, is_ascend=False) out, box_indices = hybrid_nms(data, sort_tensor, valid_count, - tvm.const(max_output_size, dtype="int32"), - tvm.const(iou_threshold, dtype=data.dtype), - tvm.const(force_suppress, dtype="bool"), - tvm.const(top_k, dtype="int32"), - tvm.const(coord_start, dtype="int32"), - tvm.const(id_index, dtype="int32"), - tvm.const(score_index, dtype="int32"), - zero=tvm.const(0, dtype=data.dtype), - one=tvm.const(1, dtype=data.dtype)) + tvm.tir.const(max_output_size, dtype="int32"), + tvm.tir.const(iou_threshold, dtype=data.dtype), + tvm.tir.const(force_suppress, dtype="bool"), + tvm.tir.const(top_k, dtype="int32"), + tvm.tir.const(coord_start, dtype="int32"), + tvm.tir.const(id_index, dtype="int32"), + tvm.tir.const(score_index, dtype="int32"), + zero=tvm.tir.const(0, dtype=data.dtype), + one=tvm.tir.const(1, dtype=data.dtype)) if not return_indices and invalid_to_bottom: - out = hybrid_rearrange_out(out, one=tvm.const(1, dtype=data.dtype)) + out = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) return box_indices if return_indices else out diff --git a/topi/python/topi/vision/rcnn/proposal.py b/topi/python/topi/vision/rcnn/proposal.py index 5de4998c066cf..3d2c09e884e60 100644 --- a/topi/python/topi/vision/rcnn/proposal.py +++ b/topi/python/topi/vision/rcnn/proposal.py @@ -18,6 +18,7 @@ """Proposal operator""" import math import tvm +from tvm import te from ...util import get_const_tuple, get_const_int from ...sort import argsort @@ -43,8 +44,8 @@ def reg_bbox(x1, y1, x2, y2, dx, dy, dw, dh): pred_ctr_x = dx * bbox_w + ctr_x pred_ctr_y = dy * bbox_h + ctr_y - pred_w = tvm.exp(dw) * bbox_w - pred_h = tvm.exp(dh) * bbox_h + pred_w = te.exp(dw) * bbox_w + pred_h = te.exp(dh) * bbox_h pred_x1 = pred_ctr_x - 0.5 * (pred_w - 1.0) pred_y1 = pred_ctr_y - 0.5 * (pred_h - 1.0) @@ -67,16 +68,16 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r Parameters ---------- - cls_prob_buf : tvm.schedule.Buffer + cls_prob_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred_buf : tvm.schedule.Buffer + bbox_pred_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 4 * num_anchors, height, width] - im_info_buf : tvm.schedule.Buffer + im_info_buf : tvm.te.schedule.Buffer 2-D with shape [batch, 3] - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5] The last dimension is in format of [w_start, h_start, w_end, h_end, score] @@ -110,8 +111,8 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r p_im_info = ib.buffer_ptr(im_info_buf) p_out = ib.buffer_ptr(out_buf) - idxm = tvm.indexmod - idxd = tvm.indexdiv + idxm = tvm.tir.indexmod + idxd = tvm.tir.indexdiv with ib.for_range(0, batch * height * width) as tid: w = idxm(tid, width) @@ -135,10 +136,10 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r regression_func = reg_iou if iou_loss else reg_bbox pred_x1, pred_y1, pred_x2, pred_y2 = regression_func(x1, y1, x2, y2, *delta) - pred_x1 = tvm.max(tvm.min(pred_x1, im_width - 1.0), 0.0) - pred_y1 = tvm.max(tvm.min(pred_y1, im_height - 1.0), 0.0) - pred_x2 = tvm.max(tvm.min(pred_x2, im_width - 1.0), 0.0) - pred_y2 = tvm.max(tvm.min(pred_y2, im_height - 1.0), 0.0) + pred_x1 = tvm.te.max(tvm.te.min(pred_x1, im_width - 1.0), 0.0) + pred_y1 = tvm.te.max(tvm.te.min(pred_y1, im_height - 1.0), 0.0) + pred_x2 = tvm.te.max(tvm.te.min(pred_x2, im_width - 1.0), 0.0) + pred_y2 = tvm.te.max(tvm.te.min(pred_y2, im_height - 1.0), 0.0) real_height = (im_height / feature_stride).astype('int32') real_width = (im_width / feature_stride).astype('int32') @@ -148,15 +149,15 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r min_size = p_im_info[b * 3 + 2] * rpn_min_size pred_score = p_score[((b * num_anchors * 2 + num_anchors + k) * height + h) * width + w] - pred_score = tvm.expr.Select(tvm.any(h >= real_height, w >= real_width), - -1.0, pred_score) + pred_score = tvm.tir.Select(tvm.tir.any(h >= real_height, w >= real_width), + -1.0, pred_score) p_out[out_index * 5 + 0] = pred_x1 p_out[out_index * 5 + 1] = pred_y1 p_out[out_index * 5 + 2] = pred_x2 p_out[out_index * 5 + 3] = pred_y2 p_out[out_index * 5 + 4] = pred_score - with ib.if_scope(tvm.any(bbox_w < min_size, bbox_h < min_size)): + with ib.if_scope(tvm.tir.any(bbox_w < min_size, bbox_h < min_size)): p_out[out_index * 5 + 0] -= min_size / 2.0 p_out[out_index * 5 + 1] -= min_size / 2.0 p_out[out_index * 5 + 2] += min_size / 2.0 @@ -171,10 +172,10 @@ def argsort_ir(data_buf, out_index_buf): Parameters ---------- - data_buf : tvm.schedule.Buffer + data_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox] - out_index_buf : tvm.schedule.Buffer + out_index_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Indices of data in sorted order. Returns @@ -188,7 +189,7 @@ def argsort_ir(data_buf, out_index_buf): index_out = ib.buffer_ptr(out_index_buf) temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local") temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local") - idxm = tvm.indexmod + idxm = tvm.tir.indexmod with ib.for_range(0, batch, for_type="unroll") as b: start = b * num_bbox for i in range(2): @@ -199,8 +200,8 @@ def argsort_ir(data_buf, out_index_buf): with ib.for_range(0, num_bbox) as k: with ib.for_range(0, (num_bbox + 1) // 2) as tid: offset = start + 2 * tid + idxm(k, 2) - with ib.if_scope(tvm.all(offset + 1 < num_bbox, - p_data[offset] < p_data[offset + 1])): + with ib.if_scope(tvm.tir.all(offset + 1 < num_bbox, + p_data[offset] < p_data[offset + 1])): temp_data[0] = p_data[offset] p_data[offset] = p_data[offset + 1] p_data[offset + 1] = temp_data[0] @@ -215,11 +216,11 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. The last dimension is in format of [w_start, h_start, w_end, h_end, score]. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. nms_threshold : float @@ -233,10 +234,10 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): def calculate_overlap(out_tensor, box_a_idx, box_b_idx): """Calculate overlap of two boxes. """ - w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) - h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) + w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) + - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) + h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) + - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) i = w * h u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * \ (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + \ @@ -254,7 +255,7 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): p_out[base_idx + i] = False with ib.for_range(0, num_bbox - 1) as l: with ib.for_range(0, num_bbox) as i: - with ib.if_scope(tvm.all(i < num_bbox, i > l, p_out[base_idx + l] == False)): + with ib.if_scope(tvm.tir.all(i < num_bbox, i > l, p_out[base_idx + l] == False)): iou = calculate_overlap(p_data, (base_idx + l) * 5, (base_idx + i) * 5) with ib.if_scope(iou > nms_threshold): p_out[base_idx + i] = True @@ -266,14 +267,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. The last dimension is in format of [w_start, h_start, w_end, h_end, score]. - remove_mask_buf : tvm.schedule.Buffer + remove_mask_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. @@ -302,14 +303,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): nkeep[b] += 1 with ib.for_range(0, batch) as b: with ib.if_scope(nkeep[b] > 0): - with ib.for_range(0, tvm.ceil( - tvm.const(rpn_post_nms_top_n, 'float32') / nkeep[b]).astype('int32')): + with ib.for_range(0, te.ceil( + tvm.tir.const(rpn_post_nms_top_n, 'float32') / nkeep[b]).astype('int32')): with ib.for_range(0, num_bbox) as j: offset_j = (b * num_bbox + j) * 5 offset_i = (b * rpn_post_nms_top_n + i[b]) * 5 - with ib.if_scope(tvm.all(i[b] < rpn_post_nms_top_n, - p_remove[(b*num_bbox+j)] == False)): - p_out[offset_i] = tvm.expr.Cast('float32', b) + with ib.if_scope(tvm.tir.all(i[b] < rpn_post_nms_top_n, + p_remove[(b*num_bbox+j)] == False)): + p_out[offset_i] = tvm.tir.Cast('float32', b) with ib.for_range(0, 4, for_type='unroll') as k: p_out[offset_i + k + 1] = p_sorted_bbox[offset_j + k] i[b] = i[b] + 1 @@ -324,13 +325,13 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred : tvm.Tensor + bbox_pred : tvm.te.Tensor 4-D with shape [batch, 4 * num_anchors, height, width] - im_info : tvm.Tensor + im_info : tvm.te.Tensor 2-D with shape [batch, 3] scales : list/tuple of float @@ -360,7 +361,7 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 2-D tensor with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. """ @@ -370,20 +371,20 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres num_bbox = height * width * num_anchors rpn_pre_nms_top_n = min(rpn_pre_nms_top_n, num_bbox) if rpn_pre_nms_top_n > 0 else num_bbox - bbox = tvm.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: - predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, - feature_stride, rpn_min_size, iou_loss), - dtype=bbox_pred.dtype) - score = tvm.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') + bbox = te.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: + predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, + feature_stride, rpn_min_size, iou_loss), + dtype=bbox_pred.dtype) + score = te.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') valid_count_shape = (1,) - valid_count = tvm.compute(valid_count_shape, lambda i: num_bbox) + valid_count = te.compute(valid_count_shape, lambda i: num_bbox) sorted_index = argsort(score, valid_count=valid_count, axis=1, is_ascend=False) - sorted_bbox = tvm.compute((batch, rpn_pre_nms_top_n, 5), - lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') - nms_remove_mask = tvm.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], - lambda ins, outs: nms_ir(ins[0], outs[0], threshold), - dtype='bool') - nms_out = tvm.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], - lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), - dtype=sorted_bbox.dtype) + sorted_bbox = te.compute((batch, rpn_pre_nms_top_n, 5), + lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') + nms_remove_mask = te.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], + lambda ins, outs: nms_ir(ins[0], outs[0], threshold), + dtype='bool') + nms_out = te.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], + lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), + dtype=sorted_bbox.dtype) return nms_out diff --git a/topi/python/topi/vision/rcnn/roi_align.py b/topi/python/topi/vision/rcnn/roi_align.py index a0bc5e2915972..9aa1ef9c75a2a 100644 --- a/topi/python/topi/vision/rcnn/roi_align.py +++ b/topi/python/topi/vision/rcnn/roi_align.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """Roi align operator""" import tvm +from tvm import te from ...util import get_const_tuple from ...cpp.util import bilinear_sample_nchw @@ -26,10 +27,10 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor + rois : tvm.te.Tensor 2-D with shape [num_roi, 5]. The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] @@ -45,7 +46,7 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ dtype = rois.dtype @@ -58,11 +59,11 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): pooled_size_h, pooled_size_w = pooled_size def _bilinear(i, c, y, x): - outside = tvm.any(y < -1.0, x < -1.0, y > height, x > width) - y = tvm.max(y, 0.0) - x = tvm.max(x, 0.0) + outside = tvm.tir.any(y < -1.0, x < -1.0, y > height, x > width) + y = tvm.te.max(y, 0.0) + x = tvm.te.max(x, 0.0) val = bilinear_sample_nchw(data, (i, c, y, x), height - 1, width - 1) - return tvm.if_then_else(outside, 0.0, val) + return tvm.tir.if_then_else(outside, 0.0, val) def _sample(i, c, ph, pw): roi = rois[i] @@ -74,27 +75,27 @@ def _sample(i, c, ph, pw): roi_end_w *= spatial_scale # force malformed ROIs to be 1x1 - roi_h = tvm.max(roi_end_h - roi_start_h, tvm.const(1.0, dtype)) - roi_w = tvm.max(roi_end_w - roi_start_w, tvm.const(1.0, dtype)) + roi_h = tvm.te.max(roi_end_h - roi_start_h, tvm.tir.const(1.0, dtype)) + roi_w = tvm.te.max(roi_end_w - roi_start_w, tvm.tir.const(1.0, dtype)) bin_h = roi_h / pooled_size_h bin_w = roi_w / pooled_size_w if sample_ratio > 0: - roi_bin_grid_h = roi_bin_grid_w = tvm.const(sample_ratio, 'int32') + roi_bin_grid_h = roi_bin_grid_w = tvm.tir.const(sample_ratio, 'int32') else: - roi_bin_grid_h = tvm.ceil(roi_h / pooled_size_h).astype('int32') - roi_bin_grid_w = tvm.ceil(roi_w / pooled_size_w).astype('int32') + roi_bin_grid_h = te.ceil(roi_h / pooled_size_h).astype('int32') + roi_bin_grid_w = te.ceil(roi_w / pooled_size_w).astype('int32') count = roi_bin_grid_h * roi_bin_grid_w - rh = tvm.reduce_axis((0, roi_bin_grid_h)) - rw = tvm.reduce_axis((0, roi_bin_grid_w)) + rh = te.reduce_axis((0, roi_bin_grid_h)) + rw = te.reduce_axis((0, roi_bin_grid_w)) roi_start_h += ph * bin_h roi_start_w += pw * bin_w - return tvm.sum(_bilinear(batch_index, c, - roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h, - roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count, - axis=[rh, rw]) + return te.sum(_bilinear(batch_index, c, + roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h, + roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count, + axis=[rh, rw]) - return tvm.compute((num_roi, channel, pooled_size_h, pooled_size_w), _sample, - tag='pool,roi_align_nchw') + return te.compute((num_roi, channel, pooled_size_h, pooled_size_w), _sample, + tag='pool,roi_align_nchw') diff --git a/topi/python/topi/vision/rcnn/roi_pool.py b/topi/python/topi/vision/rcnn/roi_pool.py index f346f580b3ba9..a206f34831a1e 100644 --- a/topi/python/topi/vision/rcnn/roi_pool.py +++ b/topi/python/topi/vision/rcnn/roi_pool.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """ROI pool operator""" import tvm +from tvm import te from ...util import get_const_tuple def roi_pool_nchw(data, rois, pooled_size, spatial_scale): @@ -24,10 +25,10 @@ def roi_pool_nchw(data, rois, pooled_size, spatial_scale): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor + rois : tvm.te.Tensor 2-D with shape [num_roi, 5]. The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] @@ -40,7 +41,7 @@ def roi_pool_nchw(data, rois, pooled_size, spatial_scale): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ dtype = rois.dtype @@ -57,36 +58,36 @@ def _pool(i, c, ph, pw): batch_index = roi[0].astype('int32') roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4] - roi_start_h = tvm.round(roi_start_h * spatial_scale).astype('int32') - roi_start_w = tvm.round(roi_start_w * spatial_scale).astype('int32') - roi_end_h = tvm.round(roi_end_h * spatial_scale).astype('int32') - roi_end_w = tvm.round(roi_end_w * spatial_scale).astype('int32') + roi_start_h = te.round(roi_start_h * spatial_scale).astype('int32') + roi_start_w = te.round(roi_start_w * spatial_scale).astype('int32') + roi_end_h = te.round(roi_end_h * spatial_scale).astype('int32') + roi_end_w = te.round(roi_end_w * spatial_scale).astype('int32') # force malformed ROIs to be 1x1 - roi_h = tvm.max(roi_end_h - roi_start_h + 1, tvm.const(1, 'int32')) - roi_w = tvm.max(roi_end_w - roi_start_w + 1, tvm.const(1, 'int32')) + roi_h = tvm.te.max(roi_end_h - roi_start_h + 1, tvm.tir.const(1, 'int32')) + roi_w = tvm.te.max(roi_end_w - roi_start_w + 1, tvm.tir.const(1, 'int32')) bin_h = roi_h.astype(dtype) / pooled_size_h bin_w = roi_w.astype(dtype) / pooled_size_w # use epsilon to prevent floating point precision loss in floor/ceil - epsilon = tvm.const(0.00001, dtype) - hstart = tvm.floor(ph * bin_h + epsilon).astype('int32') - wstart = tvm.floor(pw * bin_w + epsilon).astype('int32') - hend = tvm.ceil((ph + 1) * bin_h - epsilon).astype('int32') - wend = tvm.ceil((pw + 1) * bin_w - epsilon).astype('int32') - hstart = tvm.min(tvm.max(hstart + roi_start_h, 0), height) - wstart = tvm.min(tvm.max(wstart + roi_start_w, 0), width) - hend = tvm.min(tvm.max(hend + roi_start_h, 0), height) - wend = tvm.min(tvm.max(wend + roi_start_w, 0), width) + epsilon = tvm.tir.const(0.00001, dtype) + hstart = te.floor(ph * bin_h + epsilon).astype('int32') + wstart = te.floor(pw * bin_w + epsilon).astype('int32') + hend = te.ceil((ph + 1) * bin_h - epsilon).astype('int32') + wend = te.ceil((pw + 1) * bin_w - epsilon).astype('int32') + hstart = tvm.te.min(tvm.te.max(hstart + roi_start_h, 0), height) + wstart = tvm.te.min(tvm.te.max(wstart + roi_start_w, 0), width) + hend = tvm.te.min(tvm.te.max(hend + roi_start_h, 0), height) + wend = tvm.te.min(tvm.te.max(wend + roi_start_w, 0), width) - non_empty = tvm.all(hstart < hend, wstart < wend) - min_value = lambda dtype: tvm.if_then_else(non_empty, tvm.min_value(dtype), - tvm.const(0.0, dtype)) + non_empty = tvm.tir.all(hstart < hend, wstart < wend) + min_value = lambda dtype: tvm.tir.if_then_else( + non_empty, tvm.te.min_value(dtype), tvm.tir.const(0.0, dtype)) # pylint: disable=unnecessary-lambda - _max = tvm.comm_reducer(lambda x, y: tvm.max(x, y), min_value, name='max') - rh = tvm.reduce_axis((0, hend - hstart), 'rh') - rw = tvm.reduce_axis((0, wend - wstart), 'rw') + _max = te.comm_reducer(lambda x, y: tvm.te.max(x, y), min_value, name='max') + rh = te.reduce_axis((0, hend - hstart), 'rh') + rw = te.reduce_axis((0, wend - wstart), 'rw') return _max(data[batch_index, c, hstart+rh, wstart+rw], axis=[rh, rw]) - return tvm.compute((num_roi, channel, pooled_size_h, pooled_size_w), _pool, tag="pool,roi_pool") + return te.compute((num_roi, channel, pooled_size_h, pooled_size_w), _pool, tag="pool,roi_pool") diff --git a/topi/python/topi/vision/reorg.py b/topi/python/topi/vision/reorg.py index 3ba5e8495a223..ec790fafbdddf 100644 --- a/topi/python/topi/vision/reorg.py +++ b/topi/python/topi/vision/reorg.py @@ -27,7 +27,7 @@ def reorg(data, stride): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] stride : int @@ -35,7 +35,7 @@ def reorg(data, stride): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return cpp.vision.reorg(data, stride) diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index 4309af4303f1c..8f287b949f2cf 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -16,11 +16,10 @@ # under the License. # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable """SSD multibox operators""" -from __future__ import absolute_import as _abs import tvm from tvm import hybrid -from tvm.intrin import exp, sqrt +from tvm.tir import exp, sqrt import topi @@ -32,7 +31,7 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray 4-D tensor with shape [batch, channel, height, width]] sizes : tvm ConsExpr @@ -49,7 +48,7 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): Returns ------- - output : tvm.Tensor or numpy NDArray + output : tvm.te.Tensor or numpy NDArray 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ in_height = data.shape[2] @@ -80,7 +79,7 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): * sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0 h = sizes[0] / sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0 count = i * in_width * (num_sizes + num_ratios - 1) \ - + j * (num_sizes + num_ratios - 1) + k + + j * (num_sizes + num_ratios - 1) + k output[0, count, 0] = center_w - w output[0, count, 1] = center_h - h output[0, count, 2] = center_w + w @@ -94,7 +93,7 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, c_in, h_in, w_in]] sizes : tuple of float @@ -114,11 +113,11 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ - out = hybrid_multibox_prior(data, tvm.convert(sizes), tvm.convert(ratios), - tvm.convert(steps), tvm.convert(offsets)) + out = hybrid_multibox_prior(data, tvm.runtime.convert(sizes), tvm.runtime.convert(ratios), + tvm.runtime.convert(steps), tvm.runtime.convert(offsets)) if clip: out = topi.clip(out, 0, 1) return out @@ -166,19 +165,19 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, Parameters ---------- - cls_prob : tvm.Tensor or numpy NDArray + cls_prob : tvm.te.Tensor or numpy NDArray 3-D tensor of class probabilities. - loc_pred : tvm.Tensor or numpy NDArray + loc_pred : tvm.te.Tensor or numpy NDArray 2-D tensor of location regression predictions. - anchor : tvm.Tensor or numpy NDArray + anchor : tvm.te.Tensor or numpy NDArray 3-D tensor of prior anchor boxes. - clip : tvm.const + clip : tvm.tir.const Whether to clip out-of-boundary boxes. - threshold : tvm.const + threshold : tvm.tir.const Threshold to be a positive prediction. variances : tvm.nd.NDArray @@ -186,10 +185,10 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, Returns ------- - out_loc : tvm.Tensor or numpy NDArray + out_loc : tvm.te.Tensor or numpy NDArray 3-D tensor of transformed location. - valid_count : tvm.Tensor or numpy NDArray + valid_count : tvm.te.Tensor or numpy NDArray 1_d tensor of valid counts for boxes. """ batch_size = cls_prob.shape[0] @@ -238,13 +237,13 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. clip : boolean @@ -258,12 +257,12 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 Returns ------- - ret : tuple of tvm.Tensor + ret : tuple of tvm.te.Tensor """ return hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, - tvm.const(clip, "bool"), - tvm.const(threshold, "float32"), - tvm.convert(variances)) + tvm.tir.const(clip, "bool"), + tvm.tir.const(threshold, "float32"), + tvm.runtime.convert(variances)) def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nms_threshold=0.5, force_suppress=False, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=-1): @@ -271,13 +270,13 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. clip : boolean @@ -300,7 +299,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape (batch_size, num_anchors, 6) """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, diff --git a/topi/python/topi/x86/batch_matmul.py b/topi/python/topi/x86/batch_matmul.py index a7cb9e98f11fa..539a918f1f87e 100644 --- a/topi/python/topi/x86/batch_matmul.py +++ b/topi/python/topi/x86/batch_matmul.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name,too-many-locals,unused-variable """x86 batch_matmul operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity from tvm.contrib import cblas @@ -34,13 +33,13 @@ def batch_matmul(cfg, x, y): ---------- cfg : ConfigSpace Autotvm tuning space config file - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, M, N] """ assert len(x.shape) == 3 and len( @@ -54,10 +53,10 @@ def batch_matmul(cfg, x, y): if cfg.is_fallback: _default_batch_matmul_config(cfg, M, N, K) - k = tvm.reduce_axis((0, K), name='k') - C = tvm.compute( + k = te.reduce_axis((0, K), name='k') + C = te.compute( (B, M, N), - lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k), + lambda b, i, j: te.sum(x[b, i, k] * y[b, j, k], axis=k), tag='batch_matmul') return C @@ -79,7 +78,7 @@ def schedule_batch_matmul(cfg, outs): sch: Schedule The computation schedule for the op. """ - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if "batch_matmul" in op.tag: @@ -140,13 +139,13 @@ def batch_matmul_cblas(cfg, x, y): ---------- cfg : ConfigSpace Autotvm tuning space config file - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, M, N] """ assert len(x.shape) == 3 and len( diff --git a/topi/python/topi/x86/binarize_pack.py b/topi/python/topi/x86/binarize_pack.py index bab91a940edc5..b4a01a5d2220f 100644 --- a/topi/python/topi/x86/binarize_pack.py +++ b/topi/python/topi/x86/binarize_pack.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name """Schedule for binarization and bit-packing.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te def schedule_binarize_pack(outs): @@ -34,8 +33,8 @@ def schedule_binarize_pack(outs): s: Schedule The computation schedule for binarize_pack. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Out): s[Out].parallel(Out.op.axis[0]) diff --git a/topi/python/topi/x86/binary_dense.py b/topi/python/topi/x86/binary_dense.py index ccf74e7bd230d..d90694e819cb3 100644 --- a/topi/python/topi/x86/binary_dense.py +++ b/topi/python/topi/x86/binary_dense.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for binary dense operator.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .. import tag @@ -35,8 +34,8 @@ def schedule_binary_dense(outs): s: Schedule The computation schedule for binary_dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(A, B, C): @@ -56,7 +55,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule binary_dense elif OP.tag == 'binary_dense': diff --git a/topi/python/topi/x86/bitserial_conv2d.py b/topi/python/topi/x86/bitserial_conv2d.py index 2ec5653756540..37fe352827b01 100644 --- a/topi/python/topi/x86/bitserial_conv2d.py +++ b/topi/python/topi/x86/bitserial_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name,unused-variable,invalid-name """Bitserial conv2d schedule on x86""" import tvm +from tvm import te from tvm import autotvm from .. import tag from ..util import get_const_int, get_const_tuple @@ -94,40 +95,40 @@ def bitserial_conv2d_nchw(cfg, data, kernel, stride, padding, in_bits, weight_bi else: data_pad = data_q - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw, b: \ - data_pad[b][n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw, b: \ + data_pad[b][n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], name='data_vec') if len(kernel.shape) == 4: - kernel_vec = tvm.compute(kvshape, lambda co, ci, dh, dw, b, vc: \ - kernel_q[b][co*VC+vc][ci][dh][dw], name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, ci, dh, dw, b, vc: \ + kernel_q[b][co*VC+vc][ci][dh][dw], name='kernel_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') - b1 = tvm.reduce_axis((0, IB), name='ib') - b2 = tvm.reduce_axis((0, KB), name='kb') + ci = te.reduce_axis((0, CI), name='ci') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') + b1 = te.reduce_axis((0, IB), name='ib') + b2 = te.reduce_axis((0, KB), name='kb') def _conv(n, co, h, w, vh, vw, vc): b1b2 = (b1+b2).astype(out_dtype) if unipolar: - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) & kernel_vec[co, ci, dh, dw, b2, vc].astype(out_dtype)) - - tvm.popcount( - data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) - & ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2, - axis=[ci, dh, dw, b1, b2]) + tvm.tir.popcount( + data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) + & ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2, + axis=[ci, dh, dw, b1, b2]) - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1] & kernel_vec[co, ci, dh, dw, b2, vc])).astype(out_dtype) << b1b2, - axis=[ci, dh, dw, b1, b2]) + axis=[ci, dh, dw, b1, b2]) - conv = tvm.compute(ovshape, _conv, name='conv_out') - idxd = tvm.indexdiv - idxm = tvm.indexmod + conv = te.compute(ovshape, _conv, name='conv_out') + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod - return tvm.compute( + return te.compute( oshape, lambda n, co, h, w: conv[n, idxd(co, VC), idxd(h, VH), idxd(w, VW), @@ -202,38 +203,38 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, in_bits, weight_bi else: data_pad = data_q - data_vec = tvm.compute(dvshape, lambda n, h, w, vh, vw, ci, b: \ - data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][ci][b], name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, vh, vw, ci, b: \ + data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][ci][b], name='data_vec') - kernel_vec = tvm.compute(kvshape, lambda co, dh, dw, ci, vc, b: \ - kernel_q[dh][dw][ci][co*VC+vc][b], name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, dh, dw, ci, vc, b: \ + kernel_q[dh][dw][ci][co*VC+vc][b], name='kernel_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') - b1 = tvm.reduce_axis((0, IB), name='ib') - b2 = tvm.reduce_axis((0, KB), name='kb') + ci = te.reduce_axis((0, CI), name='ci') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') + b1 = te.reduce_axis((0, IB), name='ib') + b2 = te.reduce_axis((0, KB), name='kb') def _conv(n, h, w, co, vh, vw, vc): b1b2 = (b1+b2).astype(out_dtype) if unipolar: - return tvm.sum( - ((tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] & - kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) - - tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1]& - ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2), + return te.sum( + ((tvm.tir.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] & + kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) - + tvm.tir.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1]& + ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2), axis=[dh, dw, ci, b1, b2]) - return tvm.sum(tvm.popcount( + return te.sum(tvm.tir.popcount( data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] & kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) << b1b2, - axis=[dh, dw, ci, b1, b2]) + axis=[dh, dw, ci, b1, b2]) - conv = tvm.compute(ovshape, _conv, name='conv') + conv = te.compute(ovshape, _conv, name='conv') - idxd = tvm.indexdiv - idxm = tvm.indexmod - return tvm.compute( + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod + return te.compute( oshape, lambda n, h, w, co: conv[n, idxd(h, VH), idxd(w, VW), idxd(co, VC), @@ -250,7 +251,7 @@ def schedule_bitserial_conv2d_nhwc(cfg, outs): def _schedule_bitserial_conv2d(cfg, outs): """CPU schedule for bitserial convolutions NCHW and NHWC""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -262,7 +263,7 @@ def traverse(op): s[op].compute_inline() for tensor in op.input_tensors: if tensor.op.input_tensors and (tensor.op not in scheduled_ops): - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif 'spatial_bitserial_conv_nchw' in op.tag or 'spatial_bitserial_conv_nhwc' in op.tag: @@ -273,7 +274,7 @@ def traverse(op): data_q = data_vec.op.input_tensors[0] data = data_q.op.input_tensors[0] data_pad = None - if isinstance(data_q.op, tvm.tensor.ComputeOp) and "pad" in data_q.op.tag: + if isinstance(data_q.op, tvm.te.ComputeOp) and "pad" in data_q.op.tag: data_pad = data_q data_q = data data = data_q.op.input_tensors[0] @@ -320,7 +321,7 @@ def _schedule_bitserial_conv2d_nchw(cfg, s, data_q, data_pad, data_vec, VH = cfg["tile_oh"].size[-1] VW = cfg["tile_ow"].size[-1] - ##### Schedule Data padding, and bitpacking + ##### Schedule Data padding, and bitpacking if data_pad is not None: s[data_pad].compute_inline() diff --git a/topi/python/topi/x86/bitserial_dense.py b/topi/python/topi/x86/bitserial_dense.py index d464cae951b3c..cbc6ac83ffd9c 100644 --- a/topi/python/topi/x86/bitserial_dense.py +++ b/topi/python/topi/x86/bitserial_dense.py @@ -18,6 +18,7 @@ """Schedule for bitserial dense operator.""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from topi.util import get_const_int, get_const_tuple from .. import tag @@ -30,14 +31,14 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype='uint3 Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] or 3-D with shape [out_dim, weight_bits, in_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype) @@ -68,26 +69,26 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype='uint3 wvshape = (X//VX, WB, VX, K) oshape = (Y, X) - k = tvm.reduce_axis((0, K), name='k') - db = tvm.reduce_axis((0, DB), name='db') - wb = tvm.reduce_axis((0, WB), name='wb') + k = te.reduce_axis((0, K), name='k') + db = te.reduce_axis((0, DB), name='db') + wb = te.reduce_axis((0, WB), name='wb') # Tile data and weights - weight_vec = tvm.compute(wvshape, lambda xo, wb, vx, k: - weight_packed[xo*VX+vx][wb][k], name='weight_vec') + weight_vec = te.compute(wvshape, lambda xo, wb, vx, k: + weight_packed[xo*VX+vx][wb][k], name='weight_vec') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - matmul_unipolar = tvm.compute(oshape, lambda i, j: tvm.sum( - (tvm.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) - - tvm.popcount(~weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) - ).astype(out_dtype) + matmul_unipolar = te.compute(oshape, lambda i, j: te.sum( + (tvm.tir.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) - + tvm.tir.popcount(~weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) + ).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense_unipolar') - matmul = tvm.compute(oshape, lambda i, j: tvm.sum( - tvm.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k] - ).astype(out_dtype) + matmul = te.compute(oshape, lambda i, j: te.sum( + tvm.tir.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k] + ).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense') # binary ops @@ -112,8 +113,8 @@ def schedule_bitserial_dense(cfg, outs): s: Schedule The computation schedule for bitserial_dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(cfg, s, data_vec, weight_vec, output): s[data_vec].parallel(s[data_vec].op.axis[0]) @@ -149,7 +150,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif op.tag == 'bitserial_dense' or 'bitserial_dense_unipolar': diff --git a/topi/python/topi/x86/conv1d.py b/topi/python/topi/x86/conv1d.py index 70c2a6881dbf5..1e30c9f26b7e2 100644 --- a/topi/python/topi/x86/conv1d.py +++ b/topi/python/topi/x86/conv1d.py @@ -16,14 +16,13 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument,invalid-name """Conv1D schedule on for Intel CPU""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .. import tag def schedule_conv1d_ncw(outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op scheduled_ops = [] @@ -40,18 +39,18 @@ def traverse(op): s[op].parallel(fused) s[op].vectorize(w) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv1d_ncw' in op.tag: conv = op.output(0) kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, te.tensor.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] @@ -77,7 +76,7 @@ def traverse(op): def schedule_conv1d_nwc(outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op scheduled_ops = [] @@ -94,18 +93,18 @@ def traverse(op): s[op].parallel(fused) s[op].vectorize(c) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv1d_nwc' in op.tag: conv = op.output(0) kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, te.tensor.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] diff --git a/topi/python/topi/x86/conv2d.py b/topi/python/topi/x86/conv2d.py index 2403b01b7453c..81d848a4762f0 100644 --- a/topi/python/topi/x86/conv2d.py +++ b/topi/python/topi/x86/conv2d.py @@ -21,6 +21,7 @@ import logging import tvm +from tvm import te from tvm import autotvm from .. import nn from ..nn.conv2d import conv2d_infer_layout, _get_workload as _get_conv2d_workload @@ -39,11 +40,11 @@ def _get_default_config(cfg, data, kernel, strides, padding, out_dtype, is_depth """ static_data_shape = [] for dim in get_const_tuple(data.shape): - if isinstance(dim, tvm.expr.Var): + if isinstance(dim, tvm.tir.Var): static_data_shape.append(1) else: static_data_shape.append(dim) - data = tvm.placeholder(static_data_shape, dtype=data.dtype) + data = te.placeholder(static_data_shape, dtype=data.dtype) if is_depthwise: wkl = _get_depthwise_conv2d_workload(data, kernel, strides, padding, out_dtype) from .depthwise_conv2d import _fallback_schedule @@ -61,7 +62,7 @@ def _conv2d_infer_layout(workload, cfg): _, data, kernel, strides, padding, dilation, layout, _, dtype = workload batch_size, in_channel, in_height, in_width = data[1] out_channel, _, k_height, k_width = kernel[1] - idxdiv = tvm.indexdiv + idxdiv = tvm.tir.indexdiv pt, pl, pb, pr = get_pad_tuple(padding, (k_height, k_width)) out_height = idxdiv(in_height + pt + pb - k_height, strides[0]) + 1 @@ -75,20 +76,20 @@ def _conv2d_infer_layout(workload, cfg): def schedule_conv2d_nhwc(outs): """Create schedule for conv2d_nhwc""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op def _callback(op): if 'conv2d_nhwc' in op.tag: conv = op.output(0) kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] @@ -132,11 +133,11 @@ def _pack_data(cfg, data, kernel): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn), lambda occ, icc, k_h, k_w, icb, ocb: kernel[occ * oc_bn + ocb, icc * ic_bn + icb, k_h, k_w], @@ -176,9 +177,9 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, out_layo # If no config was set, we can fallback to default config. if cfg.is_fallback: - _get_default_config(cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width), - dtype=kernel.dtype), + _get_default_config(cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kernel_height, kernel_width), + dtype=kernel.dtype), strides, padding, out_dtype) # Pack data if raw 4-D data is provided. @@ -198,8 +199,8 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, out_layo @autotvm.register_topi_schedule("conv2d_NCHWc.x86") def schedule_conv2d_NCHWc(cfg, outs): """Create schedule for tensors""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'conv2d_NCHWc' in op.tag: diff --git a/topi/python/topi/x86/conv2d_alter_op.py b/topi/python/topi/x86/conv2d_alter_op.py index 377d81539b7c3..5ee691b073626 100644 --- a/topi/python/topi/x86/conv2d_alter_op.py +++ b/topi/python/topi/x86/conv2d_alter_op.py @@ -20,6 +20,7 @@ import logging import tvm +from tvm import te from tvm import relay from tvm import autotvm from .conv2d import _get_default_config @@ -79,10 +80,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel//oc_bn, in_channel//ic_bn, - kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn, + kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"], new_attrs["out_layout"], out_dtype], topi_tmpl) @@ -118,15 +119,15 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config. - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel // oc_bn, - in_channel // ic_bn, - kh, - kw, - ic_bn // n_elems, - oc_bn, - n_elems), dtype=kernel_dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel // oc_bn, + in_channel // ic_bn, + kh, + kw, + ic_bn // n_elems, + oc_bn, + n_elems), dtype=kernel_dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'], new_attrs['out_layout'], out_dtype], topi_tmpl) @@ -152,9 +153,9 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config. - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'], new_attrs['out_layout'], out_dtype], topi_tmpl) diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py index 083fff48d774a..432f8b2875131 100644 --- a/topi/python/topi/x86/conv2d_avx_1x1.py +++ b/topi/python/topi/x86/conv2d_avx_1x1.py @@ -18,6 +18,7 @@ """1x1 Conv2D schedule on for Intel CPU""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity @@ -65,7 +66,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): _, _, _, _, ic_bn = get_const_tuple(data_vec.shape) # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, tvm.te.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -78,7 +79,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): # this part will be folded during Relay fold_constant pass. s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, tvm.te.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. # this should only be used by x86 conv2d_nchw, which is for @@ -190,23 +191,23 @@ def _declaration_conv_nhwc_pack(cfg, Input, Filter, stride, padding, dilation, o # packing the Filter to let memory access be consecutive for AVX512 intrinsic # Done in pre-compute stage - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod packw_shape = (kernel_h, kernel_w, idxd(num_filter, 16), 16 * idxd(channel, 4), 4) - PackW = tvm.compute(packw_shape, - lambda a, b, c, d, e: - Filter[a, b, - c*16 + idxm(d, 16), - idxd(d, 16) * 4 + e], - name="packed_filter") - - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - Output = tvm.compute( + PackW = te.compute(packw_shape, + lambda a, b, c, d, e: + Filter[a, b, + c*16 + idxm(d, 16), + idxd(d, 16) * 4 + e], + name="packed_filter") + + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + Output = te.compute( (batch, out_height, out_width, out_channel), - lambda nn, yy, xx, ff: tvm.sum( + lambda nn, yy, xx, ff: te.sum( PaddedInput[nn, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w, rc].astype(out_dtype) * PackW[ry, rx, idxd(ff, 16), @@ -238,7 +239,7 @@ def _schedule_conv_nhwc_pack_int8(s, cfg, data, conv_out, last): ic_factor, oc_factor = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] # schedule data A = data - if isinstance(s[A].op, tvm.tensor.ComputeOp): + if isinstance(s[A].op, tvm.te.ComputeOp): batch, ih, iw, ic = s[A].op.axis d_ic_chunk, d_ic_block = s[A].split(ic, factor=4) s[A].vectorize(d_ic_block) diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py index 085d0aeb67c3b..ebed14cb924a4 100644 --- a/topi/python/topi/x86/conv2d_avx_common.py +++ b/topi/python/topi/x86/conv2d_avx_common.py @@ -16,7 +16,6 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument,invalid-name """Conv2D schedule on for Intel CPU""" -from __future__ import absolute_import as _abs import tvm from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity @@ -89,7 +88,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): _, _, _, _, ic_bn = get_const_tuple(data_vec.shape) # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, tvm.te.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -102,7 +101,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): # this part will be folded during Relay fold_constant pass. s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, tvm.te.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. # this should only be used by x86 conv2d_nchw, which is for diff --git a/topi/python/topi/x86/conv2d_int8.py b/topi/python/topi/x86/conv2d_int8.py index 64fe92bbaaa4c..4b111435f704c 100644 --- a/topi/python/topi/x86/conv2d_int8.py +++ b/topi/python/topi/x86/conv2d_int8.py @@ -19,6 +19,7 @@ """Conv2D int8 schedule on x86""" import tvm +from tvm import te from tvm import autotvm from ..nn.conv2d import _get_workload as _get_conv2d_workload from .. import tag @@ -96,11 +97,11 @@ def _pack_data(cfg, data, kernel): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, ic_chunk, kh, kw, ic_bn//n_elems, oc_bn, n_elems), lambda occ, icc, k_h, k_w, icbc, ocb, icbb: kernel[occ * oc_bn + ocb, @@ -145,9 +146,9 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, padding, # If no config was set, we can fallback to default config. if cfg.is_fallback: _get_default_config_int8( - cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width), - dtype=kernel.dtype), + cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kernel_height, kernel_width), + dtype=kernel.dtype), strides, padding, out_dtype) # Pack data if raw 4-D data is provided. @@ -168,7 +169,7 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, padding, @autotvm.register_topi_schedule("conv2d_NCHWc_int8.x86") def schedule_conv2d_NCHWc_int8(cfg, outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): """Traverse operators from computation graph""" @@ -192,7 +193,7 @@ def _callback(op): @autotvm.register_topi_schedule("conv2d_nhwc_pack_int8.x86") def schedule_conv2d_nhwc_pack_int8(cfg, outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op scheduled_ops = [] @@ -209,7 +210,7 @@ def traverse(op): s[op].parallel(fused) s[op].vectorize(c) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv2d_nhwc_pack_int8' in op.tag: @@ -217,9 +218,9 @@ def traverse(op): kernel = conv_out.op.input_tensors[1] data_vec = conv_out.op.input_tensors[0] data = data_vec.op.input_tensors[0] \ - if isinstance(data_vec.op, tvm.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ + if isinstance(data_vec.op, te.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ else data_vec - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] diff --git a/topi/python/topi/x86/conv2d_transpose.py b/topi/python/topi/x86/conv2d_transpose.py index 71f47d6c037b5..f90edb5e29110 100644 --- a/topi/python/topi/x86/conv2d_transpose.py +++ b/topi/python/topi/x86/conv2d_transpose.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument,no-member """Conv2D Transpose schedule on x86""" -import tvm +from tvm import te from ..util import traverse_inline from .. import nn from .conv2d import conv2d_nchw, schedule_conv2d_nchw @@ -30,7 +30,7 @@ def conv2d_transpose_nchw(data, kernel, strides, padding, out_dtype): def schedule_conv2d_transpose_nchw(outs): """Create schedule for tensors""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs s = schedule_conv2d_nchw(outs) def _callback(op): if 'unpack_nchwc' in op.tag: diff --git a/topi/python/topi/x86/conv3d.py b/topi/python/topi/x86/conv3d.py index 1e156509c0a8e..989ec4cf4ffc2 100644 --- a/topi/python/topi/x86/conv3d.py +++ b/topi/python/topi/x86/conv3d.py @@ -19,6 +19,7 @@ """Conv3D operators""" from collections import namedtuple import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity from ..util import traverse_inline @@ -39,12 +40,12 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype): Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 5-D input data with shapes: [batch, in_channel, in_depth, in_height, in_width] for NCDHW layout [batch, in_depth, in_height, in_width, in_channel] for NDHWC layout - filter : tvm.Tensor + filter : tvm.te.Tensor 5-D filter with shape [kernel_depth, kernel_height, kernel_width, in_channels, out_channels] strides : int or a list/tuple of three ints @@ -58,7 +59,7 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_depth, out_height, out_width, out_channel] for NDHWC layout 5-D with shape [batch, out_channel, out_depth, out_height, out_width] for NCDHW layout """ @@ -86,7 +87,7 @@ def schedule_conv3d_ndhwc(cfg, outs): s: Schedule The computation schedule for conv3d. """ - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _traverse(op): if 'conv3d_ndhwc' in op.tag: @@ -94,12 +95,12 @@ def _traverse(op): conv_out = op.input_tensors[0] kernel_vec = conv_out.op.input_tensors[1] kernel = kernel_vec.op.input_tensors[0] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data_vec = conv_out.op.input_tensors[0] data = data_vec.op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] @@ -154,47 +155,47 @@ def _conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype): # fetch schedule ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] shape = (batch_size, in_channel // ic_bn, pad_depth, pad_height, ic_bn, pad_width) - data_vec = tvm.compute(shape, - lambda n, C, d, h, c, w: data_pad[n, d, h, w, C * ic_bn + c], - name='data_vec') + data_vec = te.compute(shape, + lambda n, C, d, h, c, w: data_pad[n, d, h, w, C * ic_bn + c], + name='data_vec') # pack kernel shape = (num_filter//oc_bn, in_channel//ic_bn, kernel_depth, kernel_height, kernel_width, ic_bn, oc_bn) - kernel_vec = tvm.compute(shape, - lambda CO, CI, d, h, w, ci, co: - kernel[d, h, w, CI * ic_bn + ci, CO * oc_bn + co], - name='kernel_vec') + kernel_vec = te.compute(shape, + lambda CO, CI, d, h, w, ci, co: + kernel[d, h, w, CI * ic_bn + ci, CO * oc_bn + co], + name='kernel_vec') # convolution oshape = (batch_size, num_filter//oc_bn, out_depth, out_height, out_width, oc_bn) unpack_shape = (batch_size, out_depth, out_height, out_width, num_filter) - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - kd = tvm.reduce_axis((0, kernel_depth), name='kd') - idxmod = tvm.indexmod - idxdiv = tvm.indexdiv - - conv = tvm.compute(oshape, lambda n, oc_chunk, od, oh, ow, oc_block: - tvm.sum(data_vec[n, - idxdiv(ic, ic_bn), - od*DSTR+kd*dilation_d, - oh*HSTR+kh*dilation_h, + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + kd = te.reduce_axis((0, kernel_depth), name='kd') + idxmod = tvm.tir.indexmod + idxdiv = tvm.tir.indexdiv + + conv = te.compute(oshape, lambda n, oc_chunk, od, oh, ow, oc_block: + te.sum(data_vec[n, + idxdiv(ic, ic_bn), + od*DSTR+kd*dilation_d, + oh*HSTR+kh*dilation_h, + idxmod(ic, ic_bn), + ow*WSTR+kw*dilation_w].astype(out_dtype) * + kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kd, kh, kw, idxmod(ic, ic_bn), - ow*WSTR+kw*dilation_w].astype(out_dtype) * - kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kd, kh, kw, - idxmod(ic, ic_bn), - oc_block].astype(out_dtype), - axis=[kd, kh, kw, ic]), name='conv') - conv_unpacked = tvm.compute(unpack_shape, - lambda n, d, h, w, c: conv[n, idxdiv(c, oc_bn), - d, h, w, - idxmod(c, oc_bn)] - .astype(out_dtype), - name='output_unpack', - tag='conv3d_ndhwc') + oc_block].astype(out_dtype), + axis=[kd, kh, kw, ic]), name='conv') + conv_unpacked = te.compute(unpack_shape, + lambda n, d, h, w, c: conv[n, idxdiv(c, oc_bn), + d, h, w, + idxmod(c, oc_bn)] + .astype(out_dtype), + name='output_unpack', + tag='conv3d_ndhwc') return conv_unpacked @@ -231,11 +232,11 @@ def _get_default_config(cfg, data, kernel, strides, padding, out_dtype, layout): static_data_shape = [] for dim in get_const_tuple(data.shape): - if isinstance(dim, tvm.expr.Var): + if isinstance(dim, tvm.tir.Var): static_data_shape.append(1) else: static_data_shape.append(dim) - data = tvm.placeholder(static_data_shape, dtype=data.dtype) + data = te.placeholder(static_data_shape, dtype=data.dtype) wkl = _get_conv3d_workload(data, kernel, strides, padding, out_dtype, layout) _fallback_schedule(cfg, wkl) diff --git a/topi/python/topi/x86/dense.py b/topi/python/topi/x86/dense.py index ea89cf4779b09..3e99d0612f968 100644 --- a/topi/python/topi/x86/dense.py +++ b/topi/python/topi/x86/dense.py @@ -18,6 +18,7 @@ """x86 dense operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity from tvm.contrib import cblas @@ -79,11 +80,11 @@ def _schedule_dense_nopack_template(cfg, s, C): def _default_dense_pack_config(cfg, M, N, K): # Generate default schedule for dynamic shape. - if isinstance(M, tvm.expr.Var): + if isinstance(M, tvm.tir.Var): M = 16 - if isinstance(N, tvm.expr.Var): + if isinstance(N, tvm.tir.Var): N = 16 - if isinstance(K, tvm.expr.Var): + if isinstance(K, tvm.tir.Var): K = 16 vec_width = get_fp32_len() @@ -116,11 +117,11 @@ def _default_dense_pack_config(cfg, M, N, K): def _default_dense_nopack_config(cfg, M, N, K): # Generate default schedule for dynamic shape. - if isinstance(M, tvm.expr.Var): + if isinstance(M, tvm.tir.Var): M = 16 - if isinstance(N, tvm.expr.Var): + if isinstance(N, tvm.tir.Var): N = 16 - if isinstance(K, tvm.expr.Var): + if isinstance(K, tvm.tir.Var): K = 16 vec_width = get_fp32_len() @@ -141,33 +142,33 @@ def dense_nopack(cfg, data, weight, bias=None, out_dtype=None): M, K = get_const_tuple(data.shape) N, _ = get_const_tuple(weight.shape) # create tuning space - cfg.define_split("tile_y", 32 if isinstance(M, tvm.expr.Var) else M, num_outputs=2) - cfg.define_split("tile_x", 32 if isinstance(N, tvm.expr.Var) else N, num_outputs=2) - cfg.define_split("tile_k", 32 if isinstance(K, tvm.expr.Var) else K, num_outputs=2) + cfg.define_split("tile_y", 32 if isinstance(M, tvm.tir.Var) else M, num_outputs=2) + cfg.define_split("tile_x", 32 if isinstance(N, tvm.tir.Var) else N, num_outputs=2) + cfg.define_split("tile_k", 32 if isinstance(K, tvm.tir.Var) else K, num_outputs=2) if cfg.is_fallback: _default_dense_nopack_config(cfg, M, N, K) vec = cfg["tile_k"].size[-1] - k = tvm.reduce_axis((0, K // vec), "k") - CC = tvm.compute((M, N, vec), - lambda z, y, x: tvm.sum( - data[z, k * vec + x].astype(out_dtype) * - weight[y, k * vec + x].astype(out_dtype), axis=k)) - - kk = tvm.reduce_axis((0, vec), "kk") - C = tvm.compute((M, N), - lambda y, x: tvm.sum(CC[y, x, kk], axis=kk), - tag="dense_nopack") + k = te.reduce_axis((0, K // vec), "k") + CC = te.compute((M, N, vec), + lambda z, y, x: te.sum( + data[z, k * vec + x].astype(out_dtype) * + weight[y, k * vec + x].astype(out_dtype), axis=k)) + + kk = te.reduce_axis((0, vec), "kk") + C = te.compute((M, N), + lambda y, x: te.sum(CC[y, x, kk], axis=kk), + tag="dense_nopack") if bias is not None: - C = tvm.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + C = te.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) return C @autotvm.register_topi_schedule("dense_nopack.x86") def schedule_dense_nopack(cfg, outs): """Create the schedule for dense_nopack""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'dense_nopack' in op.tag: @@ -191,27 +192,27 @@ def dense_pack(cfg, data, weight, bias=None, out_dtype=None): packw_bn = cfg["tile_x"].size[-1] packw_shape = (N // packw_bn, K, packw_bn) - packw = tvm.compute(packw_shape, - lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight") - - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - k = tvm.reduce_axis((0, K), name="k") - C = tvm.compute((M, N), - lambda y, x: tvm.sum( - data[y, k].astype(out_dtype) * - packw[idxdiv(x, packw_bn), k, idxmod(x, packw_bn)].astype(out_dtype), - axis=k), - tag="dense_pack") + packw = te.compute(packw_shape, + lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight") + + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + k = te.reduce_axis((0, K), name="k") + C = te.compute((M, N), + lambda y, x: te.sum( + data[y, k].astype(out_dtype) * + packw[idxdiv(x, packw_bn), k, idxmod(x, packw_bn)].astype(out_dtype), + axis=k), + tag="dense_pack") if bias is not None: - C = tvm.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + C = te.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) return C @autotvm.register_topi_schedule("dense_pack.x86") def schedule_dense_pack(cfg, outs): """Create the schedule for dense_pack""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if "dense_pack" in op.tag: @@ -227,8 +228,8 @@ def dense_cblas(cfg, data, weight, bias=None, out_dtype=None): cfg.add_flop(M * K * N * 2) C = cblas.matmul(data, weight, False, True) if bias is not None: - C = tvm.compute(C.shape, lambda i, j: C[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + C = te.compute(C.shape, lambda i, j: C[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) return C @autotvm.register_topi_schedule("dense_cblas.x86") diff --git a/topi/python/topi/x86/depthwise_conv2d.py b/topi/python/topi/x86/depthwise_conv2d.py index 70b30fea8c514..fda964eb66398 100644 --- a/topi/python/topi/x86/depthwise_conv2d.py +++ b/topi/python/topi/x86/depthwise_conv2d.py @@ -18,6 +18,7 @@ # pylint: disable=no-value-for-parameter """Depthwise Conv2D schedule on x86""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity from ..nn.pad import pad @@ -87,11 +88,11 @@ def _pack_data(cfg, data, kernel): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, 1, kh, kw, 1, oc_bn), lambda occ, icc, k_h, k_w, icb, ocb: kernel[(occ * oc_bn + ocb) // cm, @@ -135,9 +136,9 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, # get workload and related schedule config wkl = _get_workload( - tvm.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype), - tvm.placeholder((out_channel, channel_multiplier, filter_height, filter_width), - dtype=kernel.dtype), + te.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype), + te.placeholder((out_channel, channel_multiplier, filter_height, filter_width), + dtype=kernel.dtype), strides, padding, out_dtype) if cfg.is_fallback: _fallback_schedule(cfg, wkl) @@ -160,14 +161,14 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, data_pad = data # depthconv stage - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - kh = tvm.reduce_axis((0, filter_height), name='kh') - kw = tvm.reduce_axis((0, filter_width), name='kw') - Output = tvm.compute( + kh = te.reduce_axis((0, filter_height), name='kh') + kw = te.reduce_axis((0, filter_width), name='kw') + Output = te.compute( (batch, out_channel_chunk, out_height, out_width, out_channel_block), - lambda b, oco, oh, ow, oci: tvm.sum( + lambda b, oco, oh, ow, oci: te.sum( (data_pad[ b, idxdiv(idxdiv(oco * out_channel_block + oci, channel_multiplier), in_channel_block), @@ -182,8 +183,8 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, @autotvm.register_topi_schedule("depthwise_conv2d_NCHWc.x86") def schedule_depthwise_conv2d_NCHWc(cfg, outs): """CPU schedule for depthwise conv2d in NCHW[x]c layout""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): """Traverse operators from computation graph""" @@ -199,7 +200,7 @@ def _callback(op): def _schedule_depthwise_conv2d_NCHWc_impl(s, cfg, data_vec, kernel_vec, conv_out, output): tile_ow, oc_bn = cfg["tile_ow"].size[-1], cfg["tile_oc"].size[-1] # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, tvm.te.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) diff --git a/topi/python/topi/x86/injective.py b/topi/python/topi/x86/injective.py index 375827bb271c5..7c37ac7bc9b58 100644 --- a/topi/python/topi/x86/injective.py +++ b/topi/python/topi/x86/injective.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name """x86 declaration and schedules.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from ..util import is_empty_shape def schedule_injective_from_existing(sch, out): @@ -65,10 +64,10 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + s = te.create_schedule([x.op for x in outs]) + te.schedule.AutoInlineInjective(s) if not is_empty_shape(x.shape): schedule_injective_from_existing(s, x) @@ -104,10 +103,10 @@ def vectorize(sch, tensor, vectorize_limit): _, inner_i = sch[tensor].split(inner_axis, split_factor) sch[tensor].vectorize(inner_i) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + s = te.create_schedule([x.op for x in outs]) + te.schedule.AutoInlineInjective(s) if len(s[x].op.axis) >= 5: fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2]) vectorize(s, x, 64) diff --git a/topi/python/topi/x86/nn.py b/topi/python/topi/x86/nn.py index 3d57b6bbf203e..8f884b8b1a2e6 100644 --- a/topi/python/topi/x86/nn.py +++ b/topi/python/topi/x86/nn.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name,too-many-locals,unused-variable """x86 nn operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te def schedule_softmax(outs): """Schedule for softmax @@ -33,9 +32,9 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs softmax = outs[0] - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) op_tag = softmax.op.tag if op_tag == 'softmax_output': diff --git a/topi/python/topi/x86/pooling.py b/topi/python/topi/x86/pooling.py index a8251dd13ae4a..f7664d906799c 100644 --- a/topi/python/topi/x86/pooling.py +++ b/topi/python/topi/x86/pooling.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable """Schedule for pooling operators""" -import tvm +from tvm import te from .. import tag def _parallel_sch(sch, oshape, do_vectorize=False): @@ -75,12 +75,12 @@ def schedule_pool(outs, layout): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(PaddedInput, Pool): - if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): + if isinstance(PaddedInput.op, te.tensor.ComputeOp): s[PaddedInput].compute_inline() do_vectorize = layout[-1] not in "HWhw" _parallel_sch(s[Pool], outs[0].shape, do_vectorize) @@ -92,7 +92,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -129,8 +129,8 @@ def schedule_adaptive_pool(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(OP): @@ -140,7 +140,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('adaptive_pool'): diff --git a/topi/python/topi/x86/reduction.py b/topi/python/topi/x86/reduction.py index b9dd4d4f1b3ce..0dfc3f23c2f2d 100644 --- a/topi/python/topi/x86/reduction.py +++ b/topi/python/topi/x86/reduction.py @@ -16,8 +16,8 @@ # under the License. # pylint: disable=invalid-name """x86 declaration and schedules.""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from .injective import schedule_injective_from_existing from .. import tag from ..util import get_const_tuple @@ -72,13 +72,13 @@ def schedule_reduce(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - sch = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + sch = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse_before_reduce(operator): """Internal traverse function""" - if isinstance(operator, tvm.tensor.PlaceholderOp): + if isinstance(operator, tvm.te.PlaceholderOp): return if tag.is_injective(operator.tag): sch[operator].compute_inline() @@ -108,7 +108,7 @@ def traverse_after_reduce(operator): for tensor in input_tensors: if tensor.op not in scheduled_ops: traverse_before_reduce(tensor.op) - elif isinstance(operator, tvm.tensor.PlaceholderOp): + elif isinstance(operator, tvm.te.PlaceholderOp): pass else: raise RuntimeError("Unsupported operator: %s (tag: %s)" % (operator, operator.tag)) diff --git a/topi/python/topi/x86/roi_align.py b/topi/python/topi/x86/roi_align.py index 203c3dd1802bd..205d70947ab27 100644 --- a/topi/python/topi/x86/roi_align.py +++ b/topi/python/topi/x86/roi_align.py @@ -30,32 +30,32 @@ def roi_align_nchw_ir(data, rois, w_pc, pos_pc, pooled_size, spatial_scale, samp Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor or numpy NDArray + rois : tvm.te.Tensor or numpy NDArray 2-D with shape [num_roi, 5]. The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] - w_pc : tvm.Tensor or numpy NDArray + w_pc : tvm.te.Tensor or numpy NDArray 3-D weight pre-calculation buffer - pos_pc : tvm.Tensor or numpy NDArray + pos_pc : tvm.te.Tensor or numpy NDArray 3-D position pre-calculation buffer pooled_size : tvm ConsExpr [out_height, out_width] - spatial_scale : tvm.const + spatial_scale : tvm.tir.const Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal of total stride in convolutional layers, which should be in range (0.0, 1.0] - sample_ratio : tvm.const + sample_ratio : tvm.tir.const Sampling ratio of ROI align, using adaptive size by default. Returns ------- - output : tvm.Tensor or numpy NDArray + output : tvm.te.Tensor or numpy NDArray 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ channels = data.shape[1] @@ -161,21 +161,21 @@ def roi_align_nchw_ir(data, rois, w_pc, pos_pc, pooled_size, spatial_scale, samp for iy in range(roi_bin_grid_h): for ix in range(roi_bin_grid_w): output_val += w_pc[n, pre_calc_index, 0] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 2], - pos_pc[n, pre_calc_index, 0]] \ - + w_pc[n, pre_calc_index, 1] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 2], - pos_pc[n, pre_calc_index, 1]] \ - + w_pc[n, pre_calc_index, 2] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 3], - pos_pc[n, pre_calc_index, 0]] \ - + w_pc[n, pre_calc_index, 3] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 3], - pos_pc[n, pre_calc_index, 1]] + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 2], + pos_pc[n, pre_calc_index, 0]] \ + + w_pc[n, pre_calc_index, 1] \ + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 2], + pos_pc[n, pre_calc_index, 1]] \ + + w_pc[n, pre_calc_index, 2] \ + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 3], + pos_pc[n, pre_calc_index, 0]] \ + + w_pc[n, pre_calc_index, 3] \ + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 3], + pos_pc[n, pre_calc_index, 1]] pre_calc_index += 1 output_val /= count @@ -189,10 +189,10 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor + rois : tvm.te.Tensor 2-D with shape [num_roi, 5]. The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] @@ -208,7 +208,7 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ if not isinstance(pooled_size, (tuple, list)): @@ -226,8 +226,8 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): w_pc_buffer = full(max_pc_shape, data.dtype, 0) pos_pc_buffer = full(max_pc_shape, "int32", 0) - pooled_size = tvm.convert(pooled_size) - spatial_scale = tvm.const(spatial_scale, "float32") - sample_ratio = tvm.const(sample_ratio, "int32") + pooled_size = tvm.runtime.convert(pooled_size) + spatial_scale = tvm.tir.const(spatial_scale, "float32") + sample_ratio = tvm.tir.const(sample_ratio, "int32") return roi_align_nchw_ir(data, rois, w_pc_buffer, pos_pc_buffer, pooled_size, spatial_scale, sample_ratio) diff --git a/topi/python/topi/x86/sparse.py b/topi/python/topi/x86/sparse.py index 898d0e5ea2c6c..54a5af9ca9f0a 100644 --- a/topi/python/topi/x86/sparse.py +++ b/topi/python/topi/x86/sparse.py @@ -16,7 +16,7 @@ # under the License. """sparse_dense schedule on x86""" -import tvm +from tvm import te from ..util import traverse_inline, get_const_int from .util import get_fp32_len @@ -24,7 +24,7 @@ def schedule_sparse_dense(outs): """Create schedule for sparse dense""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): simd_width = get_fp32_len() diff --git a/topi/python/topi/x86/tensor_intrin.py b/topi/python/topi/x86/tensor_intrin.py index dc9e1456d2cde..f3e39c8198131 100644 --- a/topi/python/topi/x86/tensor_intrin.py +++ b/topi/python/topi/x86/tensor_intrin.py @@ -17,6 +17,7 @@ """Core kernel of dot product of 4 Int8 operations""" #pylint: disable=invalid-name import tvm +from tvm import te import tvm.target.codegen @@ -25,7 +26,7 @@ def dot_16x1x16_uint8_int8_int32(): mcpu = tvm.target.Target.current().mcpu assert mcpu in ("skylake-avx512", "cascadelake"), \ - "An old Intel machine that does not have fast Int8 support." + "An old Intel machine that does not have fast Int8 support." if mcpu == "skylake-avx512": return dot_16x1x16_uint8_int8_int32_skylake() # cascadelake @@ -63,43 +64,43 @@ def dot_16x1x16_uint8_int8_int32_skylake(): int32_lanes = 16 # 16 int32 lanes in AVX512 num_int8_elements = 4 # 4 int8 elements in int32 - data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data') - kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int32_lanes,), - lambda i: tvm.sum(data[k].astype('int32') * - kernel[i, k].astype('int32'), - axis=k), - name="C") - - a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", - offset_factor=1, - strides=[tvm.var('ldw'), 1]) + data = te.placeholder((num_int8_elements,), dtype='uint8', name='data') + kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int32_lanes,), + lambda i: te.sum(data[k].astype('int32') * + kernel[i, k].astype('int32'), + axis=k), + name="C") + + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", + offset_factor=1, + strides=[te.var('ldw'), 1]) def _intrin_func(ins, outs): def _instr(index): ib = tvm.ir_builder.create() if index == 1: - ib.emit(outs[0].vstore(0, tvm.const(0, 'int32x16'))) + ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16'))) return ib.get() a_int8 = ins[0].vload([0], "uint8x4") - re_int32 = tvm.call_pure_intrin('int32', 'reinterpret', a_int8) + re_int32 = tvm.tir.call_pure_intrin('int32', 'reinterpret', a_int8) vec_ai32 = re_int32.astype('int32x16') - vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) + vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) vec_b = ins[1].vload([0, 0], "int8x64") - vec_one = tvm.const(1, "int16x32") - pair_reduction = tvm.call_llvm_intrin('int16x32', - 'llvm.x86.avx512.pmaddubs.w.512', - tvm.const(0, 'uint32'), - vec_a, vec_b) - quad_reduction = tvm.call_llvm_intrin('int32x16', - 'llvm.x86.avx512.pmaddw.d.512', - tvm.const(0, 'uint32'), - pair_reduction, vec_one) + vec_one = tvm.tir.const(1, "int16x32") + pair_reduction = tvm.tir.call_llvm_intrin('int16x32', + 'llvm.x86.avx512.pmaddubs.w.512', + tvm.tir.const(0, 'uint32'), + vec_a, vec_b) + quad_reduction = tvm.tir.call_llvm_intrin('int32x16', + 'llvm.x86.avx512.pmaddw.d.512', + tvm.tir.const(0, 'uint32'), + pair_reduction, vec_one) if index == 0: ib.emit(outs[0].vstore(0, quad_reduction)) else: @@ -146,41 +147,41 @@ def dot_16x1x16_uint8_int8_int16(): int16_lanes = 4*32 # 4*32 int32 lanes in 4 AVX512 vector registers num_int8_elements = 2 # 2 int8 elements in int16 - data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data') - kernel = tvm.placeholder((int16_lanes, num_int8_elements), dtype='int8', name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int16_lanes, ), - lambda i: tvm.sum(data[k].astype('int16') * - kernel[i, k].astype('int16'), - axis=k), - name="C") - - a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", - offset_factor=1) - # strides=[tvm.var('ldw'), 1, 1]) + data = te.placeholder((num_int8_elements,), dtype='uint8', name='data') + kernel = te.placeholder((int16_lanes, num_int8_elements), dtype='int8', name='kernel') + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int16_lanes, ), + lambda i: te.sum(data[k].astype('int16') * + kernel[i, k].astype('int16'), + axis=k), + name="C") + + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", + offset_factor=1) + # strides=[te.var('ldw'), 1, 1]) def _intrin_func(ins, outs): def _instr(index): ib = tvm.ir_builder.create() if index == 1: for i in range(4): - ib.emit(outs[0].vstore([i*32], tvm.const(0, 'int16x32'))) + ib.emit(outs[0].vstore([i*32], tvm.tir.const(0, 'int16x32'))) return ib.get() a_int8 = ins[0].vload([0], "uint8x2") - re_int16 = tvm.call_pure_intrin('int16', 'reinterpret', a_int8) + re_int16 = tvm.tir.call_pure_intrin('int16', 'reinterpret', a_int8) vec_ai16 = re_int16.astype('int16x32') - vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai16) + vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai16) for i in range(4): vec_b = ins[1].vload([i*32, 0], "int8x64") - pair_reduction = tvm.call_llvm_intrin('int16x32', - 'llvm.x86.avx512.pmaddubs.w.512', - tvm.const(0, 'uint32'), - vec_a, vec_b) + pair_reduction = tvm.tir.call_llvm_intrin('int16x32', + 'llvm.x86.avx512.pmaddubs.w.512', + tvm.tir.const(0, 'uint32'), + vec_a, vec_b) if index == 0: ib.emit(outs[0].vstore([i*32], pair_reduction)) else: @@ -226,31 +227,31 @@ def dot_16x1x16_uint8_int8_int32_cascadelake(): int32_lanes = 16 # 16 int32 lanes in AVX512 num_int8_elements = 4 # 4 int8 elements in int32 - data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data') - kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int32_lanes,), - lambda i: tvm.sum(data[k].astype('int32') * - kernel[i, k].astype('int32'), - axis=k), - name="C") - - a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", - offset_factor=1, - strides=[tvm.var('ldw'), 1]) + data = te.placeholder((num_int8_elements,), dtype='uint8', name='data') + kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int32_lanes,), + lambda i: te.sum(data[k].astype('int32') * + kernel[i, k].astype('int32'), + axis=k), + name="C") + + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", + offset_factor=1, + strides=[te.var('ldw'), 1]) def _intrin_func(ins, outs): def _instr(index): ib = tvm.ir_builder.create() if index == 1: - ib.emit(outs[0].vstore(0, tvm.const(0, 'int32x16'))) + ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16'))) return ib.get() a_int8 = ins[0].vload([0], "uint8x4") - re_int32 = tvm.call_pure_intrin('int32', 'reinterpret', a_int8) + re_int32 = tvm.tir.call_pure_intrin('int32', 'reinterpret', a_int8) vec_ai32 = re_int32.astype('int32x16') vec_b = ins[1].vload([0, 0], "int8x64") @@ -258,24 +259,24 @@ def _instr(index): llvm_id = tvm.target.codegen.llvm_lookup_intrinsic_id(vnni_inst_name) if llvm_id != 0: # VNNI is available for current LLVM version - vec_bi32 = tvm.call_pure_intrin('int32x16', 'reinterpret', vec_b) - vec_zero = tvm.const(0, "int32x16") - quad_reduction = tvm.call_llvm_intrin('int32x16', - 'llvm.x86.avx512.vpdpbusd.512', - tvm.const(0, 'uint32'), - vec_zero, - vec_ai32, vec_bi32) + vec_bi32 = tvm.tir.call_pure_intrin('int32x16', 'reinterpret', vec_b) + vec_zero = tvm.tir.const(0, "int32x16") + quad_reduction = tvm.tir.call_llvm_intrin('int32x16', + 'llvm.x86.avx512.vpdpbusd.512', + tvm.tir.const(0, 'uint32'), + vec_zero, + vec_ai32, vec_bi32) else: # Fall back to the normal AVX512 - vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) - vec_one = tvm.const(1, "int16x32") - pair_reduction = tvm.call_llvm_intrin('int16x32', - 'llvm.x86.avx512.pmaddubs.w.512', - tvm.const(0, 'uint32'), - vec_a, vec_b) - quad_reduction = tvm.call_llvm_intrin('int32x16', - 'llvm.x86.avx512.pmaddw.d.512', - tvm.const(0, 'uint32'), - pair_reduction, vec_one) + vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) + vec_one = tvm.tir.const(1, "int16x32") + pair_reduction = tvm.tir.call_llvm_intrin('int16x32', + 'llvm.x86.avx512.pmaddubs.w.512', + tvm.tir.const(0, 'uint32'), + vec_a, vec_b) + quad_reduction = tvm.tir.call_llvm_intrin('int32x16', + 'llvm.x86.avx512.pmaddw.d.512', + tvm.tir.const(0, 'uint32'), + pair_reduction, vec_one) if index == 0: ib.emit(outs[0].vstore(0, quad_reduction)) diff --git a/topi/python/topi/x86/util.py b/topi/python/topi/x86/util.py index 04931f577b511..f2a35d2777331 100644 --- a/topi/python/topi/x86/util.py +++ b/topi/python/topi/x86/util.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. """Common x86 related utilities""" -from __future__ import absolute_import as _abs import tvm + def get_fp32_len(): mcpu = tvm.target.Target.current().mcpu fp32_vec_len = 8 diff --git a/topi/recipe/broadcast/test_broadcast_map.py b/topi/recipe/broadcast/test_broadcast_map.py index 4f8a4dece2c5e..2f2bb9e900fe4 100644 --- a/topi/recipe/broadcast/test_broadcast_map.py +++ b/topi/recipe/broadcast/test_broadcast_map.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te from tvm.contrib import nvcc import numpy as np @@ -52,7 +53,7 @@ def test_broadcast_to(in_shape, out_shape): TASK = "bcast_to_i" + "_".join([str(ele) for ele in in_shape])\ + "o" + "_".join([str(ele) for ele in out_shape]) # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.broadcast_to(A, out_shape) s = topi.cuda.schedule_broadcast(B) fcuda = tvm.build(s, [A, B], "cuda", name="broadcast_to") @@ -72,8 +73,8 @@ def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"): TASK = "bcast_binary_" + typ + "_lhs" +\ "_".join([str(ele) for ele in lhs_shape]) +\ "rhs" + "_".join([str(ele) for ele in rhs_shape]) - A = tvm.placeholder(shape=lhs_shape, name="A") - B = tvm.placeholder(shape=rhs_shape, name="B") + A = te.placeholder(shape=lhs_shape, name="A") + B = te.placeholder(shape=rhs_shape, name="B") if typ == "add": C = topi.broadcast_add(A, B) elif typ == "sub": diff --git a/topi/recipe/conv/depthwise_conv2d_test.py b/topi/recipe/conv/depthwise_conv2d_test.py index 90c61037f9b30..5498645f59920 100644 --- a/topi/recipe/conv/depthwise_conv2d_test.py +++ b/topi/recipe/conv/depthwise_conv2d_test.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te import numpy as np from scipy import signal from tvm.contrib import nvcc @@ -63,11 +64,11 @@ def test_depthwise_conv2d_nchw(): padding = 'SAME' # or 'VALID' # Placeholder - Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input') - Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') + Input = te.placeholder((batch, in_channel, in_height, in_width), name='Input') + Filter = te.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') Stride = [stride_h, stride_w] - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') # Declare DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, Stride, padding) ScaleShift = topi.nn.scale_shift_nchw(DepthwiseConv2d, Scale, Shift) @@ -152,11 +153,11 @@ def test_depthwise_conv2d_nhwc(): padding = 'SAME' # or 'VALID' # Placeholder - Input = tvm.placeholder((batch, in_height, in_width, in_channel), name='Input') - Filter = tvm.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') + Input = te.placeholder((batch, in_height, in_width, in_channel), name='Input') + Filter = te.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') Stride = [stride_h, stride_w] - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') # Declare DepthwiseConv2d = topi.nn.depthwise_conv2d_nhwc(Input, Filter, Stride, padding) ScaleShift = topi.nn.scale_shift_nhwc(DepthwiseConv2d, Scale, Shift) diff --git a/topi/recipe/conv/test_conv2d_hwcn_map.py b/topi/recipe/conv/test_conv2d_hwcn_map.py index 3f7decabfd0e9..47e1601f4487e 100644 --- a/topi/recipe/conv/test_conv2d_hwcn_map.py +++ b/topi/recipe/conv/test_conv2d_hwcn_map.py @@ -19,6 +19,7 @@ import numpy as np import scipy.signal import tvm +from tvm import te from tvm.contrib import nvcc import topi from topi.util import get_const_tuple @@ -55,8 +56,8 @@ def test_conv2d_hwcn_map(): stride = 2 padding = 'SAME' - A = tvm.placeholder((in_height, in_width, in_channel, batch), name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') + A = te.placeholder((in_height, in_width, in_channel, batch), name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') B = topi.nn.conv2d_hwcn(A, W, stride, padding) C = topi.nn.relu(B) s1 = topi.cuda.schedule_conv2d_hwcn([B]) diff --git a/topi/recipe/conv/test_conv_int8_arm.py b/topi/recipe/conv/test_conv_int8_arm.py index ff0d37d9a66d7..336e2f2f405bf 100644 --- a/topi/recipe/conv/test_conv_int8_arm.py +++ b/topi/recipe/conv/test_conv_int8_arm.py @@ -20,6 +20,7 @@ import logging import numpy as np import tvm +from tvm import te import topi logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -92,8 +93,8 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f hstride, wstride, out_dtype) # Create TVM placeholders - data = tvm.placeholder(data_shape, name='data', dtype=data_dtype) - kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) + data = te.placeholder(data_shape, name='data', dtype=data_dtype) + kernel = te.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) # Create the numpy arrays to be used for executing conv models if data_dtype == 'float32': @@ -119,7 +120,7 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f padding=hpad, dilation=(1, 1), layout='NCHWc', out_layout='NCHWc', out_dtype=out_dtype) out = topi.nn.relu(conv) - sch = tvm.create_schedule(out.op) + sch = te.create_schedule(out.op) func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out') func(data_array, kernel_array, c_orig) LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True)) diff --git a/topi/recipe/conv/test_conv_int8_intel.py b/topi/recipe/conv/test_conv_int8_intel.py index f39f4cd7b830d..767262d81d83f 100644 --- a/topi/recipe/conv/test_conv_int8_intel.py +++ b/topi/recipe/conv/test_conv_int8_intel.py @@ -20,6 +20,7 @@ import logging import numpy as np import tvm +from tvm import te import topi logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -93,8 +94,8 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f hstride, wstride, out_dtype) # Create TVM placeholders - data = tvm.placeholder(data_shape, name='data', dtype=data_dtype) - kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) + data = te.placeholder(data_shape, name='data', dtype=data_dtype) + kernel = te.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) # Create the numpy arrays to be used for executing conv models if data_dtype == 'float32': @@ -115,7 +116,7 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f padding=hpad, dilation=(1, 1), layout='NCHWc', out_layout='NCHWc', out_dtype=out_dtype) out = topi.nn.relu(conv) - sch = tvm.create_schedule(out.op) + sch = te.create_schedule(out.op) func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out') func(data_array, kernel_array, c_orig) LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True)) diff --git a/topi/recipe/gemm/android_gemm_square.py b/topi/recipe/gemm/android_gemm_square.py index 46129cbc1f303..7692f9cf44972 100644 --- a/topi/recipe/gemm/android_gemm_square.py +++ b/topi/recipe/gemm/android_gemm_square.py @@ -16,6 +16,7 @@ # under the License. """Example code to do square matrix multiplication on Android Phone.""" import tvm +from tvm import te import os from tvm import rpc from tvm.contrib import util, ndk @@ -52,28 +53,28 @@ def test_gemm_gpu(N, times, bn, num_block, num_thread): assert(bn <= N) assert(num_thread * num_thread * 16 <= N) assert(num_block * num_block * 2 <= N) - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='Btmp') - k = tvm.reduce_axis((0, N), name='k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='Btmp') + k = te.reduce_axis((0, N), name='k') - packedB = tvm.compute((N, N / bn, bn), + packedB = te.compute((N, N / bn, bn), lambda x, y, z: B[x, y * bn + z], name = 'B') - C = tvm.compute( + C = te.compute( (N, N), - lambda ii, jj: tvm.sum(A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k), + lambda ii, jj: te.sum(A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) CC = s.cache_write(C, "local") - block_x = tvm.thread_axis("blockIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_x = te.thread_axis("threadIdx.x") + thread_y = te.thread_axis("threadIdx.y") - thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx") - thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy") + thread_xz = te.thread_axis((0, 2), "vthread", name="vx") + thread_yz = te.thread_axis((0, 2), "vthread", name="vy") pby, pbi = s[packedB].split(packedB.op.axis[0], nparts=num_thread) pbx, pbj = s[packedB].split(packedB.op.axis[1], nparts=num_thread) diff --git a/topi/recipe/gemm/cuda_gemm_square.py b/topi/recipe/gemm/cuda_gemm_square.py index 899379e9e4888..6e482b044a3c7 100644 --- a/topi/recipe/gemm/cuda_gemm_square.py +++ b/topi/recipe/gemm/cuda_gemm_square.py @@ -16,6 +16,7 @@ # under the License. """Example code to do square matrix multiplication.""" import tvm +from tvm import te import os from tvm.contrib import nvcc from tvm.contrib import spirv @@ -46,19 +47,19 @@ def tvm_callback_cuda_postproc(code): def test_gemm(): # graph nn = 2048 - n = tvm.var('n') - n = tvm.convert(nn) + n = te.var('n') + n = tvm.runtime.convert(nn) m, l = n, n - A = tvm.placeholder((l, n), name='A') - B = tvm.placeholder((l, m), name='B') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute( + A = te.placeholder((l, n), name='A') + B = te.placeholder((l, m), name='B') + k = te.reduce_axis((0, l), name='k') + C = te.compute( (m, n), - lambda ii, jj: tvm.sum(A[k, jj] * B[k, ii], axis=k), + lambda ii, jj: te.sum(A[k, jj] * B[k, ii], axis=k), name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) AA = s.cache_read(A, "shared", [C]) BB = s.cache_read(B, "shared", [C]) AL = s.cache_read(AA, "local", [C]) @@ -68,12 +69,12 @@ def test_gemm(): scale = 8 num_thread = 8 block_factor = scale * num_thread - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") - thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx") - thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_y = te.thread_axis((0, num_thread), "threadIdx.y") + thread_xz = te.thread_axis((0, 2), "vthread", name="vx") + thread_yz = te.thread_axis((0, 2), "vthread", name="vy") by, yi = s[C].split(C.op.axis[0], factor=block_factor) bx, xi = s[C].split(C.op.axis[1], factor=block_factor) diff --git a/topi/recipe/gemm/gemm_int8.py b/topi/recipe/gemm/gemm_int8.py index cf3621479d418..9d668ebf6fa9a 100644 --- a/topi/recipe/gemm/gemm_int8.py +++ b/topi/recipe/gemm/gemm_int8.py @@ -19,6 +19,7 @@ import sys import numpy as np import tvm +from tvm import te from tvm import autotvm from topi.cuda.tensor_intrin import dp4a @@ -29,15 +30,15 @@ @autotvm.template def gemm_int8(n, m, l): - A = tvm.placeholder((n, l), name='A', dtype='int8') - B = tvm.placeholder((m, l), name='B', dtype='int8') + A = te.placeholder((n, l), name='A', dtype='int8') + B = te.placeholder((m, l), name='B', dtype='int8') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k].astype('int32') * B[j, k].astype( + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: te.sum(A[i, k].astype('int32') * B[j, k].astype( 'int32'), axis=k), name='C') cfg = autotvm.get_config() - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) y, x = C.op.axis AA = s.cache_read(A, 'shared', [C]) @@ -56,10 +57,10 @@ def gemm_int8(n, m, l): s[CC].tensorize(ki, intrin_dp4a) - block_x = tvm.thread_axis('blockIdx.x') - block_y = tvm.thread_axis('blockIdx.y') - thread_x = tvm.thread_axis('threadIdx.x') - thread_y = tvm.thread_axis('threadIdx.y') + block_x = te.thread_axis('blockIdx.x') + block_y = te.thread_axis('blockIdx.y') + thread_x = te.thread_axis('threadIdx.x') + thread_y = te.thread_axis('threadIdx.y') def block_size_filter(entity): return entity.size[0] * 2 >= entity.size[1] * 2 and \ @@ -71,8 +72,8 @@ def block_size_filter(entity): s[C].bind(by, block_y) s[C].bind(bx, block_x) - s[C].bind(tyz, tvm.thread_axis('vthread')) - s[C].bind(txz, tvm.thread_axis('vthread')) + s[C].bind(tyz, te.thread_axis('vthread')) + s[C].bind(txz, te.thread_axis('vthread')) s[C].bind(ty, thread_y) s[C].bind(tx, thread_x) s[C].reorder(by, bx, tyz, txz, ty, tx, yi, xi) diff --git a/topi/recipe/reduce/test_reduce_map.py b/topi/recipe/reduce/test_reduce_map.py index 1adc41374f9c9..96f94305b5b4a 100644 --- a/topi/recipe/reduce/test_reduce_map.py +++ b/topi/recipe/reduce/test_reduce_map.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te from tvm.contrib import nvcc import numpy as np @@ -50,7 +51,7 @@ def tvm_callback_cuda_postproc(code): def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0): global TASK # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") if type == "sum": TASK = "sum_map_id%d" %test_id B = topi.sum(A, axis=axis, keepdims=keepdims) diff --git a/topi/recipe/rnn/lstm.py b/topi/recipe/rnn/lstm.py index 0d7635d082889..172362fdda148 100644 --- a/topi/recipe/rnn/lstm.py +++ b/topi/recipe/rnn/lstm.py @@ -16,6 +16,7 @@ # under the License. """LSTM Example, still work in progress..""" import tvm +from tvm import te import os from tvm.contrib import nvcc import numpy as np @@ -58,52 +59,52 @@ def lstm(): num_thread_x = 16 * 3 // 2 num_sm = 24 n_num_step = 128 - num_step = tvm.var('num_step') + num_step = te.var('num_step') num_hidden = 1152 // 2 batch_size = 1 # Global transition matrix # Input hidden channel can be pre-caculated by a gemm - Xi2h = tvm.placeholder((num_step, batch_size, 4, num_hidden), name="Xi2h") + Xi2h = te.placeholder((num_step, batch_size, 4, num_hidden), name="Xi2h") # Only handle hidden transition, saves space. - Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h") + Wh2h = te.placeholder((4, num_hidden, num_hidden), name="Wh2h") # h: output hidden state, c: cell state. - s_state_h = tvm.placeholder((num_step, batch_size, num_hidden)) - s_state_c = tvm.placeholder((num_step, batch_size, num_hidden)) - s_init_c = tvm.compute((1, batch_size, num_hidden), + s_state_h = te.placeholder((num_step, batch_size, num_hidden)) + s_state_c = te.placeholder((num_step, batch_size, num_hidden)) + s_init_c = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_c") - s_init_h = tvm.compute((1, batch_size, num_hidden), + s_init_h = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_h") # LSTM transition - k = tvm.reduce_axis((0, num_hidden), name="ki2h") - s_h2h = tvm.compute( + k = te.reduce_axis((0, num_hidden), name="ki2h") + s_h2h = te.compute( (num_step, batch_size, 4, num_hidden), - lambda t, i, x, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), + lambda t, i, x, j: te.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), name="s_h2h") # Gate rules - gates = tvm.compute(Xi2h.shape, lambda *i: + gates = te.compute(Xi2h.shape, lambda *i: Xi2h(*i) + s_h2h(*i), name="gates") gshape = (num_step, batch_size, num_hidden) - in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 0, j]), name="in_gate") - in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, i, 1, j]), name="in_transform") - forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 2, j]), name="forget_gate") - out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 3, j]), name="out_gate") - next_c = tvm.compute(gshape, + in_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 0, j]), name="in_gate") + in_transform = te.compute(gshape, lambda t, i, j: te.tanh(gates[t, i, 1, j]), name="in_transform") + forget_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 2, j]), name="forget_gate") + out_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 3, j]), name="out_gate") + next_c = te.compute(gshape, lambda t, i, j: forget_gate[t, i, j] * s_state_c[t - 1, i, j] + in_gate[t, i, j] * in_transform[t, i, j], name="next_c") - next_h = tvm.compute(gshape, - lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h") - update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c") - update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h") + next_h = te.compute(gshape, + lambda t, i, j: out_gate[t, i, j] * te.tanh(next_c[t, i, j]), name="next_h") + update_c = te.compute(gshape, lambda *i: next_c(*i), name="update_c") + update_h = te.compute(gshape, lambda *i: next_h(*i), name="update_h") # schedule - scan_h, scan_c = tvm.scan( + scan_h, scan_c = tvm.te.scan( [s_init_h, s_init_c], [update_h, update_c], [s_state_h, s_state_c], inputs=[Xi2h], name="lstm_scan") # schedule - s = tvm.create_schedule(scan_h.op) + s = te.create_schedule(scan_h.op) # Inline gate computations s[gates].compute_inline() s[in_gate].compute_inline() @@ -111,9 +112,9 @@ def lstm(): s[forget_gate].compute_inline() s[out_gate].compute_inline() - block_x = tvm.thread_axis((0, num_sm), "blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread_x), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread_y), "threadIdx.y") + block_x = te.thread_axis((0, num_sm), "blockIdx.x") + thread_x = te.thread_axis((0, num_thread_x), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread_y), "threadIdx.y") s_state_h_S = s.cache_read(s_state_h, "shared", [s_h2h]) s_state_c_S = s.cache_read(s_state_c, "shared", [next_c]) diff --git a/topi/recipe/rnn/matexp.py b/topi/recipe/rnn/matexp.py index 7466008d81e30..94ec5bc392f1d 100644 --- a/topi/recipe/rnn/matexp.py +++ b/topi/recipe/rnn/matexp.py @@ -24,6 +24,7 @@ ``` """ import tvm +from tvm import te import time import os import argparse @@ -62,25 +63,25 @@ def rnn_matexp(): n_batch_size = 4 detect_global_barrier = DETECT_GLOBAL_BARRIER - num_step = tvm.var("num_step") - num_hidden = tvm.convert(n_num_hidden) - batch_size = tvm.convert(n_batch_size) + num_step = te.var("num_step") + num_hidden = tvm.runtime.convert(n_num_hidden) + batch_size = tvm.runtime.convert(n_batch_size) num_thread_y = 8 num_thread_x = 16 * 3 num_sm = 24 - Whh = tvm.placeholder((num_hidden, num_hidden), name="Whh") - s_init = tvm.compute((1, batch_size, num_hidden), + Whh = te.placeholder((num_hidden, num_hidden), name="Whh") + s_init = te.compute((1, batch_size, num_hidden), lambda _, i, j: 1.0, name="init") - s_state = tvm.placeholder((num_step, batch_size, num_hidden)) - kh = tvm.reduce_axis((0, num_hidden), name="kh") - s_update = tvm.compute( + s_state = te.placeholder((num_step, batch_size, num_hidden)) + kh = te.reduce_axis((0, num_hidden), name="kh") + s_update = te.compute( (num_step, batch_size, num_hidden), - lambda t, i, j: tvm.sum(s_state[t-1, i, kh] * Whh[kh, j], axis=kh), + lambda t, i, j: te.sum(s_state[t-1, i, kh] * Whh[kh, j], axis=kh), name="update") - s_scan = tvm.scan(s_init, s_update, s_state) + s_scan = tvm.te.scan(s_init, s_update, s_state) # schedule - s = tvm.create_schedule(s_scan.op) + s = te.create_schedule(s_scan.op) CL = s_update SS = s.cache_read(s_state, "shared", [CL]) SL = s.cache_read(SS, "local", [CL]) @@ -88,9 +89,9 @@ def rnn_matexp(): ko, ki = s[CL].split(s[CL].op.reduce_axis[0], nparts=num_thread_y) CLF = s.rfactor(CL, ko) - block_x = tvm.thread_axis((0, num_sm), "blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread_x), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread_y), "threadIdx.y") + block_x = te.thread_axis((0, num_sm), "blockIdx.x") + thread_x = te.thread_axis((0, num_thread_x), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread_y), "threadIdx.y") if PERSIST_KERNEL: s[s_scan.op].env_threads([block_x, thread_y, thread_x]) diff --git a/topi/tests/python/common.py b/topi/tests/python/common.py index e03708c67f26d..eeaf6325cec23 100644 --- a/topi/tests/python/common.py +++ b/topi/tests/python/common.py @@ -17,6 +17,7 @@ """Common utility for topi test""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi diff --git a/topi/tests/python/test_fifo_buffer.py b/topi/tests/python/test_fifo_buffer.py index 34c389aad6c9c..676c1f975c93d 100644 --- a/topi/tests/python/test_fifo_buffer.py +++ b/topi/tests/python/test_fifo_buffer.py @@ -17,6 +17,7 @@ """Test code for FIFO buffer""" import tvm +from tvm import te import topi import topi.testing import numpy as np @@ -25,8 +26,8 @@ from common import get_all_backend def verify_fifo_buffer(buffer_shape, data_shape, axis, dtype='float32'): - buffer = tvm.placeholder(buffer_shape, name='buffer', dtype=dtype) - data = tvm.placeholder(data_shape, name='data', dtype=dtype) + buffer = te.placeholder(buffer_shape, name='buffer', dtype=dtype) + data = te.placeholder(data_shape, name='data', dtype=dtype) # Use memoize, pickle the test data for next time use @memoize('topi.tests.test_fifo_buffer') @@ -98,12 +99,12 @@ def verify_conv1d_integration(): dtype = 'float32' - inc_input = tvm.placeholder(inc_input_shape, name='inc_input', dtype=dtype) - input_window = tvm.placeholder(input_window_shape, name='input_window', dtype=dtype) - context = tvm.placeholder(context_shape, name='context', dtype=dtype) - kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=dtype) - inc_output = tvm.placeholder(inc_input_shape, name='inc_output', dtype=dtype) - output_window = tvm.placeholder(output_window_shape, name='output_window', dtype=dtype) + inc_input = te.placeholder(inc_input_shape, name='inc_input', dtype=dtype) + input_window = te.placeholder(input_window_shape, name='input_window', dtype=dtype) + context = te.placeholder(context_shape, name='context', dtype=dtype) + kernel = te.placeholder(kernel_shape, name='kernel', dtype=dtype) + inc_output = te.placeholder(inc_input_shape, name='inc_output', dtype=dtype) + output_window = te.placeholder(output_window_shape, name='output_window', dtype=dtype) # Use memoize, pickle the test data for next time use @memoize('topi.tests.test_fifo_buffer_conv1d_integration') diff --git a/topi/tests/python/test_topi_basic.py b/topi/tests/python/test_topi_basic.py index 53b29df4f36d4..83f0469dc00fd 100644 --- a/topi/tests/python/test_topi_basic.py +++ b/topi/tests/python/test_topi_basic.py @@ -15,20 +15,21 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi from topi import util def test_util(): - x = tvm.const(100, "int32") + x = tvm.tir.const(100, "int32") assert util.get_const_int(x) == 100 assert util.get_const_tuple((x, x)) == (100, 100) def test_ewise(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') def test_apply(func, name): B = func(A) diff --git a/topi/tests/python/test_topi_batch_matmul.py b/topi/tests/python/test_topi_batch_matmul.py index 1b38e9037fb9f..b8c8547468478 100644 --- a/topi/tests/python/test_topi_batch_matmul.py +++ b/topi/tests/python/test_topi_batch_matmul.py @@ -17,6 +17,7 @@ """Test code for batch_matmul operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -31,8 +32,8 @@ } def verify_batch_matmul(batch, M, N, K): - x = tvm.placeholder((batch, M, K), name='x') - y = tvm.placeholder((batch, N, K), name='y') + x = te.placeholder((batch, M, K), name='x') + y = te.placeholder((batch, N, K), name='y') dtype = x.dtype # use memoize to pickle the test data for next time use diff --git a/topi/tests/python/test_topi_bitserial_conv2d.py b/topi/tests/python/test_topi_bitserial_conv2d.py index 274743d274ae1..44811d189189b 100644 --- a/topi/tests/python/test_topi_bitserial_conv2d.py +++ b/topi/tests/python/test_topi_bitserial_conv2d.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -33,8 +34,8 @@ def verify_bitserial_conv2d_nchw(batch, in_size, in_channel, num_filter, kernel, out_dtype = 'int32' with tvm.target.create('llvm'): - A = tvm.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W') + A = te.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W') B = topi.x86.bitserial_conv2d_nchw(A, W, stride, padding, activation_bits, weight_bits, input_dtype, out_dtype, unipolar) s = topi.x86.schedule_bitserial_conv2d_nchw([B]) @@ -71,8 +72,8 @@ def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel, out_dtype='int32' with tvm.target.create('llvm'): - A = tvm.placeholder((batch, in_height, in_width, in_channel), dtype=input_dtype, name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_dtype, name='W') + A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_dtype, name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_dtype, name='W') B = topi.x86.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits, input_dtype, out_dtype, unipolar) s = topi.x86.schedule_bitserial_conv2d_nhwc([B]) diff --git a/topi/tests/python/test_topi_bitserial_conv2d_rasp.py b/topi/tests/python/test_topi_bitserial_conv2d_rasp.py index 1f87785b4f48b..99ba0dba83287 100644 --- a/topi/tests/python/test_topi_bitserial_conv2d_rasp.py +++ b/topi/tests/python/test_topi_bitserial_conv2d_rasp.py @@ -18,6 +18,7 @@ import re import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -37,8 +38,8 @@ def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel, device = 'llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon' with tvm.target.create(device): - A = tvm.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W') + A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W') B = topi.arm_cpu.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits, 'uint8', out_dtype, unipolar) s = topi.arm_cpu.schedule_bitserial_conv2d_nhwc([B]) diff --git a/topi/tests/python/test_topi_bitserial_dense.py b/topi/tests/python/test_topi_bitserial_dense.py index 505ce794312fd..fbb20a663f3b6 100644 --- a/topi/tests/python/test_topi_bitserial_dense.py +++ b/topi/tests/python/test_topi_bitserial_dense.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -54,8 +55,8 @@ def get_ref_data(a_shape, b_shape, input_dtype): print ("Skipped running code, not an arm device") continue input_dtype = 'uint8' if "arm_cpu" in target else "uint32" - A = tvm.placeholder((batch, in_dim), dtype=input_dtype, name='A') - B = tvm.placeholder((out_dim, in_dim), dtype=input_dtype, name='B') + A = te.placeholder((batch, in_dim), dtype=input_dtype, name='A') + B = te.placeholder((out_dim, in_dim), dtype=input_dtype, name='B') fcompute, fschedule = topi.testing.dispatch(target, _bitserial_dense_implement) C = fcompute(A, B, activation_bits, weight_bits, input_dtype, out_dtype, unipolar) diff --git a/topi/tests/python/test_topi_bnn.py b/topi/tests/python/test_topi_bnn.py index ce6a28643b58b..275f34fd916e0 100644 --- a/topi/tests/python/test_topi_bnn.py +++ b/topi/tests/python/test_topi_bnn.py @@ -17,19 +17,20 @@ """Test code for binary neural network operators.""" import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple from tvm.contrib.pickle_memoize import memoize def verify_binary_dense(batch, in_dim, out_dim): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') + A = te.placeholder((batch, in_dim), name='A') + B = te.placeholder((out_dim, in_dim), name='B') bnn_A = topi.nn.binarize_pack(A) bnn_B = topi.nn.binarize_pack(B) # binary dense - bnn_A1 = tvm.placeholder(bnn_A.shape, dtype=bnn_A.dtype) - bnn_B1 = tvm.placeholder(bnn_B.shape, dtype=bnn_B.dtype) + bnn_A1 = te.placeholder(bnn_A.shape, dtype=bnn_A.dtype) + bnn_B1 = te.placeholder(bnn_B.shape, dtype=bnn_B.dtype) bnn_C = topi.nn.binary_dense(bnn_A1, bnn_B1) # schedule with tvm.target.create('llvm'): diff --git a/topi/tests/python/test_topi_broadcast.py b/topi/tests/python/test_topi_broadcast.py index 2bea9b09bbf43..2fe00c7d4ec9e 100644 --- a/topi/tests/python/test_topi_broadcast.py +++ b/topi/tests/python/test_topi_broadcast.py @@ -17,6 +17,7 @@ """Test code for broadcasting operators.""" import numpy as np import tvm +from tvm import te import topi import topi.testing from common import get_all_backend @@ -24,7 +25,7 @@ def verify_broadcast_to_ele(in_shape, out_shape, fbcast): # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = fbcast(A, out_shape) def check_device(device): @@ -54,13 +55,13 @@ def verify_broadcast_binary_ele(lhs_shape, rhs_shape, rhs_min=-100, rhs_max=100, dtype="float32"): # Build the logic and compile the function - A = (tvm.var("A", dtype=dtype) if lhs_shape is None - else tvm.placeholder(shape=lhs_shape, name="A", dtype=dtype)) - B = (tvm.var("B", dtype=dtype) if rhs_shape is None - else tvm.placeholder(shape=rhs_shape, name="B", dtype=dtype)) + A = (te.var("A", dtype=dtype) if lhs_shape is None + else te.placeholder(shape=lhs_shape, name="A", dtype=dtype)) + B = (te.var("B", dtype=dtype) if rhs_shape is None + else te.placeholder(shape=rhs_shape, name="B", dtype=dtype)) C = ftopi(A, B) - if isinstance(A, tvm.expr.PrimExpr) and isinstance(B, tvm.expr.PrimExpr): - assert(isinstance(C, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr) and isinstance(B, tvm.tir.PrimExpr): + assert(isinstance(C, tvm.tir.PrimExpr)) return def gen_operand(shape, low, high, ctx): @@ -240,10 +241,10 @@ def test_apply( dtype="bool", ): # Build the logic and compile the function - A = tvm.placeholder(shape=indata.shape, name="A", dtype=dtype) + A = te.placeholder(shape=indata.shape, name="A", dtype=dtype) B = func(A) - if isinstance(A, tvm.expr.PrimExpr): - assert (isinstance(B, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr): + assert (isinstance(B, tvm.tir.PrimExpr)) return def check_device(device): @@ -280,11 +281,11 @@ def test_apply( dtype="int32", ): # Build the logic and compile the function - A = tvm.placeholder(shape=shape, name="A", dtype=dtype) + A = te.placeholder(shape=shape, name="A", dtype=dtype) B = func(A) - if isinstance(A, tvm.expr.PrimExpr): - assert (isinstance(B, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr): + assert (isinstance(B, tvm.tir.PrimExpr)) return def check_device(device): @@ -322,11 +323,11 @@ def test_apply( dtype="bool", ): # Build the logic and compile the function - A = (tvm.var("A", dtype=dtype)) - B = (tvm.var("B", dtype=dtype)) + A = (te.var("A", dtype=dtype)) + B = (te.var("B", dtype=dtype)) C = func(A, B) - if isinstance(A, tvm.expr.PrimExpr) and isinstance(B, tvm.expr.PrimExpr): - assert (isinstance(C, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr) and isinstance(B, tvm.tir.PrimExpr): + assert (isinstance(C, tvm.tir.PrimExpr)) return def check_device(device): diff --git a/topi/tests/python/test_topi_clip.py b/topi/tests/python/test_topi_clip.py index 74034ce30b0e7..38617ee114438 100644 --- a/topi/tests/python/test_topi_clip.py +++ b/topi/tests/python/test_topi_clip.py @@ -17,6 +17,7 @@ """Test code for clip operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -25,9 +26,9 @@ from common import get_all_backend def verify_clip(N, a_min, a_max, dtype): - A = tvm.placeholder((N, N), dtype=dtype, name='A') + A = te.placeholder((N, N), dtype=dtype, name='A') B = topi.clip(A, a_min, a_max) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) # use memoize to pickle the test data for next time use @memoize("topi.tests.test_topi_clip") diff --git a/topi/tests/python/test_topi_conv1d.py b/topi/tests/python/test_topi_conv1d.py index 6e55a574de4a2..972a3f195a4f0 100644 --- a/topi/tests/python/test_topi_conv1d.py +++ b/topi/tests/python/test_topi_conv1d.py @@ -18,6 +18,7 @@ import numpy as np import itertools import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -54,8 +55,8 @@ def verify_conv1d(batch, kernel_shape = [kernel_size, in_channels, filters] dtype = 'float32' - A = tvm.placeholder(in_shape, name='A', dtype=dtype) - W = tvm.placeholder(kernel_shape, name='W', dtype=dtype) + A = te.placeholder(in_shape, name='A', dtype=dtype) + W = te.placeholder(kernel_shape, name='W', dtype=dtype) def get_ref_data(layout): a_np = np.random.uniform(size=in_shape).astype(dtype) diff --git a/topi/tests/python/test_topi_conv1d_transpose_ncw.py b/topi/tests/python/test_topi_conv1d_transpose_ncw.py index 64af254adc7dd..4d015bf53321a 100644 --- a/topi/tests/python/test_topi_conv1d_transpose_ncw.py +++ b/topi/tests/python/test_topi_conv1d_transpose_ncw.py @@ -18,6 +18,7 @@ import numpy as np import itertools import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -31,8 +32,8 @@ def verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel, stride, padding): in_width = in_size - A = tvm.placeholder((batch, in_channel, in_width), name='A') - W = tvm.placeholder((in_channel, num_filter, kernel), name='W') + A = te.placeholder((batch, in_channel, in_width), name='A') + W = te.placeholder((in_channel, num_filter, kernel), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_NCHWc.py b/topi/tests/python/test_topi_conv2d_NCHWc.py index 8a74b4f06cd25..a072d2abdafc2 100644 --- a/topi/tests/python/test_topi_conv2d_NCHWc.py +++ b/topi/tests/python/test_topi_conv2d_NCHWc.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -71,9 +72,9 @@ def verify_conv2d_NCHWc(batch, in_channel, in_size, num_filter, kernel, stride, ic_block = bn break - A = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A') - W = tvm.placeholder((num_filter//oc_block, in_channel//ic_block, kernel, kernel, ic_block, oc_block), name='W') - bias = tvm.placeholder((num_filter//oc_block, 1, 1, oc_block), name='bias') + A = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A') + W = te.placeholder((num_filter//oc_block, in_channel//ic_block, kernel, kernel, ic_block, oc_block), name='W') + bias = te.placeholder((num_filter//oc_block, 1, 1, oc_block), name='bias') @memoize("topi.tests.test_topi_conv2d_NCHWc.verify_conv2d_NCHWc") def get_ref_data(): diff --git a/topi/tests/python/test_topi_conv2d_hwcn.py b/topi/tests/python/test_topi_conv2d_hwcn.py index 086523e460136..41192bd45debf 100644 --- a/topi/tests/python/test_topi_conv2d_hwcn.py +++ b/topi/tests/python/test_topi_conv2d_hwcn.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -33,9 +34,9 @@ def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1): in_height = in_width = in_size - A = tvm.placeholder((in_height, in_width, in_channel, batch), name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') - B = tvm.placeholder((1, num_filter, 1), name='bias') + A = te.placeholder((in_height, in_width, in_channel, batch), name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') + B = te.placeholder((1, num_filter, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_int8.py b/topi/tests/python/test_topi_conv2d_int8.py index c36bfa331fafa..d784e5cd3f86b 100644 --- a/topi/tests/python/test_topi_conv2d_int8.py +++ b/topi/tests/python/test_topi_conv2d_int8.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -38,9 +39,9 @@ def verify_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kernel, str in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W', dtype='int8') - bias = tvm.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', + A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W', dtype='int8') + bias = te.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', dtype='int8') a_shape = get_const_tuple(A.shape) diff --git a/topi/tests/python/test_topi_conv2d_nchw.py b/topi/tests/python/test_topi_conv2d_nchw.py index a0258ec93bf23..d42c8c7c24c07 100644 --- a/topi/tests/python/test_topi_conv2d_nchw.py +++ b/topi/tests/python/test_topi_conv2d_nchw.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -36,9 +37,9 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_nhwc.py b/topi/tests/python/test_topi_conv2d_nhwc.py index 2a5915ef0a53e..814fd45e0636f 100644 --- a/topi/tests/python/test_topi_conv2d_nhwc.py +++ b/topi/tests/python/test_topi_conv2d_nhwc.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -37,8 +38,8 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1): in_height = in_width = in_size - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py b/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py index 8267aad382e8a..a5d532c4e016b 100644 --- a/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py +++ b/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -30,8 +31,8 @@ def verify_conv2d_1x1_nhwc_pack_int8(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1): in_height = in_width = in_size - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='uint8') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W', dtype='int8') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='uint8') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W', dtype='int8') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_transpose_nchw.py b/topi/tests/python/test_topi_conv2d_transpose_nchw.py index e8aabc61a4fab..e8e1fce97eb10 100644 --- a/topi/tests/python/test_topi_conv2d_transpose_nchw.py +++ b/topi/tests/python/test_topi_conv2d_transpose_nchw.py @@ -17,6 +17,7 @@ """Test code for transposed convolution.""" import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -38,8 +39,8 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel, stride_height, stride_width = stride pad_top, pad_left, pad_bottom, pad_right = padding - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((in_channel, num_filter, kernel_height, kernel_width), name='W') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((in_channel, num_filter, kernel_height, kernel_width), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_winograd.py b/topi/tests/python/test_topi_conv2d_winograd.py index 2d12336e771a8..cfbc30063d091 100644 --- a/topi/tests/python/test_topi_conv2d_winograd.py +++ b/topi/tests/python/test_topi_conv2d_winograd.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -42,9 +43,9 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv3d_ncdhw.py b/topi/tests/python/test_topi_conv3d_ncdhw.py index 6c60c27ed4264..33e791716e343 100644 --- a/topi/tests/python/test_topi_conv3d_ncdhw.py +++ b/topi/tests/python/test_topi_conv3d_ncdhw.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -40,9 +41,9 @@ def verify_conv3d_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride, in_depth = in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv3d_ndhwc.py b/topi/tests/python/test_topi_conv3d_ndhwc.py index 7e2f02cea20a3..8526bb1fc90ab 100644 --- a/topi/tests/python/test_topi_conv3d_ndhwc.py +++ b/topi/tests/python/test_topi_conv3d_ndhwc.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -41,8 +42,8 @@ def verify_conv3d_ndhwc(batch, in_channel, in_size, num_filter, kernel, stride, else: kernel_depth = kernel_height = kernel_width = kernel - A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') - W = tvm.placeholder((kernel_depth, kernel_height, kernel_width, in_channel, num_filter), name='W') + A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') + W = te.placeholder((kernel_depth, kernel_height, kernel_width, in_channel, num_filter), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_deformable_conv2d.py b/topi/tests/python/test_topi_deformable_conv2d.py index 1b1a0327a3d5f..a88525407e274 100644 --- a/topi/tests/python/test_topi_deformable_conv2d.py +++ b/topi/tests/python/test_topi_deformable_conv2d.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -34,11 +35,11 @@ def verify_deformable_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, deformable_groups, groups)) - A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A') + A = te.placeholder((batch, in_channel, in_size, in_size), name='A') out_size = (in_size - (kernel - 1) * dilation - 1 + 2 * padding) // stride + 1 - Offset = tvm.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + Offset = te.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) offset_shape = get_const_tuple(Offset.shape) diff --git a/topi/tests/python/test_topi_dense.py b/topi/tests/python/test_topi_dense.py index d729e4330e52e..7498c004c8ddc 100644 --- a/topi/tests/python/test_topi_dense.py +++ b/topi/tests/python/test_topi_dense.py @@ -17,6 +17,7 @@ """Test code for dense operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -38,9 +39,9 @@ } def verify_dense(batch, in_dim, out_dim, use_bias=True): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') - C = tvm.placeholder((out_dim,), name='C') + A = te.placeholder((batch, in_dim), name='A') + B = te.placeholder((out_dim, in_dim), name='B') + C = te.placeholder((out_dim,), name='C') dtype = A.dtype # use memoize to pickle the test data for next time use @@ -83,9 +84,9 @@ def check_device(device): def verify_dense_int8(batch, in_dim, out_dim, use_bias=True): dtype = 'int8' out_dtype = 'int32' - A = tvm.placeholder((batch, in_dim), name='A', dtype=dtype) - B = tvm.placeholder((out_dim, in_dim), name='B', dtype=dtype) - C = tvm.placeholder((out_dim,), name='C', dtype=out_dtype) + A = te.placeholder((batch, in_dim), name='A', dtype=dtype) + B = te.placeholder((out_dim, in_dim), name='B', dtype=dtype) + C = te.placeholder((out_dim,), name='C', dtype=out_dtype) # use memoize to pickle the test data for next time use @memoize("topi.tests.test_topi_dense_int8") diff --git a/topi/tests/python/test_topi_depth_to_space.py b/topi/tests/python/test_topi_depth_to_space.py index 693bfb624042f..b21eb9773c32a 100644 --- a/topi/tests/python/test_topi_depth_to_space.py +++ b/topi/tests/python/test_topi_depth_to_space.py @@ -17,6 +17,7 @@ """Test code for depth to space""" import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -37,7 +38,7 @@ def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width, la else: raise NotImplementedError('Layout not supported {}'.format(layout)) - A = tvm.placeholder(in_shape, name='A', dtype='float32') + A = te.placeholder(in_shape, name='A', dtype='float32') dtype = A.dtype a_np = np.random.uniform(size=in_shape).astype(dtype) diff --git a/topi/tests/python/test_topi_depthwise_conv2d.py b/topi/tests/python/test_topi_depthwise_conv2d.py index 7efe5a21578c0..693348918d3ef 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d.py +++ b/topi/tests/python/test_topi_depthwise_conv2d.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -58,10 +59,10 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu padding_args = padding # placeholder - Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input') - Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Input = te.placeholder((batch, in_channel, in_height, in_width), name='Input') + Filter = te.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') dtype = 'float32' @@ -161,10 +162,10 @@ def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_mu padding_args = padding # placeholder - Input = tvm.placeholder((batch, in_height, in_width, in_channel), name='Input') - Filter = tvm.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Input = te.placeholder((batch, in_height, in_width, in_channel), name='Input') + Filter = te.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') dtype = 'float32' @@ -289,8 +290,8 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m break # placeholder - Input = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='Input') - Filter = tvm.placeholder((out_channel//oc_block, 1, filter_height, filter_width, 1, oc_block), name='Filter') + Input = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='Input') + Filter = te.placeholder((out_channel//oc_block, 1, filter_height, filter_width, 1, oc_block), name='Filter') in_layout = "NCHW%dc" % ic_block out_layout = "NCHW%dc" % oc_block dtype = 'float32' diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py index ad44429612ce5..aac0cd523b0be 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py +++ b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi import numpy as np from tvm.contrib.pickle_memoize import memoize @@ -40,8 +41,8 @@ def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multipli oshape = [batch, out_h, out_w, out_channel] # placeholder - Out_grad = tvm.placeholder(oshape, name='Out_grad') - Filter = tvm.placeholder((filter_h, filter_w, filter_channel, channel_multiplier)) + Out_grad = te.placeholder(oshape, name='Out_grad') + Filter = te.placeholder((filter_h, filter_w, filter_channel, channel_multiplier)) # declare In_grad = topi.nn.depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, stride=[stride_h, stride_w], padding=[padding_h, padding_w]) diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py index 2e09e675c5025..4602d098bf915 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py +++ b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi import topi.testing import numpy as np @@ -40,8 +41,8 @@ def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multipl fshape = [filter_h, filter_w, in_channel, channel_multiplier] # placeholder - Out_grad = tvm.placeholder(oshape, name='Out_grad') - Input = tvm.placeholder((batch, in_h, in_w, in_channel), name='In_grad') + Out_grad = te.placeholder(oshape, name='Out_grad') + Input = te.placeholder((batch, in_h, in_w, in_channel), name='In_grad') # declare Weight_grad = topi.nn.depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, stride=[stride_h, stride_w], padding=[padding_h, padding_w]) diff --git a/topi/tests/python/test_topi_dilate.py b/topi/tests/python/test_topi_dilate.py index 24988212e52a8..1e69383238c79 100644 --- a/topi/tests/python/test_topi_dilate.py +++ b/topi/tests/python/test_topi_dilate.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi import topi.testing import numpy as np @@ -25,9 +26,9 @@ def test_dilate(): ctx = tvm.cpu(0) def _test_dilate(input_size, strides): - Input = tvm.placeholder((input_size)) + Input = te.placeholder((input_size)) Output = topi.nn.dilate(Input, strides) - schedule = tvm.create_schedule(Output.op) + schedule = te.create_schedule(Output.op) input_np = np.random.uniform(size=input_size).astype(Input.dtype) output_np = topi.testing.dilate_python(input_np, strides) input_tvm = tvm.nd.array(input_np, ctx=ctx) diff --git a/topi/tests/python/test_topi_group_conv2d.py b/topi/tests/python/test_topi_group_conv2d.py index 3904db7d2b23f..6909bbee8bb03 100644 --- a/topi/tests/python/test_topi_group_conv2d.py +++ b/topi/tests/python/test_topi_group_conv2d.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -41,9 +42,9 @@ def verify_group_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, str in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) @@ -112,9 +113,9 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kerne in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') - W = tvm.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W', dtype='int8') - bias = tvm.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', + A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') + W = te.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W', dtype='int8') + bias = te.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', dtype='int8') a_shape = get_const_tuple(A.shape) diff --git a/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py b/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py index 08f136e5ae231..0fd4205eef64c 100644 --- a/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py +++ b/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -61,8 +62,8 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, groups, in_size, num_filte ic_block = 8 autotvm.DispatchContext.current.silent = True - A = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A', dtype='uint8') - W = tvm.placeholder((num_filter//oc_block, in_channel//ic_block//groups, kernel, kernel, ic_block//4, oc_block, 4), name='W', dtype='int8') + A = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A', dtype='uint8') + W = te.placeholder((num_filter//oc_block, in_channel//ic_block//groups, kernel, kernel, ic_block//4, oc_block, 4), name='W', dtype='int8') @memoize("topi.tests.test_topi_conv2d_NCHWc_int8.verify_conv2d_NCHWc_int8") def get_ref_data(): diff --git a/topi/tests/python/test_topi_image.py b/topi/tests/python/test_topi_image.py index 4297638b3dfee..4eea75d68d28d 100644 --- a/topi/tests/python/test_topi_image.py +++ b/topi/tests/python/test_topi_image.py @@ -17,6 +17,7 @@ """Test code for bilinear scale """ import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -25,12 +26,12 @@ def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width, layout='NCHW', coord_trans="align_corners", method="bilinear"): if layout == 'NCHW': - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, in_channel, out_height, out_width) a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype) elif layout == 'NHWC': - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, out_height, out_width, in_channel) a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype) @@ -84,12 +85,12 @@ def test_resize(): def verify_resize3d(batch, in_channel, in_depth, in_height, in_width, out_depth, out_height, out_width, layout='NCDHW', coordinate_transformation_mode="half_pixel", method="trilinear"): if layout == 'NCDHW': - A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A', dtype='float32') + A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, in_channel, out_depth, out_height, out_width) a_np = np.random.uniform(size=(batch, in_channel, in_depth, in_height, in_width)).astype(dtype) elif layout == 'NDHWC': - A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A', dtype='float32') + A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, out_depth, out_height, out_width, in_channel) a_np = np.random.uniform(size=(batch, in_depth, in_height, in_width, in_channel)).astype(dtype) @@ -146,10 +147,10 @@ def test_crop_and_resize(): def verify_crop_and_resize(image_shape, np_boxes, np_box_indices, np_crop_size, layout='NHWC', method="bilinear", extrapolation_value=0.0): - images = tvm.placeholder(image_shape, name='images', dtype='float32') + images = te.placeholder(image_shape, name='images', dtype='float32') np_images = np.random.uniform(size=image_shape).astype("float32") - boxes = tvm.placeholder(np_boxes.shape, name="boxes", dtype="float32") - box_ind = tvm.placeholder(np_box_indices.shape, name="box_ind", dtype="int32") + boxes = te.placeholder(np_boxes.shape, name="boxes", dtype="float32") + box_ind = te.placeholder(np_box_indices.shape, name="box_ind", dtype="int32") batch = len(np_box_indices) target_height, target_width = np_crop_size[0], np_crop_size[1] diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 4cb3c75818002..7e003a7a52b21 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -17,6 +17,7 @@ """Test code for local response normalization""" import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple import topi.testing @@ -32,7 +33,7 @@ } def verify_lrn(shape, size, axis, bias, alpha, beta): - A = tvm.placeholder(shape, name='A') + A = te.placeholder(shape, name='A') B = topi.nn.lrn(A, size, axis, alpha, beta, bias) dtype = A.dtype diff --git a/topi/tests/python/test_topi_math.py b/topi/tests/python/test_topi_math.py index debc3efe0d275..30a0f44aad579 100644 --- a/topi/tests/python/test_topi_math.py +++ b/topi/tests/python/test_topi_math.py @@ -17,6 +17,7 @@ import numpy as np import scipy import tvm +from tvm import te import topi import topi.testing from topi import util @@ -24,7 +25,7 @@ def test_util(): - x = tvm.const(100, "int32") + x = tvm.tir.const(100, "int32") assert util.get_const_int(x) == 100 assert util.get_const_tuple((x, x)) == (100, 100) @@ -37,13 +38,13 @@ def test_apply( low, high, shape=(20, 3), - dtype=tvm.float32, + dtype="float32", check_round=False, skip_name_check=False, ): - m = tvm.var("m") - l = tvm.var("l") - A = tvm.placeholder((m, l), dtype=dtype, name="A") + m = te.var("m") + l = te.var("l") + A = te.placeholder((m, l), dtype=dtype, name="A") B = func(A) assert tuple(B.shape) == tuple(A.shape) @@ -76,13 +77,13 @@ def test_isnan( low, high, shape=(20, 3), - dtype=tvm.float32, + dtype="float32", check_round=False, skip_name_check=False, ): - m = tvm.var("m") - l = tvm.var("l") - A = tvm.placeholder((m, l), dtype=dtype, name="A") + m = te.var("m") + l = te.var("l") + A = te.placeholder((m, l), dtype=dtype, name="A") B = topi.isnan(A) assert tuple(B.shape) == tuple(A.shape) @@ -134,7 +135,7 @@ def check_device(device): def test_cast(): def verify(from_dtype, to_dtype, low=-100, high=100): shape = (5, 4) - A = tvm.placeholder(shape, dtype=from_dtype, name="A") + A = te.placeholder(shape, dtype=from_dtype, name="A") B = topi.cast(A, to_dtype) if from_dtype == "bool": @@ -177,11 +178,11 @@ def test_apply( low, high, step, - dtype=tvm.float32 + dtype="float32" ): a_np = np.arange(low, high, step).astype(dtype) b_np = f_numpy(a_np) - A = tvm.placeholder(a_np.shape, dtype=dtype, name="A") + A = te.placeholder(a_np.shape, dtype=dtype, name="A") B = func(A) assert tuple(B.shape) == tuple(A.shape) diff --git a/topi/tests/python/test_topi_matmul.py b/topi/tests/python/test_topi_matmul.py index c712970945fcb..0c0a365688b37 100644 --- a/topi/tests/python/test_topi_matmul.py +++ b/topi/tests/python/test_topi_matmul.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple @@ -27,12 +28,12 @@ def with_tvm(lam, *args): pls = [] # placeholders vals_nd = [] # initial values for i,arg in enumerate(args): - pls.append(tvm.placeholder(arg.shape, name='pl'+str(i))) + pls.append(te.placeholder(arg.shape, name='pl'+str(i))) vals_nd.append(tvm.nd.array(arg, ctx)) out = lam(*pls) out_nd = tvm.nd.array(np.zeros(get_const_tuple(out.shape), dtype=out.dtype), ctx) - s = tvm.create_schedule([out.op]) + s = te.create_schedule([out.op]) m = tvm.build(s, pls + [out], "llvm") m(*(vals_nd+[out_nd])) return out_nd.asnumpy() diff --git a/topi/tests/python/test_topi_pooling.py b/topi/tests/python/test_topi_pooling.py index 084a2c7c76713..64f0841274e27 100644 --- a/topi/tests/python/test_topi_pooling.py +++ b/topi/tests/python/test_topi_pooling.py @@ -18,6 +18,7 @@ import math import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -48,7 +49,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_ sw = sh pt, pl, pb, pr = padding layout = "NCHW" - A = tvm.placeholder((n, ic, ih, iw), name='A') + A = te.placeholder((n, ic, ih, iw), name='A') B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout="NCHW", count_include_pad=count_include_pad) @@ -112,7 +113,7 @@ def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_inc sw = sh pt, pl, pb, pr = padding layout = "NCHW" - A = tvm.placeholder((n, ic, ih, iw), name='A') + A = te.placeholder((n, ic, ih, iw), name='A') B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout="NCHW", count_include_pad=count_include_pad) @@ -126,7 +127,7 @@ def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_inc else: assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1) assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1) - OutGrad = tvm.placeholder(bshape, name='OutGrad') + OutGrad = te.placeholder(bshape, name='OutGrad') PoolGrad = topi.nn.pool_grad(OutGrad, A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout="NCHW", count_include_pad=count_include_pad) @@ -202,7 +203,7 @@ def test_pool_grad(): def verify_global_pool(n, c, h, w, pool_type, layout='NCHW'): assert layout in ["NCHW", "NHWC"] - A = tvm.placeholder((n, c, h, w), name='A') + A = te.placeholder((n, c, h, w), name='A') B = topi.nn.global_pool(A, pool_type=pool_type, layout=layout) B = topi.nn.relu(B) @@ -268,7 +269,7 @@ def end_index(index, odim, idim): l_sl = slice(l_start, l_end) np_out[i, j, k, l] = np_op(np_data[i, j, k_sl, l_sl]) - data = tvm.placeholder(dshape, name="data", dtype=dtype) + data = te.placeholder(dshape, name="data", dtype=dtype) out = topi.nn.adaptive_pool(data, out_size, pool_type, layout) def check_device(device): ctx = tvm.context(device, 0) @@ -302,7 +303,7 @@ def verify_pool3d(n, ic, ih, kh, sh, padding, pool_type, input_shape = (n, ic, id, ih, iw) kernel = [kd, kh, kw] stride = [sd, sh, sw] - A = tvm.placeholder(input_shape, name='A') + A = te.placeholder(input_shape, name='A') B = topi.nn.pool3d(A, kernel=kernel, stride=stride, padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout=layout, count_include_pad=count_include_pad) @@ -355,7 +356,7 @@ def verify_pool1d(n, ic, iw, kw, sw, padding, pool_type, input_shape = (n, ic, iw) kernel = [kw] stride = [sw] - A = tvm.placeholder(input_shape, name='A') + A = te.placeholder(input_shape, name='A') B = topi.nn.pool1d(A, kernel=kernel, stride=stride, padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout=layout, count_include_pad=count_include_pad) diff --git a/topi/tests/python/test_topi_reduce.py b/topi/tests/python/test_topi_reduce.py index 751025bf82b83..cc84fe006f64d 100644 --- a/topi/tests/python/test_topi_reduce.py +++ b/topi/tests/python/test_topi_reduce.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -46,7 +47,7 @@ def _my_npy_argmin(arr, axis, keepdims): def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32"): # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A", dtype=dtype) + A = te.placeholder(shape=in_shape, name="A", dtype=dtype) A1 = topi.sqrt(topi.exp(A)) out_dtype = dtype if type == "sum": diff --git a/topi/tests/python/test_topi_relu.py b/topi/tests/python/test_topi_relu.py index 8ef354907691f..4d4166ff6487a 100644 --- a/topi/tests/python/test_topi_relu.py +++ b/topi/tests/python/test_topi_relu.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -26,7 +27,7 @@ from common import get_all_backend def verify_relu(m, n, dtype="float32"): - A = tvm.placeholder((m, n), name='A', dtype=dtype) + A = te.placeholder((m, n), name='A', dtype=dtype) B = topi.nn.relu(A) a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype) @@ -55,9 +56,9 @@ def check_device(device): def verify_leaky_relu(m, alpha): - A = tvm.placeholder((m,), name='A') + A = te.placeholder((m,), name='A') B = topi.nn.leaky_relu(A, alpha) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha @@ -70,8 +71,8 @@ def verify_leaky_relu(m, alpha): def verify_prelu(x, w, axis, weight_reshape): - X = tvm.placeholder((x), name='X') - W = tvm.placeholder((w), name='W') + X = te.placeholder((x), name='X') + W = te.placeholder((w), name='W') x_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(X.shape)).astype(X.dtype) w_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(W.shape)).astype(W.dtype) @@ -79,7 +80,7 @@ def _prelu_numpy(x, W): return (x < 0) * (x *W.reshape(weight_reshape)) + (x>=0) * x B = topi.nn.prelu(X, W, axis) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) ctx = tvm.cpu(0) x_tvm = tvm.nd.array(x_np, ctx) diff --git a/topi/tests/python/test_topi_reorg.py b/topi/tests/python/test_topi_reorg.py index c4cd2b5d0eb8c..09c2f2f966de5 100644 --- a/topi/tests/python/test_topi_reorg.py +++ b/topi/tests/python/test_topi_reorg.py @@ -19,6 +19,7 @@ import topi from topi.util import get_const_tuple import tvm +from tvm import te import topi.testing _reorg_schedule = { @@ -30,7 +31,7 @@ def verify_reorg(batch, in_size, in_channel, stride): '''Verify reorg operator by comparing outputs from tvm and numpy implementation''' in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') B = topi.vision.reorg(A, stride) a_shape = get_const_tuple(A.shape) diff --git a/topi/tests/python/test_topi_softmax.py b/topi/tests/python/test_topi_softmax.py index 5396b6beef816..4857387003002 100644 --- a/topi/tests/python/test_topi_softmax.py +++ b/topi/tests/python/test_topi_softmax.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing import logging @@ -50,10 +51,10 @@ def check_device(A, B, a_np, b_np, device, name): tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) def verify_softmax(m, n, dtype="float32"): - A = tvm.placeholder((m, n), dtype=dtype, name='A') + A = te.placeholder((m, n), dtype=dtype, name='A') B = topi.nn.softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) @@ -63,7 +64,7 @@ def verify_softmax(m, n, dtype="float32"): check_device(A, B, a_np, b_np, device, "softmax") def verify_softmax_4d(shape, dtype="float32"): - A = tvm.placeholder(shape, dtype=dtype, name='A') + A = te.placeholder(shape, dtype=dtype, name='A') B = topi.nn.softmax(A, axis=1) _, c, h, w = shape @@ -81,10 +82,10 @@ def test_softmax(): verify_softmax_4d((1, 16, 256, 256)) def verify_log_softmax(m, n, dtype="float32"): - A = tvm.placeholder((m, n), dtype=dtype, name='A') + A = te.placeholder((m, n), dtype=dtype, name='A') B = topi.nn.log_softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) b_np = topi.testing.log_softmax_python(a_np) diff --git a/topi/tests/python/test_topi_sort.py b/topi/tests/python/test_topi_sort.py index 74e55ec248d91..2728733e23941 100644 --- a/topi/tests/python/test_topi_sort.py +++ b/topi/tests/python/test_topi_sort.py @@ -18,6 +18,7 @@ from __future__ import print_function import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -34,7 +35,7 @@ def verify_argsort(axis, is_ascend): dshape = (20, 100) data_dtype = "float32" - data = tvm.placeholder(dshape, name="data", dtype=data_dtype) + data = te.placeholder(dshape, name="data", dtype=data_dtype) perm = np.arange(dshape[0] * dshape[1], dtype=data_dtype) np.random.shuffle(perm) @@ -74,7 +75,7 @@ def check_device(device): def verify_topk(k, axis, ret_type, is_ascend, dtype): shape = (20, 100) data_dtype = "float32" - data = tvm.placeholder(shape, name="data", dtype=data_dtype) + data = te.placeholder(shape, name="data", dtype=data_dtype) np_data = np.random.uniform(size=shape).astype(data_dtype) if is_ascend: diff --git a/topi/tests/python/test_topi_space_to_depth.py b/topi/tests/python/test_topi_space_to_depth.py index 99a798e733ee9..11a009d3fde99 100644 --- a/topi/tests/python/test_topi_space_to_depth.py +++ b/topi/tests/python/test_topi_space_to_depth.py @@ -17,6 +17,7 @@ """Test code for space to depth""" import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -37,7 +38,7 @@ def verify_space_to_depth(block_size, batch, in_channel, in_height, in_width, la else: raise NotImplementedError('Layout not supported {}'.format(layout)) - A = tvm.placeholder(in_shape, name='A', dtype='float32') + A = te.placeholder(in_shape, name='A', dtype='float32') dtype = A.dtype a_np = np.random.uniform(size=in_shape).astype(dtype) diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index fc2885999997a..fc2d26b828424 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -17,6 +17,7 @@ """Test code for sparse operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -26,13 +27,13 @@ import scipy.sparse as sp def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True): - nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") + nr, nc, n = te.var("nr"), te.var("nc"), te.var("n") dtype = 'float32' A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A') - B = tvm.placeholder((in_dim, 1), name='B') - C = tvm.placeholder((nr,), name='C') + B = te.placeholder((in_dim, 1), name='B') + C = te.placeholder((nr,), name='C') D = topi.sparse.csrmv(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) dtype = A.dtype # get the test data @@ -70,13 +71,13 @@ def check_device(device): check_device(device) def verify_dynamic_csrmm(batch, in_dim, out_dim, use_bias=True): - nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") + nr, nc, n = te.var("nr"), te.var("nc"), te.var("n") dtype = 'float32' A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A') - B = tvm.placeholder((in_dim, out_dim), name='B') - C = tvm.placeholder((nr,), name='C') + B = te.placeholder((in_dim, out_dim), name='B') + C = te.placeholder((nr,), name='C') D = topi.sparse.csrmm(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) dtype = A.dtype # get the test data @@ -112,12 +113,12 @@ def check_device(device): check_device(device) def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'): - nonzeros = tvm.var('nonzeros') + nonzeros = te.var('nonzeros') A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A') - B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B') - C = tvm.placeholder((out_dim,), dtype=dtype, name='C') + B = te.placeholder((out_dim, in_dim), dtype=dtype, name='B') + C = te.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) # get the test data def get_ref_data(): @@ -149,12 +150,12 @@ def check_device(device): check_device('llvm') def verify_dense_sw(batch, in_dim, out_dim, use_bias=True, dtype='float32'): - nonzeros = tvm.var('nonzeros') - A = tvm.placeholder((batch, in_dim), dtype=dtype, name='A') + nonzeros = te.var('nonzeros') + A = te.placeholder((batch, in_dim), dtype=dtype, name='A') B = tvmsp.placeholder(shape=(out_dim, in_dim), nonzeros=nonzeros, dtype=dtype, name='B') - C = tvm.placeholder((out_dim,), dtype=dtype, name='C') + C = te.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) # get the test data def get_ref_data(): @@ -224,12 +225,12 @@ def test_sparse_dense_csr(): W_np = W_sp_np.todense() Y_np = X_np.dot(W_np.T) - W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) - W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) - W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) - X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) + W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) + W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) + W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) + X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr) - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y]) Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype)) func(tvm.nd.array(X_np), tvm.nd.array(W_sp_np.data), tvm.nd.array(W_sp_np.indices), tvm.nd.array(W_sp_np.indptr), Y_tvm) @@ -243,12 +244,12 @@ def test_sparse_transpose_csr(): X_sp_T = X_sp.transpose() X_np_T = X_sp_T.todense() - X_data = tvm.placeholder(shape=X_sp.data.shape, dtype=str(X_sp.data.dtype)) - X_indices = tvm.placeholder(shape=X_sp.indices.shape, dtype=str(X_sp.indices.dtype)) - X_indptr = tvm.placeholder(shape=X_sp.indptr.shape, dtype=str(X_sp.indptr.dtype)) + X_data = te.placeholder(shape=X_sp.data.shape, dtype=str(X_sp.data.dtype)) + X_indices = te.placeholder(shape=X_sp.indices.shape, dtype=str(X_sp.indices.dtype)) + X_indptr = te.placeholder(shape=X_sp.indptr.shape, dtype=str(X_sp.indptr.dtype)) X_T_data, X_T_indices, X_T_indptr = topi.nn.sparse_transpose(X_data, X_indices, X_indptr) - s = tvm.create_schedule([X_T_data.op, X_T_indices.op, X_T_indptr.op]) + s = te.create_schedule([X_T_data.op, X_T_indices.op, X_T_indptr.op]) func = tvm.build(s, [X_data, X_indices, X_indptr, X_T_data, X_T_indices, X_T_indptr]) @@ -288,12 +289,12 @@ def test_sparse_dense_bsr(): W_np = W_sp_np.todense() Y_np = X_np.dot(W_np.T) - W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) - W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) - W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) - X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) + W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) + W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) + W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) + X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr) - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y]) Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype)) func(tvm.nd.array(X_np), @@ -317,12 +318,12 @@ def test_sparse_dense_bsr_randomized(): W_np = W_sp_np.todense() Y_np = np.array(X_np.dot(W_np.T)) - W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) - W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) - W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) - X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) + W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) + W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) + W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) + X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr) - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y]) Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype)) func(tvm.nd.array(X_np), diff --git a/topi/tests/python/test_topi_tensor.py b/topi/tests/python/test_topi_tensor.py index 05098421c5614..68ea7ab6d7d9c 100644 --- a/topi/tests/python/test_topi_tensor.py +++ b/topi/tests/python/test_topi_tensor.py @@ -17,6 +17,7 @@ """Test code for tensor operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -28,9 +29,9 @@ def verify_elemwise_sum(num_args, dtype): tvm_placeholders = [] for i in range(num_args): tvm_placeholders.append( - tvm.placeholder(shape, name="data"+str(i), dtype=dtype)) + te.placeholder(shape, name="data"+str(i), dtype=dtype)) esum = topi.elemwise_sum(tvm_placeholders) - s = tvm.create_schedule([esum.op]) + s = te.create_schedule([esum.op]) @memoize("topi.tests.test_topi_elemwise_sum") def get_ref_data(): @@ -57,11 +58,11 @@ def check_device(device): def verify_full(shape, dtype, fill_value): - A = tvm.placeholder(shape, dtype=dtype, name="A") + A = te.placeholder(shape, dtype=dtype, name="A") B = topi.full_like(A, fill_value=fill_value) C = topi.full(shape=shape, dtype=dtype, fill_value=fill_value) - s1 = tvm.create_schedule([B.op]) - s2 = tvm.create_schedule([C.op]) + s1 = te.create_schedule([B.op]) + s2 = te.create_schedule([C.op]) @memoize("topi.tests.test_topi_full") def get_ref_data(): @@ -96,9 +97,9 @@ def check_device(device): return with tvm.target.create(device): ctx = tvm.context(device, 0) - A = tvm.placeholder((n, m), name='A', dtype=dtype) - B = tvm.compute((n, m), lambda i, j: - A[i, j] + tvm.const(1, A.dtype), name='B') + A = te.placeholder((n, m), name='A', dtype=dtype) + B = te.compute((n, m), lambda i, j: + A[i, j] + tvm.tir.const(1, A.dtype), name='B') S = topi.testing.get_elemwise_schedule(device)(B) fun = tvm.build(S, [A, B], device) diff --git a/topi/tests/python/test_topi_transform.py b/topi/tests/python/test_topi_transform.py index 880e86d205e76..097c87db3a973 100644 --- a/topi/tests/python/test_topi_transform.py +++ b/topi/tests/python/test_topi_transform.py @@ -17,6 +17,7 @@ """Test code for broadcasting operators.""" import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.nvcc import have_fp16 @@ -24,7 +25,7 @@ from common import get_all_backend def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.expand_dims(A, axis, num_newaxis) def check_device(device): ctx = tvm.context(device, 0) @@ -47,7 +48,7 @@ def check_device(device): def verify_reinterpret(in_shape, in_dtype, out_dtype, generator): - A = tvm.placeholder(shape=in_shape, name="A", dtype=in_dtype) + A = te.placeholder(shape=in_shape, name="A", dtype=in_dtype) B = topi.reinterpret(A, out_dtype) def check_device(device): ctx = tvm.context(device, 0) @@ -73,7 +74,7 @@ def check_device(device): def verify_transpose(in_shape, axes): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.transpose(A, axes) def check_device(device): ctx = tvm.context(device, 0) @@ -96,7 +97,7 @@ def check_device(device): def verify_reshape(src_shape, dst_shape): - A = tvm.placeholder(shape=src_shape, name="A") + A = te.placeholder(shape=src_shape, name="A") B = topi.reshape(A, dst_shape) def check_device(device): ctx = tvm.context(device, 0) @@ -119,7 +120,7 @@ def check_device(device): def verify_squeeze(src_shape, axis): - A = tvm.placeholder(shape=src_shape, name="A") + A = te.placeholder(shape=src_shape, name="A") B = topi.squeeze(A, axis=axis) def check_device(device): ctx = tvm.context(device, 0) @@ -158,7 +159,7 @@ def get_concat_schedule(target): tensor_l = [] for i, shape in enumerate(shapes): - tensor_l.append(tvm.placeholder(shape, name="A" + str(i))) + tensor_l.append(te.placeholder(shape, name="A" + str(i))) out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis) def check_device(device): ctx = tvm.context(device, 0) @@ -183,7 +184,7 @@ def check_device(device): def verify_stack(shapes, axis): tensor_l = [] for i, shape in enumerate(shapes): - tensor_l.append(tvm.placeholder(shape, name="A" + str(i))) + tensor_l.append(te.placeholder(shape, name="A" + str(i))) out_tensor = topi.stack(tensor_l, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -207,7 +208,7 @@ def check_device(device): def verify_split(src_shape, indices_or_sections, axis): - A = tvm.placeholder(shape=src_shape, name="A") + A = te.placeholder(shape=src_shape, name="A") tensor_l = topi.split(A, indices_or_sections, axis=axis) def check_device(device): ctx = tvm.context(device, 0) @@ -232,10 +233,10 @@ def check_device(device): def verify_expand_like(in_shape, out_shape, axis): - A = tvm.placeholder(shape=in_shape, name="A") - B = tvm.placeholder(shape=out_shape, name="B") + A = te.placeholder(shape=in_shape, name="A") + B = te.placeholder(shape=out_shape, name="B") C = topi.expand_like(A, B, axis) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) def check_device(device): if not tvm.runtime.enabled(device): @@ -266,7 +267,7 @@ def check_device(device): check_device(device) def verify_flip(in_shape, axis): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.flip(A, axis) + 1 def check_device(device): ctx = tvm.context(device, 0) @@ -292,8 +293,8 @@ def verify_take(src_shape, indices_src, axis=None, mode="clip"): src_dtype = "float32" indices_dtype = "int32" indices_src = np.array(indices_src, dtype=indices_dtype) - A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A") - indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") + A = te.placeholder(shape=src_shape, dtype=src_dtype, name="A") + indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") if axis is None: out_tensor = topi.take(a=A, indices=indices, mode=mode) else: @@ -330,7 +331,7 @@ def check_device(device): check_device(device) def verify_strided_slice(in_shape, begin, end, strides=None): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") strides = [1,1,1] if strides is None else strides B = topi.strided_slice(A, begin, end, strides) + 1 @@ -356,12 +357,12 @@ def check_device(device): check_device(device) def verify_strided_set(in_shape, v_shape, begin, end, strides=None): - A = tvm.placeholder(shape=in_shape, name="A") - V = tvm.placeholder(shape=v_shape, name="V") - b = tvm.placeholder(shape=(len(begin),), name="b", dtype='int32') - e = tvm.placeholder(shape=(len(end),), name="e", dtype='int32') + A = te.placeholder(shape=in_shape, name="A") + V = te.placeholder(shape=v_shape, name="V") + b = te.placeholder(shape=(len(begin),), name="b", dtype='int32') + e = te.placeholder(shape=(len(end),), name="e", dtype='int32') if strides is not None: - st = tvm.placeholder(shape=(len(strides),), name="st", dtype='int32') + st = te.placeholder(shape=(len(strides),), name="st", dtype='int32') B = topi.strided_set(A, V, b, e, st) + 1 else: B = topi.strided_set(A, V, b, e) + 1 @@ -404,8 +405,8 @@ def check_device(device): def verify_gather_nd(src_shape, indices_src, indices_dtype): src_dtype = "float32" indices_src = np.array(indices_src, dtype=indices_dtype) - A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A") - indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") + A = te.placeholder(shape=src_shape, dtype=src_dtype, name="A") + indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") out_tensor = topi.gather_nd(a=A, indices=indices) def check_device(device): @@ -464,7 +465,7 @@ def check_device(device): check_device(device) def verify_repeat(in_shape, repeats, axis): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.repeat(A, repeats, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -486,7 +487,7 @@ def check_device(device): check_device(device) def verify_tile(in_shape, reps): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.tile(A, reps) def check_device(device): ctx = tvm.context(device, 0) @@ -508,10 +509,10 @@ def check_device(device): check_device(device) def verify_where(in_shape): - Cond = tvm.placeholder(shape=in_shape, name="cond") + Cond = te.placeholder(shape=in_shape, name="cond") dtype = Cond.dtype - A = tvm.placeholder(shape=in_shape, name="A") - B = tvm.placeholder(shape=in_shape, name="B") + A = te.placeholder(shape=in_shape, name="A") + B = te.placeholder(shape=in_shape, name="B") C = topi.where(Cond, A, B) def check_device(device): ctx = tvm.context(device, 0) @@ -537,9 +538,9 @@ def check_device(device): check_device(device) def verify_one_hot(indices_shape, depth, on_value, off_value, axis, dtype): - indices = tvm.placeholder(shape=indices_shape, name="indices", dtype="int32") - on_value_const = tvm.const(on_value, dtype) - off_value_const = tvm.const(off_value, dtype) + indices = te.placeholder(shape=indices_shape, name="indices", dtype="int32") + on_value_const = tvm.tir.const(on_value, dtype) + off_value_const = tvm.tir.const(off_value, dtype) one_hot_result = topi.transform.one_hot(indices, on_value_const, off_value_const, depth, axis, dtype) def check_device(device): ctx = tvm.context(device, 0) @@ -624,9 +625,9 @@ def test_squeeze(): verify_squeeze((1, 1, 1, 1), None) # a special case to trigger inline let expression - A = tvm.placeholder((2,), 'float32', 'A') + A = te.placeholder((2,), 'float32', 'A') E = topi.squeeze(A) - C = tvm.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')]) + C = te.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')]) for device in ['cuda', 'opencl']: ctx = tvm.context(device, 0) if ctx.exist: @@ -737,7 +738,7 @@ def test_tile(): def test_layout_transform(): in_shape = (1, 32, 8, 8) - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") + A = te.placeholder(shape=in_shape, dtype="float32", name="A") B = topi.layout_transform(A, "NCHW", "NCHW16c") input = np.random.uniform(size=in_shape).astype(A.dtype) @@ -766,7 +767,7 @@ def check_device(device): def test_shape(): in_shape = (8, 7, 13) dtype = "int32" - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") + A = te.placeholder(shape=in_shape, dtype="float32", name="A") B = topi.shape(A, dtype) input = np.random.uniform(size=in_shape).astype(A.dtype) @@ -796,8 +797,8 @@ def test_sequence_mask(): for mask_value in [0.0, 1.0]: max_length = in_shape[axis] batch_size = in_shape[1 - axis] - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") - B = tvm.placeholder(shape=(batch_size,), dtype="int32", name="B") + A = te.placeholder(shape=in_shape, dtype="float32", name="A") + B = te.placeholder(shape=(batch_size,), dtype="int32", name="B") C = topi.sequence_mask(A, B, axis=axis, mask_value=mask_value) A_data = np.random.normal(0, 1, in_shape).astype(np.float32) B_data = np.random.randint(1, max_length, (batch_size,)).astype(np.int32) @@ -823,7 +824,7 @@ def check_device(device): def test_ndarray_size(): in_shape = (5, 11, 7) dtype = "int32" - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") + A = te.placeholder(shape=in_shape, dtype="float32", name="A") B = topi.ndarray_size(A, dtype) input = np.random.uniform(size=in_shape).astype(A.dtype) @@ -857,13 +858,13 @@ def check_device(device): return print("Running on target: %s" % device) conv2d_compute, conv2d_schedule = topi.testing.get_conv2d_nchw_implement(device) - data = tvm.placeholder((2, 1, 2, 4), 'int8', 'data') - w = tvm.placeholder((3, 1, 2, 2), 'int8', 'w') + data = te.placeholder((2, 1, 2, 4), 'int8', 'data') + w = te.placeholder((3, 1, 2, 2), 'int8', 'w') conv1 = conv2d_compute(data, w, 1, 0, 1, 'int32') - zeros = topi.full((2, 3, 1, 3), 'int32', tvm.const(0, dtype='int32')) + zeros = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(0, dtype='int32')) gt = topi.greater_equal(conv1, zeros) - one = topi.full((2, 3, 1, 3), 'int32', tvm.const(1, dtype='int32')) - two = topi.full((2, 3, 1, 3), 'int32', tvm.const(2, dtype='int32')) + one = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(1, dtype='int32')) + two = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(2, dtype='int32')) where = topi.where(gt, one, two) add = topi.add(conv1, where) outs = [add] diff --git a/topi/tests/python/test_topi_upsampling.py b/topi/tests/python/test_topi_upsampling.py index 003748719a0e9..874471b830fd8 100644 --- a/topi/tests/python/test_topi_upsampling.py +++ b/topi/tests/python/test_topi_upsampling.py @@ -17,6 +17,7 @@ """Test code for upsampling""" import numpy as np import tvm +from tvm import te import topi import topi.testing import math @@ -28,12 +29,12 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w, layout='NCHW', method="nearest_neighbor", in_batch_block = 0, in_channel_block = 0): if layout == 'NCHW': - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') dtype = A.dtype out_shape = (batch, in_channel, int(round(in_height*scale_h)), int(round(in_width*scale_w))) a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype) elif nchw_pack_layout(layout): - A = tvm.placeholder((batch, in_channel, in_height, in_width, in_batch_block, in_channel_block), + A = te.placeholder((batch, in_channel, in_height, in_width, in_batch_block, in_channel_block), name='A') dtype = A.dtype out_shape = (batch, in_channel, int(round(in_height*scale_h)), int(round(in_width*scale_w)), @@ -41,7 +42,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w, a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width, in_batch_block, in_channel_block)).astype(dtype) elif layout == 'NHWC': - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A') dtype = A.dtype out_shape = (batch, int(round(in_height*scale_h)), int(round(in_width*scale_w)), in_channel) a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype) @@ -115,13 +116,13 @@ def test_upsampling(): def verify_upsampling3d(batch, in_channel, in_depth, in_height, in_width, scale_d, scale_h, scale_w, layout='NCDHW', method="nearest_neighbor"): if layout == 'NCDHW': - A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') + A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') dtype = A.dtype out_shape = (batch, in_channel, int(round(in_depth*scale_d)), int(round(in_height*scale_h)), int(round(in_width*scale_w))) a_np = np.random.uniform(size=(batch, in_channel, in_depth, in_height, in_width)).astype(dtype) elif layout == 'NDHWC': - A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') + A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') dtype = A.dtype out_shape = (batch, int(round(in_depth*scale_d)), int(round(in_height*scale_h)), int(round(in_width*scale_w)), in_channel) diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 7d27b8221a609..0aa410d7ea13d 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -19,6 +19,7 @@ import math import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -90,7 +91,7 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): fcompute, fschedule = topi.testing.dispatch(device, _get_valid_counts_implement) - data = tvm.placeholder(dshape, name="data", dtype=dtype) + data = te.placeholder(dshape, name="data", dtype=dtype) outs = fcompute(data, score_threshold, id_index, score_index) s = fschedule(outs) @@ -121,8 +122,8 @@ def verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_re dshape = np_data.shape batch, num_anchors, _ = dshape indices_dshape = (batch, num_anchors) - data = tvm.placeholder(dshape, name="data") - valid_count = tvm.placeholder((batch,), dtype="int32", name="valid_count") + data = te.placeholder(dshape, name="data") + valid_count = te.placeholder((batch,), dtype="int32", name="valid_count") def check_device(device): ctx = tvm.context(device, 0) @@ -182,7 +183,7 @@ def test_non_max_suppression(): def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, 0.5), clip=False): - data = tvm.placeholder(dshape, name="data") + data = te.placeholder(dshape, name="data") dtype = data.dtype input_data = np.random.uniform(size=dshape).astype(dtype) @@ -223,7 +224,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - + fcompute, fschedule = topi.testing.dispatch(device, _multibox_prior_implement) with tvm.target.create(device): out = fcompute(data, sizes, ratios, steps, offsets, clip) @@ -249,9 +250,9 @@ def test_multibox_detection(): batch_size = 1 num_anchors = 3 num_classes = 3 - cls_prob = tvm.placeholder((batch_size, num_anchors, num_classes), name="cls_prob") - loc_preds = tvm.placeholder((batch_size, num_anchors * 4), name="loc_preds") - anchors = tvm.placeholder((1, num_anchors, 4), name="anchors") + cls_prob = te.placeholder((batch_size, num_anchors, num_classes), name="cls_prob") + loc_preds = te.placeholder((batch_size, num_anchors * 4), name="loc_preds") + anchors = te.placeholder((1, num_anchors, 4), name="anchors") # Manually create test case np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]]) @@ -290,8 +291,8 @@ def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_s a_shape = (batch, in_channel, in_size, in_size) rois_shape = (num_roi, 5) - a = tvm.placeholder(a_shape) - rois = tvm.placeholder(rois_shape) + a = te.placeholder(a_shape) + rois = te.placeholder(rois_shape) @memoize("topi.tests.test_topi_vision.verify_roi_align") def get_ref_data(): @@ -342,8 +343,8 @@ def verify_roi_pool(batch, in_channel, in_size, num_roi, pooled_size, spatial_sc a_shape = (batch, in_channel, in_size, in_size) rois_shape = (num_roi, 5) - a = tvm.placeholder(a_shape) - rois = tvm.placeholder(rois_shape) + a = te.placeholder(a_shape) + rois = te.placeholder(rois_shape) @memoize("topi.tests.test_topi_vision.verify_roi_pool") def get_ref_data(): @@ -387,9 +388,9 @@ def test_roi_pool(): def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs): - cls_prob = tvm.placeholder(np_cls_prob.shape) - bbox_pred = tvm.placeholder(np_bbox_pred.shape) - im_info = tvm.placeholder(np_im_info.shape) + cls_prob = te.placeholder(np_cls_prob.shape) + bbox_pred = te.placeholder(np_bbox_pred.shape) + im_info = te.placeholder(np_im_info.shape) def check_device(device): ctx = tvm.context(device, 0) diff --git a/tutorials/autotvm/tune_conv2d_cuda.py b/tutorials/autotvm/tune_conv2d_cuda.py index 0e26dcb974129..260cf5a4bb086 100644 --- a/tutorials/autotvm/tune_conv2d_cuda.py +++ b/tutorials/autotvm/tune_conv2d_cuda.py @@ -49,6 +49,7 @@ import numpy as np import tvm +from tvm import te import topi from topi.testing import conv2d_nchw_python @@ -82,10 +83,10 @@ def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding): assert N == 1, "Only consider batch_size = 1 in this template" - data = tvm.placeholder((N, CI, H, W), name='data') - kernel = tvm.placeholder((CO, CI, KH, KW), name='kernel') + data = te.placeholder((N, CI, H, W), name='data') + kernel = te.placeholder((CO, CI, KH, KW), name='kernel') conv = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation=1, out_dtype='float32') - s = tvm.create_schedule([conv.op]) + s = te.create_schedule([conv.op]) ##### space definition begin ##### n, f, y, x = s[conv].op.axis @@ -123,15 +124,15 @@ def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) kernel_scope = n # this is the scope to attach global config inside this kernel - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(n, bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -155,9 +156,9 @@ def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # tune unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/tutorials/autotvm/tune_relay_arm.py b/tutorials/autotvm/tune_relay_arm.py index ea24b16857888..ffd3e8b9b5cb5 100644 --- a/tutorials/autotvm/tune_relay_arm.py +++ b/tutorials/autotvm/tune_relay_arm.py @@ -62,6 +62,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay import tvm.relay.testing diff --git a/tutorials/autotvm/tune_relay_cuda.py b/tutorials/autotvm/tune_relay_cuda.py index 58c8751b73b9e..4195075ca66df 100644 --- a/tutorials/autotvm/tune_relay_cuda.py +++ b/tutorials/autotvm/tune_relay_cuda.py @@ -60,6 +60,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay import tvm.relay.testing diff --git a/tutorials/autotvm/tune_relay_mobile_gpu.py b/tutorials/autotvm/tune_relay_mobile_gpu.py index 5425f1b157154..ad7460829329d 100644 --- a/tutorials/autotvm/tune_relay_mobile_gpu.py +++ b/tutorials/autotvm/tune_relay_mobile_gpu.py @@ -61,6 +61,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay import tvm.relay.testing diff --git a/tutorials/autotvm/tune_relay_x86.py b/tutorials/autotvm/tune_relay_x86.py index f44773e544a73..15ce2de4b82f3 100644 --- a/tutorials/autotvm/tune_relay_x86.py +++ b/tutorials/autotvm/tune_relay_x86.py @@ -28,6 +28,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay from tvm.relay import testing diff --git a/tutorials/autotvm/tune_simple_template.py b/tutorials/autotvm/tune_simple_template.py index 8efeed487b435..dd3b9dce3d7ab 100644 --- a/tutorials/autotvm/tune_simple_template.py +++ b/tutorials/autotvm/tune_simple_template.py @@ -55,6 +55,7 @@ import numpy as np import tvm +from tvm import te # the module is called `autotvm` from tvm import autotvm @@ -70,12 +71,12 @@ # Matmul V0: Constant tiling factor def matmul_v0(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -104,12 +105,12 @@ def matmul_v0(N, L, M, dtype): # Matmul V1: List candidate values @autotvm.register_customized_task("tutorial/matmul_v1") # 1. use a decorator def matmul_v1(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -184,12 +185,12 @@ def matmul_v1(N, L, M, dtype): @autotvm.register_customized_task("tutorial/matmul") def matmul(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis diff --git a/tutorials/cross_compilation_and_rpc.py b/tutorials/cross_compilation_and_rpc.py index 2ffcb11989b8b..553d77dd20239 100644 --- a/tutorials/cross_compilation_and_rpc.py +++ b/tutorials/cross_compilation_and_rpc.py @@ -96,13 +96,14 @@ import numpy as np import tvm +from tvm import te from tvm import rpc from tvm.contrib import util -n = tvm.convert(1024) -A = tvm.placeholder((n,), name='A') -B = tvm.compute((n,), lambda i: A[i] + 1.0, name='B') -s = tvm.create_schedule(B.op) +n = tvm.runtime.convert(1024) +A = te.placeholder((n,), name='A') +B = te.compute((n,), lambda i: A[i] + 1.0, name='B') +s = te.create_schedule(B.op) ###################################################################### # Then we cross compile the kernel. @@ -228,10 +229,10 @@ def run_opencl(): opencl_device_port = 9090 # create schedule for the above "add one" compute declaration - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=32) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) func = tvm.build(s, [A, B], "opencl", target_host=target_host) remote = rpc.connect(opencl_device_host, opencl_device_port) diff --git a/tutorials/dev/low_level_custom_pass.py b/tutorials/dev/low_level_custom_pass.py index 97c4a1f49a9ac..f9b3ea3c8245c 100644 --- a/tutorials/dev/low_level_custom_pass.py +++ b/tutorials/dev/low_level_custom_pass.py @@ -43,6 +43,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -50,12 +51,12 @@ # our customized lowering pass to manipulate the IR directly instead of using schedule primitives. # -n = tvm.const(128, "int32") -a = tvm.placeholder((n, ), name="a") -b = tvm.placeholder((n, ), name="b") -c = tvm.compute((n, ), lambda i: a[i] + b[i], name='c') +n = tvm.tir.const(128, "int32") +a = te.placeholder((n, ), name="a") +b = te.placeholder((n, ), name="b") +c = te.compute((n, ), lambda i: a[i] + b[i], name='c') -sch = tvm.create_schedule(c.op) +sch = te.create_schedule(c.op) ir = tvm.lower(sch, [a, b, c], simple_mode=True) print(ir) @@ -111,7 +112,7 @@ def vectorize8(op): if op in loops: extent = op.extent.value name = op.loop_var.name - lo, li = tvm.var(name + '.outer'), tvm.var(name + '.inner') + lo, li = te.var(name + '.outer'), te.var(name + '.inner') body = tvm.ir_pass.Substitute(op.body, {op.loop_var: lo * 8 + li}) body = tvm.tir.For(li, 0, 8, tvm.tir.For.Vectorized, 0, body) body = tvm.tir.For(lo, 0, extent // 8, tvm.tir.For.Serial, 0, body) diff --git a/tutorials/dev/relay_pass_infra.py b/tutorials/dev/relay_pass_infra.py index 494593eeb5a13..7f818cfa30685 100644 --- a/tutorials/dev/relay_pass_infra.py +++ b/tutorials/dev/relay_pass_infra.py @@ -49,6 +49,7 @@ import numpy as np import tvm +from tvm import te import tvm.relay as relay ############################################################################### diff --git a/tutorials/frontend/build_gcn.py b/tutorials/frontend/build_gcn.py index d385dc9e72aba..e0d0aa0743173 100644 --- a/tutorials/frontend/build_gcn.py +++ b/tutorials/frontend/build_gcn.py @@ -186,6 +186,7 @@ def evaluate(data, logits): from tvm import relay from tvm.contrib import graph_runtime import tvm +from tvm import te def GraphConv(layer_name, input_dim, diff --git a/tutorials/frontend/deploy_model_on_android.py b/tutorials/frontend/deploy_model_on_android.py index 3d0e83d5e450e..f516004181c56 100644 --- a/tutorials/frontend/deploy_model_on_android.py +++ b/tutorials/frontend/deploy_model_on_android.py @@ -31,6 +31,7 @@ import keras from keras.applications.mobilenet_v2 import MobileNetV2 import tvm +from tvm import te import tvm.relay as relay from tvm import rpc from tvm.contrib import util, ndk, graph_runtime as runtime diff --git a/tutorials/frontend/deploy_model_on_rasp.py b/tutorials/frontend/deploy_model_on_rasp.py index e78c73659b989..ef707feedd2f7 100644 --- a/tutorials/frontend/deploy_model_on_rasp.py +++ b/tutorials/frontend/deploy_model_on_rasp.py @@ -27,6 +27,7 @@ """ import tvm +from tvm import te import tvm.relay as relay from tvm import rpc from tvm.contrib import util, graph_runtime as runtime diff --git a/tutorials/frontend/deploy_quantized.py b/tutorials/frontend/deploy_quantized.py index 0e09ba9c93076..5af9fc950bc2e 100644 --- a/tutorials/frontend/deploy_quantized.py +++ b/tutorials/frontend/deploy_quantized.py @@ -28,6 +28,7 @@ """ import tvm +from tvm import te from tvm import relay import mxnet as mx from tvm.contrib.download import download_testdata diff --git a/tutorials/frontend/deploy_ssd_gluoncv.py b/tutorials/frontend/deploy_ssd_gluoncv.py index 78bb0cafcfe87..6126df0e73ab2 100644 --- a/tutorials/frontend/deploy_ssd_gluoncv.py +++ b/tutorials/frontend/deploy_ssd_gluoncv.py @@ -24,6 +24,7 @@ We will use GluonCV pre-trained SSD model and convert it to Relay IR """ import tvm +from tvm import te from matplotlib import pyplot as plt from tvm.relay.testing.config import ctx_list @@ -47,7 +48,7 @@ # # To get best inference performance on Intel graphics, # change target argument to :code:`opencl -device=intel_graphics`. -# But when using Intel graphics on Mac, target needs to +# But when using Intel graphics on Mac, target needs to # be set to `opencl` only for the reason that Intel subgroup # extension is not supported on Mac. # diff --git a/tutorials/frontend/from_caffe2.py b/tutorials/frontend/from_caffe2.py index aadee02f4b027..8fad80df1d1ee 100644 --- a/tutorials/frontend/from_caffe2.py +++ b/tutorials/frontend/from_caffe2.py @@ -96,6 +96,7 @@ def transform_image(image): # --------------- # The process is no different from other examples. import tvm +from tvm import te from tvm.contrib import graph_runtime # context x86 CPU, use tvm.gpu(0) if you run on GPU ctx = tvm.cpu(0) diff --git a/tutorials/frontend/from_coreml.py b/tutorials/frontend/from_coreml.py index 2f70353501b8d..2a0c8dbc93f2a 100644 --- a/tutorials/frontend/from_coreml.py +++ b/tutorials/frontend/from_coreml.py @@ -35,6 +35,7 @@ https://github.com/apple/coremltools """ import tvm +from tvm import te import tvm.relay as relay from tvm.contrib.download import download_testdata import coremltools as cm diff --git a/tutorials/frontend/from_darknet.py b/tutorials/frontend/from_darknet.py index e90c8bb63b320..e2c1ea5aacbfa 100644 --- a/tutorials/frontend/from_darknet.py +++ b/tutorials/frontend/from_darknet.py @@ -38,6 +38,7 @@ # tvm, relay import tvm +from tvm import te from tvm import relay from ctypes import * from tvm.contrib.download import download_testdata diff --git a/tutorials/frontend/from_keras.py b/tutorials/frontend/from_keras.py index c1f3471bb644a..928a8acbefa78 100644 --- a/tutorials/frontend/from_keras.py +++ b/tutorials/frontend/from_keras.py @@ -35,6 +35,7 @@ https://keras.io/#installation """ import tvm +from tvm import te import tvm.relay as relay from tvm.contrib.download import download_testdata import keras diff --git a/tutorials/frontend/from_mxnet.py b/tutorials/frontend/from_mxnet.py index d0e4c4ab0d180..bf53db532e1da 100644 --- a/tutorials/frontend/from_mxnet.py +++ b/tutorials/frontend/from_mxnet.py @@ -38,6 +38,7 @@ # some standard imports import mxnet as mx import tvm +from tvm import te import tvm.relay as relay import numpy as np diff --git a/tutorials/frontend/from_onnx.py b/tutorials/frontend/from_onnx.py index 7a615930a905f..766451c2f8b12 100644 --- a/tutorials/frontend/from_onnx.py +++ b/tutorials/frontend/from_onnx.py @@ -35,6 +35,7 @@ import onnx import numpy as np import tvm +from tvm import te import tvm.relay as relay from tvm.contrib.download import download_testdata diff --git a/tutorials/frontend/from_tensorflow.py b/tutorials/frontend/from_tensorflow.py index 55eb3d014191e..0ebd733ef9aab 100644 --- a/tutorials/frontend/from_tensorflow.py +++ b/tutorials/frontend/from_tensorflow.py @@ -26,6 +26,7 @@ # tvm, relay import tvm +from tvm import te from tvm import relay # os and numpy diff --git a/tutorials/frontend/from_tflite.py b/tutorials/frontend/from_tflite.py index e93a71ce4a781..50fd69f3f92b9 100644 --- a/tutorials/frontend/from_tflite.py +++ b/tutorials/frontend/from_tflite.py @@ -151,6 +151,7 @@ def extract(path): # Execute on TVM # -------------- import tvm +from tvm import te from tvm.contrib import graph_runtime as runtime # Create a runtime executor module diff --git a/tutorials/frontend/using_external_lib.py b/tutorials/frontend/using_external_lib.py index 71acedaf181b8..7063c0e27feae 100644 --- a/tutorials/frontend/using_external_lib.py +++ b/tutorials/frontend/using_external_lib.py @@ -32,6 +32,7 @@ To begin with, we import Relay and TVM. """ import tvm +from tvm import te import numpy as np from tvm.contrib import graph_runtime as runtime from tvm import relay diff --git a/tutorials/language/extern_op.py b/tutorials/language/extern_op.py index 2ad3e30634158..64e9880dc20b8 100644 --- a/tutorials/language/extern_op.py +++ b/tutorials/language/extern_op.py @@ -32,13 +32,14 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np from tvm.contrib import cblas ###################################################################### # Use Extern Tensor Function # -------------------------- -# In the example below, we use :any:`tvm.extern` to add an extern +# In the example below, we use :any:`te.extern` to add an extern # array function call. In the extern call, we declare the shape # of output tensors. In the second argument we provide the list of inputs. # @@ -53,15 +54,15 @@ n = 1024 l = 128 m = 235 -bias = tvm.var('bias', dtype=tvm.float32) -A = tvm.placeholder((n, l), name='A') -B = tvm.placeholder((l, m), name='B') -C = tvm.extern((n, m), [A, B], - lambda ins, outs: tvm.call_packed( +bias = te.var('bias', dtype="float32") +A = te.placeholder((n, l), name='A') +B = te.placeholder((l, m), name='B') +C = te.extern((n, m), [A, B], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cblas.matmul", ins[0], ins[1], outs[0], False, False), name="C") -D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") -s = tvm.create_schedule(D.op) +D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") +s = te.create_schedule(D.op) ###################################################################### # Verify the Result @@ -86,8 +87,8 @@ # from tvm.contrib import cblas C = cblas.matmul(A, B) -D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") -s = tvm.create_schedule(D.op) +D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") +s = te.create_schedule(D.op) ###################################################################### # Hook Python Function as Extern @@ -106,10 +107,10 @@ def my_tvm_addone(x, y): print("my_tvm_addone signatures: %s, %s" % (type(x), type(y))) tvm.nd.array(x.asnumpy() + 1).copyto(y) -A = tvm.placeholder((n,), name='A') -B = tvm.extern(A.shape, [A], lambda ins, outs: tvm.call_packed( +A = te.placeholder((n,), name='A') +B = te.extern(A.shape, [A], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.my_tvm_addone", ins[0], outs[0]), name="C") -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm") a = tvm.nd.array(np.random.uniform(size=(n,)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(n,)).astype(B.dtype), ctx) @@ -119,7 +120,7 @@ def my_tvm_addone(x, y): ###################################################################### # Summary # ------- -# - TVM calls extern tensor function via :any:`tvm.extern` +# - TVM calls extern tensor function via :any:`te.extern` # - Use contrib wrappers for short sugars of extern tensor calls. # - We can hook front-end function as extern tensor callbacks. # diff --git a/tutorials/language/intrin_math.py b/tutorials/language/intrin_math.py index 59bf79d130926..eebab3f6c3c3d 100644 --- a/tutorials/language/intrin_math.py +++ b/tutorials/language/intrin_math.py @@ -31,6 +31,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -38,19 +39,19 @@ # ------------------------------- # The most straight-forward way to call target specific function is via # extern function call construct in tvm. -# In the following example, we use :any:`tvm.call_pure_extern` to call +# In the following example, we use :any:`tvm.tir.call_pure_extern` to call # :code:`__expf` function, which is only available under CUDA. # -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, - lambda i: tvm.call_pure_extern("float32", "__expf", A[i]), +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, + lambda i: tvm.tir.call_pure_extern("float32", "__expf", A[i]), name="B") -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) num_thread = 64 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) f = tvm.build(s, [A, B], "cuda", name="myexp") print(f.imported_modules[0].get_source()) @@ -64,22 +65,22 @@ # # TVM intrinsic provides the user a mechanism to achieve this, and this # is the recommended way to solve the problem. -# The following code use tvm.exp instead, which create an intrinsic call -# :any:`tvm.exp` to do the exponential. +# The following code use te.exp instead, which create an intrinsic call +# :any:`te.exp` to do the exponential. # -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, lambda i: tvm.exp(A[i]), name="B") -s = tvm.create_schedule(B.op) +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, lambda i: te.exp(A[i]), name="B") +s = te.create_schedule(B.op) num_thread = 64 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) fcuda = tvm.build(s, [A, B], "cuda", name="myexp") print(fcuda.imported_modules[0].get_source()) ###################################################################### # We can find that the code works for both CUDA and opencl. -# The same tvm.exp can also be used for float64 data types. +# The same te.exp can also be used for float64 data types. # fopencl = tvm.build(s, [A, B], "opencl", name="myexp") print(fopencl.imported_modules[0].get_source()) @@ -87,7 +88,7 @@ ###################################################################### # Intrinsic Lowering Rule # ----------------------- -# When :any:`tvm.exp` is called, TVM creates an intrinsic Call Expr. +# When :any:`te.exp` is called, TVM creates an intrinsic Call Expr. # TVM uses transformation rules to transform the intrinsic # call to device specific extern calls. # @@ -101,10 +102,10 @@ def my_cuda_math_rule(op): assert isinstance(op, tvm.tir.Call) if op.dtype == "float32": # call float function - return tvm.call_pure_extern("float32", "%sf" % op.name, op.args[0]) + return tvm.tir.call_pure_extern("float32", "%sf" % op.name, op.args[0]) elif op.dtype == "float64": # call double function - return tvm.call_pure_extern("float32", op.name, op.args[0]) + return tvm.tir.call_pure_extern("float32", op.name, op.args[0]) else: # cannot do translation, return self. return op @@ -131,29 +132,29 @@ def my_cuda_math_rule(op): def mylog(x): """customized log intrinsic function""" - return tvm.call_pure_intrin(x.dtype, "mylog", x) + return tvm.tir.call_pure_intrin(x.dtype, "mylog", x) def my_cuda_mylog_rule(op): """CUDA lowering rule for log""" if op.dtype == "float32": - return tvm.call_pure_extern("float32", "logf", op.args[0]) + return tvm.tir.call_pure_extern("float32", "logf", op.args[0]) elif op.dtype == "float64": - return tvm.call_pure_extern("float64", "log", op.args[0]) + return tvm.tir.call_pure_extern("float64", "log", op.args[0]) else: return op tvm.target.register_intrin_rule("cuda", "mylog", my_cuda_mylog_rule, override=True) -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, lambda i: mylog(A[i]), name="B") -s = tvm.create_schedule(B.op) +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, lambda i: mylog(A[i]), name="B") +s = te.create_schedule(B.op) num_thread = 64 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) fcuda = tvm.build(s, [A, B], "cuda", name="mylog") print(fcuda.imported_modules[0].get_source()) @@ -162,6 +163,6 @@ def my_cuda_mylog_rule(op): # ------- # - TVM can call extern target dependent math function. # - Use intrinsic to defined a unified interface for the functions. -# - For more intrinsics available in tvm, take a look at :any:`tvm.intrin` +# - For more intrinsics available in tvm, take a look at :any:`tvm.tir` # - You can customize the intrinsic behavior by defining your own rules. # diff --git a/tutorials/language/reduction.py b/tutorials/language/reduction.py index 0b631cb077f43..cdfc94ef096f7 100644 --- a/tutorials/language/reduction.py +++ b/tutorials/language/reduction.py @@ -28,6 +28,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -38,8 +39,8 @@ # # The following lines describe the row sum operation. # To create a reduction formula, we declare a reduction axis using -# :any:`tvm.reduce_axis`. :any:`tvm.reduce_axis` takes in the range of reductions. -# :any:`tvm.sum` takes in the expression to be reduced as well as the reduction +# :any:`te.reduce_axis`. :any:`te.reduce_axis` takes in the range of reductions. +# :any:`te.sum` takes in the expression to be reduced as well as the reduction # axis and compute the sum of value over all k in the declared range. # # The equivalent C code is as follows: @@ -53,11 +54,11 @@ # } # } # -n = tvm.var("n") -m = tvm.var("m") -A = tvm.placeholder((n, m), name='A') -k = tvm.reduce_axis((0, m), "k") -B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") +n = te.var("n") +m = te.var("m") +A = te.placeholder((n, m), name='A') +k = te.reduce_axis((0, m), "k") +B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") ###################################################################### # Schedule the Reduction @@ -65,7 +66,7 @@ # There are several ways to schedule a reduction. # Before doing anything, let us print out the IR code of default schedule. # -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### @@ -81,8 +82,8 @@ ###################################################################### # If we are building a GPU kernel, we can bind the rows of B to GPU threads. -s[B].bind(xo, tvm.thread_axis("blockIdx.x")) -s[B].bind(xi, tvm.thread_axis("threadIdx.x")) +s[B].bind(xo, te.thread_axis("blockIdx.x")) +s[B].bind(xi, te.thread_axis("threadIdx.x")) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### @@ -97,7 +98,7 @@ # In the following schedule, the result of B is written to a temporary # result B.rf. The factored dimension becomes the first dimension of B.rf. # -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) ko, ki = s[B].split(B.op.reduce_axis[0], factor=16) BF = s.rfactor(B, ki) print(tvm.lower(s, [A, B], simple_mode=True)) @@ -122,9 +123,9 @@ # columns by threadIdx.x and finally do a cross thread reduction over threadIdx.x # xo, xi = s[B].split(s[B].op.axis[0], factor=32) -s[B].bind(xo, tvm.thread_axis("blockIdx.x")) -s[B].bind(xi, tvm.thread_axis("threadIdx.y")) -tx = tvm.thread_axis("threadIdx.x") +s[B].bind(xo, te.thread_axis("blockIdx.x")) +s[B].bind(xi, te.thread_axis("threadIdx.y")) +tx = te.thread_axis("threadIdx.x") s[B].bind(s[B].op.reduce_axis[0], tx) s[BF].compute_at(s[B], s[B].op.reduce_axis[0]) s[B].set_store_predicate(tx.var.equal(0)) @@ -148,16 +149,16 @@ # In TVM, we can describe convolution via 2D reduction in a simple way. # Here is an example for 2D convolution with filter size = [3, 3] and strides = [1, 1]. # -n = tvm.var('n') -Input = tvm.placeholder((n, n), name='Input') -Filter = tvm.placeholder((3, 3), name='Filter') -di = tvm.reduce_axis((0, 3), name='di') -dj = tvm.reduce_axis((0, 3), name='dj') -Output = tvm.compute( +n = te.var('n') +Input = te.placeholder((n, n), name='Input') +Filter = te.placeholder((3, 3), name='Filter') +di = te.reduce_axis((0, 3), name='di') +dj = te.reduce_axis((0, 3), name='dj') +Output = te.compute( (n - 2, n - 2), - lambda i, j: tvm.sum(Input[i + di, j + dj] * Filter[di, dj], axis=[di, dj]), + lambda i, j: te.sum(Input[i + di, j + dj] * Filter[di, dj], axis=[di, dj]), name='Output') -s = tvm.create_schedule(Output.op) +s = te.create_schedule(Output.op) print(tvm.lower(s, [Input, Filter, Output], simple_mode=True)) ###################################################################### @@ -165,18 +166,18 @@ # # Define General Commutative Reduction Operation # ---------------------------------------------- -# Besides the built-in reduction operations like :any:`tvm.sum`, -# :any:`tvm.min` and :any:`tvm.max`, you can also define your -# commutative reduction operation by :any:`tvm.comm_reducer`. +# Besides the built-in reduction operations like :any:`te.sum`, +# :any:`tvm.te.min` and :any:`tvm.te.max`, you can also define your +# commutative reduction operation by :any:`te.comm_reducer`. # -n = tvm.var('n') -m = tvm.var('m') -product = tvm.comm_reducer(lambda x, y: x*y, - lambda t: tvm.const(1, dtype=t), name="product") -A = tvm.placeholder((n, m), name='A') -k = tvm.reduce_axis((0, m), name='k') -B = tvm.compute((n,), lambda i: product(A[i, k], axis=k), name='B') +n = te.var('n') +m = te.var('m') +product = te.comm_reducer(lambda x, y: x*y, + lambda t: tvm.tir.const(1, dtype=t), name="product") +A = te.placeholder((n, m), name='A') +k = te.reduce_axis((0, m), name='k') +B = te.compute((n,), lambda i: product(A[i, k], axis=k), name='B') ###################################################################### # .. note:: @@ -192,4 +193,4 @@ # # - Describe reduction with reduce_axis. # - Use rfactor to factor out axis if we need parallelism. -# - Define new reduction operation by :any:`tvm.comm_reducer` +# - Define new reduction operation by :any:`te.comm_reducer` diff --git a/tutorials/language/scan.py b/tutorials/language/scan.py index 2fa9c210ead21..73790da275001 100644 --- a/tutorials/language/scan.py +++ b/tutorials/language/scan.py @@ -25,6 +25,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -46,13 +47,13 @@ # The result of the scan is a tensor, giving the result of :code:`s_state` after the # update over the time domain. # -m = tvm.var("m") -n = tvm.var("n") -X = tvm.placeholder((m, n), name="X") -s_state = tvm.placeholder((m, n)) -s_init = tvm.compute((1, n), lambda _, i: X[0, i]) -s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) -s_scan = tvm.scan(s_init, s_update, s_state, inputs=[X]) +m = te.var("m") +n = te.var("n") +X = te.placeholder((m, n), name="X") +s_state = te.placeholder((m, n)) +s_init = te.compute((1, n), lambda _, i: X[0, i]) +s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) +s_scan = tvm.te.scan(s_init, s_update, s_state, inputs=[X]) ###################################################################### # Schedule the Scan Cell @@ -62,10 +63,10 @@ # first iteration dimension of the update part. # To split on the time iteration, user can schedule on scan_op.scan_axis instead. # -s = tvm.create_schedule(s_scan.op) +s = te.create_schedule(s_scan.op) num_thread = 256 -block_x = tvm.thread_axis("blockIdx.x") -thread_x = tvm.thread_axis("threadIdx.x") +block_x = te.thread_axis("blockIdx.x") +thread_x = te.thread_axis("threadIdx.x") xo, xi = s[s_init].split(s_init.op.axis[1], factor=num_thread) s[s_init].bind(xo, block_x) s[s_init].bind(xi, thread_x) @@ -100,21 +101,21 @@ # The following lines demonstrate a scan with two stage operations # in the scan cell. # -m = tvm.var("m") -n = tvm.var("n") -X = tvm.placeholder((m, n), name="X") -s_state = tvm.placeholder((m, n)) -s_init = tvm.compute((1, n), lambda _, i: X[0, i]) -s_update_s1 = tvm.compute((m, n), lambda t, i: s_state[t-1, i] * 2, name="s1") -s_update_s2 = tvm.compute((m, n), lambda t, i: s_update_s1[t, i] + X[t, i], name="s2") -s_scan = tvm.scan(s_init, s_update_s2, s_state, inputs=[X]) +m = te.var("m") +n = te.var("n") +X = te.placeholder((m, n), name="X") +s_state = te.placeholder((m, n)) +s_init = te.compute((1, n), lambda _, i: X[0, i]) +s_update_s1 = te.compute((m, n), lambda t, i: s_state[t-1, i] * 2, name="s1") +s_update_s2 = te.compute((m, n), lambda t, i: s_update_s1[t, i] + X[t, i], name="s2") +s_scan = tvm.te.scan(s_init, s_update_s2, s_state, inputs=[X]) ###################################################################### # These intermediate tensors can also be scheduled normally. # To ensure correctness, TVM creates a group constraint to forbid # the body of scan to be compute_at locations outside the scan loop. # -s = tvm.create_schedule(s_scan.op) +s = te.create_schedule(s_scan.op) xo, xi = s[s_update_s2].split(s_update_s2.op.axis[1], factor=32) s[s_update_s1].compute_at(s[s_update_s2], xo) print(tvm.lower(s, [X, s_scan], simple_mode=True)) @@ -126,20 +127,20 @@ # recurrent state. Scan support multiple recurrent states. # The following example demonstrates how we can build recurrence with two states. # -m = tvm.var("m") -n = tvm.var("n") -l = tvm.var("l") -X = tvm.placeholder((m, n), name="X") -s_state1 = tvm.placeholder((m, n)) -s_state2 = tvm.placeholder((m, l)) -s_init1 = tvm.compute((1, n), lambda _, i: X[0, i]) -s_init2 = tvm.compute((1, l), lambda _, i: 0.0) -s_update1 = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + X[t, i]) -s_update2 = tvm.compute((m, l), lambda t, i: s_state2[t-1, i] + s_state1[t-1, 0]) -s_scan1, s_scan2 = tvm.scan([s_init1, s_init2], +m = te.var("m") +n = te.var("n") +l = te.var("l") +X = te.placeholder((m, n), name="X") +s_state1 = te.placeholder((m, n)) +s_state2 = te.placeholder((m, l)) +s_init1 = te.compute((1, n), lambda _, i: X[0, i]) +s_init2 = te.compute((1, l), lambda _, i: 0.0) +s_update1 = te.compute((m, n), lambda t, i: s_state1[t-1, i] + X[t, i]) +s_update2 = te.compute((m, l), lambda t, i: s_state2[t-1, i] + s_state1[t-1, 0]) +s_scan1, s_scan2 = tvm.te.scan([s_init1, s_init2], [s_update1, s_update2], [s_state1, s_state2], inputs=[X]) -s = tvm.create_schedule(s_scan1.op) +s = te.create_schedule(s_scan1.op) print(tvm.lower(s, [X, s_scan1, s_scan2], simple_mode=True)) ###################################################################### diff --git a/tutorials/language/schedule_primitives.py b/tutorials/language/schedule_primitives.py index e59264f298981..61bfcad1f3a95 100644 --- a/tutorials/language/schedule_primitives.py +++ b/tutorials/language/schedule_primitives.py @@ -27,6 +27,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -41,19 +42,19 @@ # # declare some variables for use later -n = tvm.var('n') -m = tvm.var('m') +n = te.var('n') +m = te.var('m') ###################################################################### # A schedule can be created from a list of ops, by default the # schedule computes tensor in a serial manner in a row-major order. # declare a matrix element-wise multiply -A = tvm.placeholder((m, n), name='A') -B = tvm.placeholder((m, n), name='B') -C = tvm.compute((m, n), lambda i, j: A[i, j] * B[i, j], name='C') +A = te.placeholder((m, n), name='A') +B = te.placeholder((m, n), name='B') +C = te.compute((m, n), lambda i, j: A[i, j] * B[i, j], name='C') -s = tvm.create_schedule([C.op]) +s = te.create_schedule([C.op]) # lower will transform the computation from definition to the real # callable function. With argument `simple_mode=True`, it will # return you a readable C like statement, we use it here to print the @@ -70,20 +71,20 @@ # ----- # :code:`split` can split a specified axis into two axises by # :code:`factor`. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]*2, name='B') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]*2, name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=32) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### # You can also split a axis by :code:`nparts`, which splits the axis # contrary with :code:`factor`. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i], name='B') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], nparts=32) print(tvm.lower(s, [A, B], simple_mode=True)) @@ -92,10 +93,10 @@ # ---- # :code:`tile` help you execute the computation tile by tile over two # axises. -A = tvm.placeholder((m, n), name='A') -B = tvm.compute((m, n), lambda i, j: A[i, j], name='B') +A = te.placeholder((m, n), name='A') +B = te.compute((m, n), lambda i, j: A[i, j], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5) print(tvm.lower(s, [A, B], simple_mode=True)) @@ -103,10 +104,10 @@ # fuse # ---- # :code:`fuse` can fuse two consecutive axises of one computation. -A = tvm.placeholder((m, n), name='A') -B = tvm.compute((m, n), lambda i, j: A[i, j], name='B') +A = te.placeholder((m, n), name='A') +B = te.compute((m, n), lambda i, j: A[i, j], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) # tile to four axises first: (i.outer, j.outer, i.inner, j.inner) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5) # then fuse (i.inner, j.inner) into one axis: (i.inner.j.inner.fused) @@ -117,10 +118,10 @@ # reorder # ------- # :code:`reorder` can reorder the axises in the specified order. -A = tvm.placeholder((m, n), name='A') -B = tvm.compute((m, n), lambda i, j: A[i, j], name='B') +A = te.placeholder((m, n), name='A') +B = te.compute((m, n), lambda i, j: A[i, j], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) # tile to four axises first: (i.outer, j.outer, i.inner, j.inner) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5) # then reorder the axises: (i.inner, j.outer, i.outer, j.inner) @@ -132,13 +133,13 @@ # ---- # :code:`bind` can bind a specified axis with a thread axis, often used # in gpu programming. -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, lambda i: A[i] * 2, name='B') +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, lambda i: A[i] * 2, name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=64) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### @@ -146,21 +147,21 @@ # ---------- # For a schedule that consists of multiple operators, TVM will compute # tensors at the root separately by default. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True)) ###################################################################### # :code:`compute_at` can move computation of `B` into the first axis # of computation of `C`. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B].compute_at(s[C], C.op.axis[0]) print(tvm.lower(s, [A, B, C], simple_mode=True)) @@ -170,11 +171,11 @@ # :code:`compute_inline` can mark one stage as inline, then the body of # computation will be expanded and inserted at the address where the # tensor is required. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B].compute_inline() print(tvm.lower(s, [A, B, C], simple_mode=True)) @@ -182,11 +183,11 @@ # compute_root # ------------ # :code:`compute_root` can move computation of one stage to the root. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B].compute_at(s[C], C.op.axis[0]) s[B].compute_root() print(tvm.lower(s, [A, B, C], simple_mode=True)) diff --git a/tutorials/language/tedd.py b/tutorials/language/tedd.py index aa3fa152a5195..a56c3f0da3922 100644 --- a/tutorials/language/tedd.py +++ b/tutorials/language/tedd.py @@ -1,47 +1,46 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Use Tensor Expression Debug Display (TEDD) for Visualization -============================================================ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Use Tensor Expression Debug Display (TEDD) for Visualization +============================================================ **Author**: `Yongfeng Gu `_ -This is an introduction about using TEDD to visualize tensor expressions. +This is an introduction about using TEDD to visualize tensor expressions. -Tensor Expressions are scheduled with primitives. Although individual -primitives are usually easy to understand, they become complicated quickly -when you put them together. We have introduced an operational model of -schedule primitives in Tensor Expression in this document -(https://docs.google.com/document/d/1nmz00_n4Ju-SpYN0QFl3abTHTlR_P0dRyo5zsWC0Q1k/edit?usp=sharing) -to make it easier to understand +Tensor Expressions are scheduled with primitives. Although individual +primitives are usually easy to understand, they become complicated quickly +when you put them together. We have introduced an operational model of +schedule primitives in Tensor Expression in this document +(https://docs.google.com/document/d/1nmz00_n4Ju-SpYN0QFl3abTHTlR_P0dRyo5zsWC0Q1k/edit?usp=sharing) +to make it easier to understand -* the interactions between different schedule primitives, -* the impact of the schedule primitives on the final code generation. +* the interactions between different schedule primitives, +* the impact of the schedule primitives on the final code generation. -The operational model is based on a Dataflow Graph, a Schedule Tree and an -IterVar Relationship Graph. Schedule primitives perform operations on these +The operational model is based on a Dataflow Graph, a Schedule Tree and an +IterVar Relationship Graph. Schedule primitives perform operations on these graphs. -TEDD renders these three graphs from a given schedule. This tutorial demonstrates -how to use TEDD and how to interpret the rendered graphs. +TEDD renders these three graphs from a given schedule. This tutorial demonstrates +how to use TEDD and how to interpret the rendered graphs. """ -from __future__ import absolute_import, print_function - import tvm +from tvm import te import topi from tvm.contrib import tedd @@ -60,42 +59,43 @@ stride = 1 padding = "SAME" dilation=1 -A = tvm.placeholder((in_size, in_size, in_channel, batch), name='A') -W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') -B = tvm.placeholder((1, num_filter, 1), name='bias') -with tvm.target.create("cuda"): - t_conv = topi.nn.conv2d(A, W, stride, padding, dilation, layout='HWCN') +A = te.placeholder((in_size, in_size, in_channel, batch), name='A') +W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') +B = te.placeholder((1, num_filter, 1), name='bias') + +with tvm.target.create("llvm"): + t_conv = topi.nn.conv2d_hwcn(A, W, stride, padding, dilation) t_bias = topi.add(t_conv, B) t_relu = topi.nn.relu(t_bias) - s = topi.generic.schedule_conv2d_hwcn([t_relu]) - -###################################################################### + s = topi.generic.schedule_conv2d_hwcn([t_relu]) + +###################################################################### # Render Graphs with TEDD # ----------------------- -# We render graphs to see the computation -# and how it is scheduled. -# If you run the tutorial in a Jupyter notebook, you can use the following commented lines +# We render graphs to see the computation +# and how it is scheduled. +# If you run the tutorial in a Jupyter notebook, you can use the following commented lines # to render SVG figures showing in notebook directly. # -tedd.viz_dataflow_graph(s, dot_file_path = '/tmp/dfg.dot') -#tedd.viz_dataflow_graph(s, show_svg = True) +tedd.viz_dataflow_graph(s, dot_file_path = '/tmp/dfg.dot') +#tedd.viz_dataflow_graph(s, show_svg = True) ###################################################################### # .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_dfg.png # :align: center # :scale: 100% # -# The first one is a dataflow graph. Every node represents a stage with name and memory -# scope shown in the middle and inputs/outputs information on the sides. -# Edges show nodes' dependency. +# The first one is a dataflow graph. Every node represents a stage with name and memory +# scope shown in the middle and inputs/outputs information on the sides. +# Edges show nodes' dependency. # -tedd.viz_schedule_tree(s, dot_file_path = '/tmp/scheduletree.dot') -#tedd.viz_schedule_tree(s, show_svg = True) +tedd.viz_schedule_tree(s, dot_file_path = '/tmp/scheduletree.dot') +#tedd.viz_schedule_tree(s, show_svg = True) ###################################################################### -# We just rendered the schedule tree graph. You may notice an warning about ranges not +# We just rendered the schedule tree graph. You may notice an warning about ranges not # available. # The message also suggests to call normalize() to infer range information. We will # skip inspecting the first schedule tree and encourage you to compare the graphs before @@ -103,62 +103,62 @@ # s = s.normalize() -tedd.viz_schedule_tree(s, dot_file_path = '/tmp/scheduletree2.dot') -#tedd.viz_schedule_tree(s, show_svg = True) +tedd.viz_schedule_tree(s, dot_file_path = '/tmp/scheduletree2.dot') +#tedd.viz_schedule_tree(s, show_svg = True) ###################################################################### # .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_st.png # :align: center # :scale: 100% # -# Now, let us take a close look at the second schedule tree. Every block under ROOT -# represents a -# stage. Stage name shows in the top row and compute shows in the bottom row. -# The middle rows are for IterVars, the higher the outer, the lower the inner. -# An IterVar row contains its index, name, type, and other optional information. -# Let's use the W.shared stage as an example. The top row tells -# its name, "W.shared", and memory scope, "Shared". Its compute is -# :code:`W(ax0, ax1, ax2, ax3)`. -# Its outer most loop IterVar is ax0.ax1.fused.ax2.fused.ax3.fused.outer, -# indexed with 0, of kDataPar, bound to threadIdx.y, and with range(min=0, ext=8). -# You can also tell -# IterVar type with the index box color, shown in the legend. -# -# If a stage doesn't compute_at any other stage, it has an edge directly to the -# ROOT node. Otherwise, it has an edge pointing to the IterVar it attaches to, -# such as W.shared attaches to rx.outer in the middle compute stage. +# Now, let us take a close look at the second schedule tree. Every block under ROOT +# represents a +# stage. Stage name shows in the top row and compute shows in the bottom row. +# The middle rows are for IterVars, the higher the outer, the lower the inner. +# An IterVar row contains its index, name, type, and other optional information. +# Let's use the W.shared stage as an example. The top row tells +# its name, "W.shared", and memory scope, "Shared". Its compute is +# :code:`W(ax0, ax1, ax2, ax3)`. +# Its outer most loop IterVar is ax0.ax1.fused.ax2.fused.ax3.fused.outer, +# indexed with 0, of kDataPar, bound to threadIdx.y, and with range(min=0, ext=8). +# You can also tell +# IterVar type with the index box color, shown in the legend. +# +# If a stage doesn't compute_at any other stage, it has an edge directly to the +# ROOT node. Otherwise, it has an edge pointing to the IterVar it attaches to, +# such as W.shared attaches to rx.outer in the middle compute stage. # -###################################################################### -# .. note:: -# -# By definition, IterVars are internal nodes and computes are leaf nodes in -# a schedule tree. The edges among IterVars and compute within one stage are +###################################################################### +# .. note:: +# +# By definition, IterVars are internal nodes and computes are leaf nodes in +# a schedule tree. The edges among IterVars and compute within one stage are # omitted, making every stage a block, for better readability. # -tedd.viz_itervar_relationship_graph(s, dot_file_path = '/tmp/itervar.dot') -#tedd.viz_itervar_relationship_graph(s, show_svg = True) +tedd.viz_itervar_relationship_graph(s, dot_file_path = '/tmp/itervar.dot') +#tedd.viz_itervar_relationship_graph(s, show_svg = True) ###################################################################### # .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_itervar_rel.png # :align: center # :scale: 100% # -# The last one is an IterVar Relationship Graph. Every subgraph represents a -# stage and contains IterVar nodes and transformation nodes. For example, -# W.shared has three split nodes and three fuse nodes. The rest are IterVar -# nodes of the same format as the IterVar rows in Schedule Trees. Root -# IterVars are those not driven by any transformation node, such as ax0; leaf -# IterVars don't drive any transformation node and have non-negative indices, +# The last one is an IterVar Relationship Graph. Every subgraph represents a +# stage and contains IterVar nodes and transformation nodes. For example, +# W.shared has three split nodes and three fuse nodes. The rest are IterVar +# nodes of the same format as the IterVar rows in Schedule Trees. Root +# IterVars are those not driven by any transformation node, such as ax0; leaf +# IterVars don't drive any transformation node and have non-negative indices, # such as ax0.ax1.fused.ax2.fused.ax3.fused.outer with index of 0. # -###################################################################### -# Summary -# ------- -# This tutorial demonstrates the usage of TEDD. We use an example built -# with TOPI to show the schedules under the hood. You can also use +###################################################################### +# Summary +# ------- +# This tutorial demonstrates the usage of TEDD. We use an example built +# with TOPI to show the schedules under the hood. You can also use # it before and after any schedule primitive to inspect its effect. -# \ No newline at end of file +# diff --git a/tutorials/language/tensorize.py b/tutorials/language/tensorize.py index afc708e5d1d3c..d80a7ca584450 100644 --- a/tutorials/language/tensorize.py +++ b/tutorials/language/tensorize.py @@ -35,6 +35,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -46,12 +47,12 @@ # The following lines describe the computation :code:`A * B^T` in TVM. # N, M, L = 1024, 512, 64 -A = tvm.placeholder((N, L), name='A') -B = tvm.placeholder((M, L), name='B') -k = tvm.reduce_axis((0, L), name='k') -C = tvm.compute((N, M), lambda i, j: - tvm.sum(A[i, k] * B[j, k], axis=k), name='C') -s = tvm.create_schedule(C.op) +A = te.placeholder((N, L), name='A') +B = te.placeholder((M, L), name='B') +k = te.reduce_axis((0, L), name='k') +C = te.compute((N, M), lambda i, j: + te.sum(A[i, k] * B[j, k], axis=k), name='C') +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True)) ###################################################################### @@ -88,19 +89,19 @@ # which is done in :code:`intrin_func` below. # def intrin_gemv(m, l): - a = tvm.placeholder((l,), name='a') - b = tvm.placeholder((m, l), name='b') - k = tvm.reduce_axis((0, l), name='k') - c = tvm.compute((m,), lambda i: tvm.sum(a[k] * b[i, k], axis=k), name='c') - Ab = tvm.decl_buffer(a.shape, a.dtype, + a = te.placeholder((l,), name='a') + b = te.placeholder((m, l), name='b') + k = te.reduce_axis((0, l), name='k') + c = te.compute((m,), lambda i: te.sum(a[k] * b[i, k], axis=k), name='c') + Ab = tvm.tir.decl_buffer(a.shape, a.dtype, name="A", offset_factor=1, strides=[1]) - Bb = tvm.decl_buffer(b.shape, b.dtype, + Bb = tvm.tir.decl_buffer(b.shape, b.dtype, name="B", offset_factor=1, - strides=[tvm.var("s1"), 1]) - Cb = tvm.decl_buffer(c.shape, c.dtype, + strides=[te.var("s1"), 1]) + Cb = tvm.tir.decl_buffer(c.shape, c.dtype, name="C", offset_factor=1, strides=[1]) @@ -108,7 +109,7 @@ def intrin_func(ins, outs): ib = tvm.ir_builder.create() aa, bb = ins cc = outs[0] - ib.emit(tvm.call_extern("int32", "gemv_update", + ib.emit(tvm.tir.call_extern("int32", "gemv_update", cc.access_ptr("w"), aa.access_ptr("r"), bb.access_ptr("r"), @@ -134,7 +135,7 @@ def intrin_func(ins, outs): # For now :code:`bb.strides[0] == l`, # but later we will see how they can differ with more complicated schedules. # -# Note that we use :code:`tvm.var("s1")` as the first stride dimension for :code:`B`. +# Note that we use :code:`te.var("s1")` as the first stride dimension for :code:`B`. # If the strides can be inferred # - in this case, TVM knows tensor B is compact thus the strides are :code:`[L, 1]` - # such placeholder can be put to let TVM automatically bind the inferred value for us. @@ -233,20 +234,20 @@ def gemv_impl(): return ll_code def intrin_gemv(m, l): - a = tvm.placeholder((l,), name='a') - b = tvm.placeholder((m, l), name='b') - k = tvm.reduce_axis((0, l), name='k') - c = tvm.compute((m,), lambda i: - tvm.sum(a[k] * b[i, k], axis=k), name='c') - Ab = tvm.decl_buffer(a.shape, a.dtype, + a = te.placeholder((l,), name='a') + b = te.placeholder((m, l), name='b') + k = te.reduce_axis((0, l), name='k') + c = te.compute((m,), lambda i: + te.sum(a[k] * b[i, k], axis=k), name='c') + Ab = tvm.tir.decl_buffer(a.shape, a.dtype, name="A", offset_factor=1, strides=[1]) - Bb = tvm.decl_buffer(b.shape, b.dtype, + Bb = tvm.tir.decl_buffer(b.shape, b.dtype, name="B", offset_factor=1, - strides=[tvm.var("s1"), 1]) - Cb = tvm.decl_buffer(c.shape, c.dtype, + strides=[te.var("s1"), 1]) + Cb = tvm.tir.decl_buffer(c.shape, c.dtype, name="C", offset_factor=1, strides=[1]) @@ -255,7 +256,7 @@ def intrin_func(ins, outs): cc = outs[0] def _body(): ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("int32", "gemv_update", + ib.emit(tvm.tir.call_extern("int32", "gemv_update", cc.access_ptr("w"), aa.access_ptr("r"), bb.access_ptr("r"), @@ -263,7 +264,7 @@ def _body(): return ib.get() def _reduce_reset(): ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("int32", "gemv_reset", cc.access_ptr("w"), m)) + ib.emit(tvm.tir.call_extern("int32", "gemv_reset", cc.access_ptr("w"), m)) return ib.get() def _reduce_update(): return _body() diff --git a/tutorials/language/tuple_inputs.py b/tutorials/language/tuple_inputs.py index 715e2ef36f7e3..828797ac1b1db 100644 --- a/tutorials/language/tuple_inputs.py +++ b/tutorials/language/tuple_inputs.py @@ -28,23 +28,24 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### # Describe Batchwise Computation # ------------------------------ # For operators which have the same shape, we can put them together as -# the inputs of :any:`tvm.compute`, if we want them to be scheduled +# the inputs of :any:`te.compute`, if we want them to be scheduled # together in the next schedule procedure. # -n = tvm.var("n") -m = tvm.var("m") -A0 = tvm.placeholder((m, n), name='A0') -A1 = tvm.placeholder((m, n), name='A1') -B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] + 2, A1[i, j] * 3), name='B') +n = te.var("n") +m = te.var("m") +A0 = te.placeholder((m, n), name='A0') +A1 = te.placeholder((m, n), name='A1') +B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] + 2, A1[i, j] * 3), name='B') # The generated IR code would be: -s = tvm.create_schedule(B0.op) +s = te.create_schedule(B0.op) print(tvm.lower(s, [A0, A1, B0, B1], simple_mode=True)) ###################################################################### @@ -56,7 +57,7 @@ # operators, and the inputs will collaborate together, e.g. :code:`argmax`. # In the reduction procedure, :code:`argmax` need to compare the value of # operands, also need to keep the index of operand. It can be expressed -# with :py:func:`tvm.comm_reducer` as below: +# with :py:func:`te.comm_reducer` as below: # x and y are the operands of reduction, both of them is a tuple of index # and value. @@ -68,20 +69,20 @@ def fcombine(x, y): # our identity element also need to be a tuple, so `fidentity` accepts # two types as inputs. def fidentity(t0, t1): - return tvm.const(-1, t0), tvm.min_value(t1) + return tvm.tir.const(-1, t0), tvm.te.min_value(t1) -argmax = tvm.comm_reducer(fcombine, fidentity, name='argmax') +argmax = te.comm_reducer(fcombine, fidentity, name='argmax') # describe the reduction computation -m = tvm.var('m') -n = tvm.var('n') -idx = tvm.placeholder((m, n), name='idx', dtype='int32') -val = tvm.placeholder((m, n), name='val', dtype='int32') -k = tvm.reduce_axis((0, n), 'k') -T0, T1 = tvm.compute((m, ), lambda i: argmax((idx[i, k], val[i, k]), axis=k), name='T') +m = te.var('m') +n = te.var('n') +idx = te.placeholder((m, n), name='idx', dtype='int32') +val = te.placeholder((m, n), name='val', dtype='int32') +k = te.reduce_axis((0, n), 'k') +T0, T1 = te.compute((m, ), lambda i: argmax((idx[i, k], val[i, k]), axis=k), name='T') # the generated IR code would be: -s = tvm.create_schedule(T0.op) +s = te.create_schedule(T0.op) print(tvm.lower(s, [idx, val, T0, T1], simple_mode=True)) ###################################################################### @@ -97,14 +98,14 @@ def fidentity(t0, t1): # with one batch operation, but they can only be scheduled together # in terms of operation. -n = tvm.var("n") -m = tvm.var("m") -A0 = tvm.placeholder((m, n), name='A0') -B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] + 2, A0[i, j] * 3), name='B') -A1 = tvm.placeholder((m, n), name='A1') -C = tvm.compute((m, n), lambda i, j: A1[i, j] + B0[i, j], name='C') +n = te.var("n") +m = te.var("m") +A0 = te.placeholder((m, n), name='A0') +B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] + 2, A0[i, j] * 3), name='B') +A1 = te.placeholder((m, n), name='A1') +C = te.compute((m, n), lambda i, j: A1[i, j] + B0[i, j], name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B0].compute_at(s[C], C.op.axis[0]) # as you can see in the below generated IR code: print(tvm.lower(s, [A0, A1, C], simple_mode=True)) diff --git a/tutorials/optimize/opt_conv_cuda.py b/tutorials/optimize/opt_conv_cuda.py index 74d1e6d8b6a0c..025e53eb012a9 100644 --- a/tutorials/optimize/opt_conv_cuda.py +++ b/tutorials/optimize/opt_conv_cuda.py @@ -42,6 +42,7 @@ import numpy as np import tvm +from tvm import te # The sizes of inputs and filters batch = 256 @@ -53,25 +54,25 @@ stride = 1 # Algorithm -A = tvm.placeholder((in_size, in_size, in_channel, batch), name='A') -W = tvm.placeholder((kernel, kernel, in_channel, out_channel), name='W') +A = te.placeholder((in_size, in_size, in_channel, batch), name='A') +W = te.placeholder((kernel, kernel, in_channel, out_channel), name='W') out_size = (in_size - kernel + 2*pad) // stride + 1 # Pad input -Apad = tvm.compute( +Apad = te.compute( (in_size + 2*pad, in_size + 2*pad, in_channel, batch), - lambda yy, xx, cc, nn: tvm.if_then_else( - tvm.all(yy >= pad, yy - pad < in_size, + lambda yy, xx, cc, nn: tvm.tir.if_then_else( + tvm.tir.all(yy >= pad, yy - pad < in_size, xx >= pad, xx - pad < in_size), - A[yy - pad, xx - pad, cc, nn], tvm.const(0., "float32")), + A[yy - pad, xx - pad, cc, nn], tvm.tir.const(0., "float32")), name='Apad') # Create reduction variables -rc = tvm.reduce_axis((0, in_channel), name='rc') -ry = tvm.reduce_axis((0, kernel), name='ry') -rx = tvm.reduce_axis((0, kernel), name='rx') +rc = te.reduce_axis((0, in_channel), name='rc') +ry = te.reduce_axis((0, kernel), name='ry') +rx = te.reduce_axis((0, kernel), name='rx') # Compute the convolution -B = tvm.compute( +B = te.compute( (out_size, out_size, out_channel, batch), - lambda yy, xx, ff, nn: tvm.sum( + lambda yy, xx, ff, nn: te.sum( Apad[yy * stride + ry, xx * stride + rx, rc, nn] * W[ry, rx, rc, ff], axis=[ry, rx, rc]), name='B') @@ -101,7 +102,7 @@ # # Designate the memory hierarchy -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) s[Apad].compute_inline() # compute Apad inline AA = s.cache_read(Apad, 'shared', [B]) WW = s.cache_read(W, "shared", [B]) @@ -135,13 +136,13 @@ vthread = 2 # Get the GPU thread indices -block_x = tvm.thread_axis("blockIdx.x") -block_y = tvm.thread_axis("blockIdx.y") -block_z = tvm.thread_axis("blockIdx.z") -thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") -thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") -thread_xz = tvm.thread_axis((0, vthread), "vthread", name="vx") -thread_yz = tvm.thread_axis((0, vthread), "vthread", name="vy") +block_x = te.thread_axis("blockIdx.x") +block_y = te.thread_axis("blockIdx.y") +block_z = te.thread_axis("blockIdx.z") +thread_x = te.thread_axis((0, num_thread), "threadIdx.x") +thread_y = te.thread_axis((0, num_thread), "threadIdx.y") +thread_xz = te.thread_axis((0, vthread), "vthread", name="vx") +thread_yz = te.thread_axis((0, vthread), "vthread", name="vy") # Split the workloads hi, wi, fi, ni = s[B].op.axis diff --git a/tutorials/optimize/opt_conv_tensorcore.py b/tutorials/optimize/opt_conv_tensorcore.py index ef840892d7d55..2fa4fd7ef33b5 100644 --- a/tutorials/optimize/opt_conv_tensorcore.py +++ b/tutorials/optimize/opt_conv_tensorcore.py @@ -52,6 +52,7 @@ # NHWCnc memory layout.The following code defines the convolution algorithm in TVM. import tvm +from tvm import te import numpy as np from tvm.contrib import nvcc @@ -98,30 +99,30 @@ block_size) # Reduction axes -kh = tvm.reduce_axis((0, kernel_h), name='kh') -kw = tvm.reduce_axis((0, kernel_w), name='kw') -ic = tvm.reduce_axis((0, in_channels // block_size), name='ic') -ii = tvm.reduce_axis((0, block_size), name='ii') +kh = te.reduce_axis((0, kernel_h), name='kh') +kw = te.reduce_axis((0, kernel_w), name='kw') +ic = te.reduce_axis((0, in_channels // block_size), name='ic') +ii = te.reduce_axis((0, block_size), name='ii') # Algorithm -A = tvm.placeholder(data_shape, name='A', dtype="float16") -W = tvm.placeholder(kernel_shape, name='W', dtype="float16") -Apad = tvm.compute( +A = te.placeholder(data_shape, name='A', dtype="float16") +W = te.placeholder(kernel_shape, name='W', dtype="float16") +Apad = te.compute( (batch_size // block_size, height + 2 * pad_h, width + 2 * pad_w, in_channels // block_size, block_size, block_size), - lambda n, h, w, i, nn, ii: tvm.if_then_else( - tvm.all(h >= pad_h, h - pad_h < height, + lambda n, h, w, i, nn, ii: tvm.tir.if_then_else( + tvm.tir.all(h >= pad_h, h - pad_h < height, w >= pad_w, w - pad_w < width), - A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.const(0., "float16")), + A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.tir.const(0., "float16")), name='Apad') -Conv = tvm.compute(output_shape, - lambda n, h, w, o, nn, oo: tvm.sum( +Conv = te.compute(output_shape, + lambda n, h, w, o, nn, oo: te.sum( Apad[n, h * stride_h + kh, w * stride_w + kw, ic, nn, ii].astype("float32") * W[kh, kw, ic, o, ii, oo].astype("float32"), axis=[ic, kh, kw, ii]), name="Conv") -s = tvm.create_schedule(Conv.op) +s = te.create_schedule(Conv.op) s[Apad].compute_inline() ############################################################################### @@ -152,17 +153,17 @@ def intrin_wmma_load_matrix(scope): n = 16 - A = tvm.placeholder((n, n), name='A', dtype='float16') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256) - C = tvm.compute((n, n), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256) + A = te.placeholder((n, n), name='A', dtype='float16') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256) + C = te.compute((n, n), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256) def intrin_func(ins, outs): ib = tvm.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_load_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync', BC.data, n, n, n, BC.elem_offset // 256, BA.access_ptr('r'), n, 'row_major')) return ib.get() @@ -172,16 +173,16 @@ def intrin_func(ins, outs): def intrin_wmma_gemm(): n = 16 - A = tvm.placeholder((n, n), name='A', dtype='float16') - B = tvm.placeholder((n, n), name='B', dtype='float16') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((n, n), + A = te.placeholder((n, n), name='A', dtype='float16') + B = te.placeholder((n, n), name='B', dtype='float16') + k = te.reduce_axis((0, n), name="k") + C = te.compute((n, n), lambda ii, jj: - tvm.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), + te.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), name='C') - BA = tvm.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=256) - BB = tvm.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=256) - BC = tvm.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=256) + BA = tvm.tir.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=256) + BB = tvm.tir.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=256) + BC = tvm.tir.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=256) def intrin_func(ins, outs): BA, BB = ins @@ -189,12 +190,12 @@ def intrin_func(ins, outs): def init(): ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0)) + ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0)) return ib.get() def update(): ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_mma_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync', BC.data, BC.elem_offset // 256, BA.data, BA.elem_offset // 256, BB.data, BB.elem_offset // 256, @@ -208,16 +209,16 @@ def update(): def intrin_wmma_store_matrix(): n = 16 - A = tvm.placeholder((n, n), name='A', dtype='float32') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=256) - C = tvm.compute((n, n), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=256) + A = te.placeholder((n, n), name='A', dtype='float32') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=256) + C = te.compute((n, n), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=256) def intrin_func(ins, outs): ib = tvm.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_store_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync', BA.data, n, n, n, BA.elem_offset // 256, BC.access_ptr('w'), n, 'row_major')) return ib.get() @@ -255,12 +256,12 @@ def intrin_func(ins, outs): warp_size = 32 chunk = 2 -block_x = tvm.thread_axis('blockIdx.x') -block_y = tvm.thread_axis('blockIdx.y') -block_z = tvm.thread_axis('blockIdx.z') -thread_x = tvm.thread_axis('threadIdx.x') -thread_y = tvm.thread_axis('threadIdx.y') -thread_z = tvm.thread_axis('threadIdx.z') +block_x = te.thread_axis('blockIdx.x') +block_y = te.thread_axis('blockIdx.y') +block_z = te.thread_axis('blockIdx.z') +thread_x = te.thread_axis('threadIdx.x') +thread_y = te.thread_axis('threadIdx.y') +thread_z = te.thread_axis('threadIdx.z') nc, hc, wc, oc, nnc, ooc = Conv.op.axis block_k = s[Conv].fuse(hc, wc) diff --git a/tutorials/optimize/opt_gemm.py b/tutorials/optimize/opt_gemm.py index 8ed152aee9184..daca89b23a52b 100644 --- a/tutorials/optimize/opt_gemm.py +++ b/tutorials/optimize/opt_gemm.py @@ -56,6 +56,7 @@ # Then we write a baseline implementation, the simplest way to write a matrix multiplication in TVM. import tvm +from tvm import te import numpy import timeit @@ -94,16 +95,16 @@ answer = numpy.dot(a.asnumpy(), b.asnumpy()) # Algorithm -k = tvm.reduce_axis((0, K), 'k') -A = tvm.placeholder((M, K), name='A') -B = tvm.placeholder((K, N), name='B') -C = tvm.compute( +k = te.reduce_axis((0, K), 'k') +A = te.placeholder((M, K), name='A') +B = te.placeholder((K, N), name='B') +C = te.compute( (M, N), - lambda x, y: tvm.sum(A[x, k] * B[k, y], axis=k), + lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name='C') # Default schedule -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) func = tvm.build(s, [A, B, C], target=target, name='mmult') assert func @@ -129,7 +130,7 @@ # fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB (L1 data cache) bn = 32 -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) # Blocking by loop tiling xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) @@ -165,7 +166,7 @@ # # In this tutorial, we chose to vectorize the inner loop row data since it is cache friendly. -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) k, = s[C].op.reduce_axis ko, ki = s[C].split(k, factor=4) @@ -199,7 +200,7 @@ # which is not cache friendly. If we change the nested loop order of ki and inner axes xi, # the access pattern for A matrix is more cache friendly. -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) k, = s[C].op.reduce_axis ko, ki = s[C].split(k, factor=4) @@ -244,12 +245,12 @@ # # We have to re-write the algorithm slightly. -packedB = tvm.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name='packedB') -C = tvm.compute((M, N), - lambda x, y: tvm.sum(A[x, k] * packedB[y // bn, k, tvm.indexmod(y, bn)], axis=k), +packedB = te.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name='packedB') +C = te.compute((M, N), + lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k), name = 'C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) k, = s[C].op.reduce_axis @@ -285,7 +286,7 @@ # write to C when all the block results are ready. # -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) # Allocate write cache CC = s.cache_write(C, 'global') @@ -328,7 +329,7 @@ # -------- # Futhermore, we can also utilize multi-core processors to do the thread-level parallelization. -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) CC = s.cache_write(C, 'global') diff --git a/tutorials/optimize/opt_matmul_auto_tensorcore.py b/tutorials/optimize/opt_matmul_auto_tensorcore.py index a4658eba2bee4..ce983fba660ef 100644 --- a/tutorials/optimize/opt_matmul_auto_tensorcore.py +++ b/tutorials/optimize/opt_matmul_auto_tensorcore.py @@ -46,12 +46,13 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.contrib import nvcc def matmul_nn(A, B, L, dtype='float16', layout='NN'): - k = tvm.reduce_axis((0, L), name='k') + k = te.reduce_axis((0, L), name='k') if dtype == 'float16': out_type = 'float' elif dtype == 'int8': @@ -59,13 +60,13 @@ def matmul_nn(A, B, L, dtype='float16', layout='NN'): elif dtype == 'int4' or dtype == 'int1': out_type = 'int' if (layout == 'NN'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k].astype(out_type) * B[k, j].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[i, k].astype(out_type) * B[k, j].astype(out_type), axis=k)) if (layout == 'NT'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[k, i].astype(out_type) * B[k, j].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[k, i].astype(out_type) * B[k, j].astype(out_type), axis=k)) if (layout == 'TN'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k].astype(out_type) * B[j, k].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[i, k].astype(out_type) * B[j, k].astype(out_type), axis=k)) if (layout == 'TT'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[k, i].astype(out_type) * B[j, k].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[k, i].astype(out_type) * B[j, k].astype(out_type), axis=k)) ############################################################################### # Scheduling the Computation @@ -85,7 +86,7 @@ def matmul_nn(A, B, L, dtype='float16', layout='NN'): # (2) The warp tile size is not 16x16x16 on CUDA9, or not one of {16x16x16, 32x8x16, 8x32x16} on CUDA version >= 10.0. # # In this schedule, storage_align is used to reduce bank conflicts of shared memory. Please refer to this -# `doc `_ +# `doc `_ # for the usage of storage_align primitive. In short, we need to add an offset to some shared memory buffer # to reduce bank conflicts. # According to the `wmma doc `_, @@ -111,11 +112,11 @@ def test_gemm(N, L, M, dtype, layout): else: print ("Unsupported layout:", layout) sys.exit(1); - A = tvm.placeholder(shape_a, name='A', dtype=dtype) - B = tvm.placeholder(shape_b, name='B', dtype=dtype) + A = te.placeholder(shape_a, name='A', dtype=dtype) + B = te.placeholder(shape_b, name='B', dtype=dtype) C = matmul_nn(A, B, L, dtype, layout) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) y, x = s[C].op.axis k = s[C].op.reduce_axis[0] @@ -182,11 +183,11 @@ def test_gemm(N, L, M, dtype, layout): tz, xi = s[C].split(xi, WX) tx, xi = s[C].split(xi, TX) s[C].reorder(yo, xo, tz, ty, tx, yi, xi) - s[C].bind(yo, tvm.thread_axis("blockIdx.y")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(yo, te.thread_axis("blockIdx.y")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) # schedule for CL stage ko, ki = s[CL].split(k, step_k * warp_tile_k) @@ -202,9 +203,9 @@ def test_gemm(N, L, M, dtype, layout): tx, vec = s[AA].split(tx, factor=v) fused = s[AA].fuse(s[AA].op.axis[0], xo) _, ty = s[AA].split(fused, factor=by) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tz, tvm.thread_axis("threadIdx.z")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, te.thread_axis("threadIdx.y")) + s[AA].bind(tz, te.thread_axis("threadIdx.z")) + s[AA].bind(tx, te.thread_axis("threadIdx.x")) # vectorization is very important for float16/int8 inputs s[AA].vectorize(vec) @@ -215,9 +216,9 @@ def test_gemm(N, L, M, dtype, layout): tx, vec = s[BB].split(tx, factor=v) fused = s[BB].fuse(s[BB].op.axis[0], xo) _, ty = s[BB].split(fused, factor=by) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tz, tvm.thread_axis("threadIdx.z")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tz, te.thread_axis("threadIdx.z")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].vectorize(vec) s[AL].compute_at(s[CL], kl) diff --git a/tutorials/relay_quick_start.py b/tutorials/relay_quick_start.py index d272a0e315b88..b258d1bf3338d 100644 --- a/tutorials/relay_quick_start.py +++ b/tutorials/relay_quick_start.py @@ -42,6 +42,7 @@ from tvm import relay from tvm.relay import testing import tvm +from tvm import te from tvm.contrib import graph_runtime ###################################################################### diff --git a/tutorials/tensor_expr_get_started.py b/tutorials/tensor_expr_get_started.py index ca92b3b3ab957..ecd3f2b89528f 100644 --- a/tutorials/tensor_expr_get_started.py +++ b/tutorials/tensor_expr_get_started.py @@ -28,6 +28,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np # Global declarations of environment. @@ -62,10 +63,10 @@ # No computation happens during this phase, as we are only declaring how # the computation should be done. # -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.placeholder((n,), name='B') -C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.placeholder((n,), name='B') +C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") print(type(C)) ###################################################################### @@ -88,7 +89,7 @@ # C[i] = A[i] + B[i]; # } # -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) ###################################################################### # We used the split construct to split the first axis of C, @@ -114,8 +115,8 @@ # to generate code that runs on GPU. # if tgt == "cuda" or tgt == "rocm" or tgt.startswith('opencl'): - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) ###################################################################### # Compilation @@ -188,7 +189,7 @@ # arrays with different shapes into fadd, an error will be raised. # # We can do more specializations. For example, we can write -# :code:`n = tvm.convert(1024)` instead of :code:`n = tvm.var("n")`, +# :code:`n = tvm.runtime.convert(1024)` instead of :code:`n = te.var("n")`, # in the computation declaration. The generated function will # only take vectors with length 1024. # diff --git a/tutorials/topi/intro_topi.py b/tutorials/topi/intro_topi.py index 2e049828e5cc5..5bb5f0a66e309 100644 --- a/tutorials/topi/intro_topi.py +++ b/tutorials/topi/intro_topi.py @@ -26,6 +26,7 @@ from __future__ import absolute_import, print_function import tvm +from tvm import te import topi import numpy as np @@ -36,12 +37,12 @@ # To compute the sum of rows of a two dimensional TVM tensor A, we should # specify the symbolic operation as well as schedule as follows # -n = tvm.var("n") -m = tvm.var("m") -A = tvm.placeholder((n, m), name='A') -k = tvm.reduce_axis((0, m), "k") -B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") -s = tvm.create_schedule(B.op) +n = te.var("n") +m = te.var("m") +A = te.placeholder((n, m), name='A') +k = te.reduce_axis((0, m), "k") +B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") +s = te.create_schedule(B.op) ###################################################################### # and to examine the IR code in human readable format, we can do @@ -50,11 +51,11 @@ ###################################################################### # However, for such a common operation we had to define the reduce axis ourselves as well as explicit computation with -# :code:`tvm.compute`. Imagine for more complicated operations how much details we need to provide. +# :code:`te.compute`. Imagine for more complicated operations how much details we need to provide. # Fortunately, we can replace those two lines with simple :code:`topi.sum` much like :code:`numpy.sum` # C = topi.sum(A, axis=1) -ts = tvm.create_schedule(C.op) +ts = te.create_schedule(C.op) print(tvm.lower(ts, [A], simple_mode=True)) ###################################################################### @@ -64,8 +65,8 @@ # Even shorter, TOPI provides operator overloading for such common operations. For example, # x, y = 100, 10 -a = tvm.placeholder((x, y, y), name="a") -b = tvm.placeholder((y, y), name="b") +a = te.placeholder((x, y, y), name="a") +b = te.placeholder((y, y), name="b") c = a + b # same as topi.broadcast_add d = a * b # same as topi.broadcast_mul @@ -110,7 +111,7 @@ ###################################################################### # TOPI also provides common neural nets operations such as _softmax_ with optimized schedule # -tarray = tvm.placeholder((512, 512), name="tarray") +tarray = te.placeholder((512, 512), name="tarray") softmax_topi = topi.nn.softmax(tarray) with tvm.target.create("cuda"): sst = topi.cuda.schedule_softmax(softmax_topi) @@ -129,8 +130,8 @@ # compute declaration and schedule. TVM will choose the right function to call with # the target information. -data = tvm.placeholder((1, 3, 224, 224)) -kernel = tvm.placeholder((10, 3, 5, 5)) +data = te.placeholder((1, 3, 224, 224)) +kernel = te.placeholder((10, 3, 5, 5)) with tvm.target.create("cuda"): conv = topi.cuda.conv2d_nchw(data, kernel, 1, 2, 1) diff --git a/vta/apps/gemm/python/tsim.py b/vta/apps/gemm/python/tsim.py index c0f7b136e11bb..85fd463e32782 100644 --- a/vta/apps/gemm/python/tsim.py +++ b/vta/apps/gemm/python/tsim.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import ctypes import os.path as osp from sys import platform diff --git a/vta/apps/gemm/tests/python/chisel_accel.py b/vta/apps/gemm/tests/python/chisel_accel.py index 4666661f9bc9f..441f36d8de090 100644 --- a/vta/apps/gemm/tests/python/chisel_accel.py +++ b/vta/apps/gemm/tests/python/chisel_accel.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np import tsim import sys @@ -32,7 +33,7 @@ """ def slice(A, slice_width): assert np.log2(slice_width) % 1 == 0, "only power of 2 is supported" - dtype = type(A[0]) + dtype = type(A[0]) row = 0 # currently only supports uint if dtype is np.uint8: row = 8 // slice_width @@ -45,7 +46,7 @@ def slice(A, slice_width): else: dtype = 'uint8' - C = np.zeros((row, len(A))).astype(dtype) # sliced and transform + C = np.zeros((row, len(A))).astype(dtype) # sliced and transform # create mask slice_mask = 2**(slice_width)-1 @@ -57,7 +58,7 @@ def slice(A, slice_width): def slice_mat(A, slice_width): assert np.log2(slice_width) % 1 == 0, "only power of 2 is supported" - dtype = type(A[0][0]) + dtype = type(A[0][0]) row = 0 # currently only supports uint if dtype is np.uint8: row = 8 // slice_width @@ -71,7 +72,7 @@ def slice_mat(A, slice_width): dtype = 'uint8' # 3d array (bits, row, clmn) - C = np.zeros((row, A.shape[0], A.shape[1])).astype(dtype) # sliced and transform + C = np.zeros((row, A.shape[0], A.shape[1])).astype(dtype) # sliced and transform # create mask slice_mask = 2**(slice_width)-1 @@ -162,16 +163,16 @@ def test_accel(A, B, i_width, w_width): for i in range(len(a_arr)): for j in range(len(b_arr)): shift = np.uint8(i*i_width + j*w_width) - if i == 0 and j == 0: + if i == 0 and j == 0: cycles += f(b_arr[j], a_arr[i], shift, accum, np.uint32(1)) # reset accumulator - else: + else: cycles += f(b_arr[j], a_arr[i], shift, accum, np.uint32(0)) # no reset return (accum.asnumpy(), cycles) """ Matrix Generator Parameters ----------- +---------- dtype : String, datatype generated (supports only uint) i_width : weight bit slices(needs to be less than actual bit width) w_width : activation bit slices(needs to be less than actual bit width) @@ -179,9 +180,9 @@ def test_accel(A, B, i_width, w_width): def top_test(dtype, i_width, w_width): # only supports positive values (up to 2**(bits-1)) - rmax = 127 + rmax = 127 # (m,16) * (16,16) GEMM - rrow = np.random.randint(7) + 1 + rrow = np.random.randint(7) + 1 clmn = 16 A = np.random.randint(rmax, size=(rrow,clmn)).astype(dtype) B = np.random.randint(rmax, size=(clmn,clmn)).astype(dtype) @@ -196,8 +197,8 @@ def top_test(dtype, i_width, w_width): for i in range(1): # reg1 and reg2 bits in hardware/chisel/src/main/Compute.scala must be modified for slices greater than 8 bits if sys.argv[1] == 'serial': - # generates a random uint8 GEMM with 2-bit(8/4) input and 4-bit(8/2) weight + # generates a random uint8 GEMM with 2-bit(8/4) input and 4-bit(8/2) weight top_test("uint8", 4, 2) elif sys.argv[1] == 'parallel': - # generates a random uint8 GEMM with 8-bit input and 8-bit weight (bit parallel) + # generates a random uint8 GEMM with 8-bit input and 8-bit weight (bit parallel) top_test('uint8', 8, 8) diff --git a/vta/apps/tsim_example/python/tsim.py b/vta/apps/tsim_example/python/tsim.py index c0f7b136e11bb..85fd463e32782 100644 --- a/vta/apps/tsim_example/python/tsim.py +++ b/vta/apps/tsim_example/python/tsim.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import ctypes import os.path as osp from sys import platform diff --git a/vta/apps/tsim_example/tests/python/chisel_accel.py b/vta/apps/tsim_example/tests/python/chisel_accel.py index 1749aaa3b54d6..370ac4068e18b 100644 --- a/vta/apps/tsim_example/tests/python/chisel_accel.py +++ b/vta/apps/tsim_example/tests/python/chisel_accel.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np import tsim diff --git a/vta/apps/tsim_example/tests/python/verilog_accel.py b/vta/apps/tsim_example/tests/python/verilog_accel.py index 578a7c3a00205..3489ff2f6fed8 100644 --- a/vta/apps/tsim_example/tests/python/verilog_accel.py +++ b/vta/apps/tsim_example/tests/python/verilog_accel.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np import tsim diff --git a/vta/python/vta/build_module.py b/vta/python/vta/build_module.py index 4a62d354eb41a..8633a9b07156c 100644 --- a/vta/python/vta/build_module.py +++ b/vta/python/vta/build_module.py @@ -16,8 +16,6 @@ # under the License. # pylint: disable=unused-argument """VTA specific buildin for runtime.""" -from __future__ import absolute_import as _abs - import tvm from . import ir_pass from .environment import get_env @@ -63,7 +61,7 @@ def build_config(debug_flag=0, **kwargs): """ env = get_env() def add_debug(stmt): - debug = tvm.call_extern( + debug = tvm.tir.call_extern( "int32", "VTASetDebugMode", env.dev.command_handle, debug_flag) diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py index 8d58958410e54..49b78b321bd2b 100644 --- a/vta/python/vta/environment.py +++ b/vta/python/vta/environment.py @@ -22,6 +22,7 @@ import json import copy import tvm +from tvm import te from . import intrin from .pkg_config import PkgConfig @@ -61,9 +62,9 @@ class DevContext(object): QID_COMPUTE = 2 def __init__(self, env): - self.vta_axis = tvm.thread_axis("vta") + self.vta_axis = te.thread_axis("vta") self.vta_push_uop = tvm.tir.StringImm("VTAPushGEMMOp") - ctx = tvm.call_extern("handle", "VTATLSCommandHandle") + ctx = tvm.tir.call_extern("handle", "VTATLSCommandHandle") self.command_handle = tvm.tir.Call( "handle", "tvm_thread_context", [ctx], tvm.tir.Call.Intrinsic, None, 0) @@ -284,14 +285,14 @@ def mem_info_acc_buffer(): @tvm.register_func("tvm.intrin.rule.default.vta.coproc_sync") def coproc_sync(op): _ = op - return tvm.call_extern( + return tvm.tir.call_extern( "int32", "VTASynchronize", get_env().dev.command_handle, 1<<31) @tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_push") def coproc_dep_push(op): - return tvm.call_extern( + return tvm.tir.call_extern( "int32", "VTADepPush", get_env().dev.command_handle, op.args[0], op.args[1]) @@ -299,7 +300,7 @@ def coproc_dep_push(op): @tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_pop") def coproc_dep_pop(op): - return tvm.call_extern( + return tvm.tir.call_extern( "int32", "VTADepPop", get_env().dev.command_handle, op.args[0], op.args[1]) diff --git a/vta/python/vta/intrin.py b/vta/python/vta/intrin.py index a43fc75a92d07..b1ed7a13fa622 100644 --- a/vta/python/vta/intrin.py +++ b/vta/python/vta/intrin.py @@ -18,6 +18,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te def gemm(env, mock=False): """Matrix-matrix multiply intrinsic @@ -45,26 +46,26 @@ def gemm(env, mock=False): out_shape = (env.BATCH, env.BLOCK_OUT) assert out_shape[0] * out_shape[1] == out_lanes - wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]), - dtype="int%d" % env.WGT_WIDTH, - name=env.wgt_scope) - inp = tvm.placeholder((inp_shape[0], inp_shape[1]), - dtype="int%d" % env.INP_WIDTH, - name=env.inp_scope) - k = tvm.reduce_axis((0, wgt_shape[1]), name="k") + wgt = te.placeholder((wgt_shape[0], wgt_shape[1]), + dtype="int%d" % env.WGT_WIDTH, + name=env.wgt_scope) + inp = te.placeholder((inp_shape[0], inp_shape[1]), + dtype="int%d" % env.INP_WIDTH, + name=env.inp_scope) + k = te.reduce_axis((0, wgt_shape[1]), name="k") out_dtype = "int%d" % env.ACC_WIDTH - out = tvm.compute((out_shape[0], out_shape[1]), - lambda i, j: tvm.sum(inp[i, k].astype(out_dtype) * - wgt[j, k].astype(out_dtype), - axis=[k]), - name="out") - wgt_layout = tvm.decl_buffer( + out = te.compute((out_shape[0], out_shape[1]), + lambda i, j: te.sum(inp[i, k].astype(out_dtype) * + wgt[j, k].astype(out_dtype), + axis=[k]), + name="out") + wgt_layout = tvm.tir.decl_buffer( wgt.shape, wgt.dtype, env.wgt_scope, scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes) - inp_layout = tvm.decl_buffer( + inp_layout = tvm.tir.decl_buffer( inp.shape, inp.dtype, env.inp_scope, scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes) - out_layout = tvm.decl_buffer( + out_layout = tvm.tir.decl_buffer( out.shape, out.dtype, env.acc_scope, scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes) @@ -81,7 +82,7 @@ def instr(index): irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop) if index in (0, 2): - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopPush", 0, 0, dout.access_ptr("rw", "int32"), @@ -89,7 +90,7 @@ def instr(index): dwgt.access_ptr("r", "int32"), 0, 0, 0)) else: - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopPush", 0, 1, dout.access_ptr("rw", "int32"), diff --git a/vta/python/vta/ir_pass.py b/vta/python/vta/ir_pass.py index 0c9b2eac2df7c..723558357e6aa 100644 --- a/vta/python/vta/ir_pass.py +++ b/vta/python/vta/ir_pass.py @@ -17,6 +17,7 @@ """Additional IR Pass for VTA""" # pylint: disable=len-as-condition, no-else-return import tvm +from tvm import te from topi import util from .environment import get_env @@ -90,7 +91,7 @@ def _post_order(op): gemm_offsets[i] = m[0] args.append(m[1]) args += op.args[base_args+3:] - return tvm.call_extern("int32", "VTAUopPush", *args) + return tvm.tir.call_extern("int32", "VTAUopPush", *args) if op.name not in ("VTATLSCommandHandle", "tvm_thread_context"): raise RuntimeError("unexpected op %s" % op) return op @@ -104,9 +105,9 @@ def _visit(op): fail[0] = True tvm.ir_pass.PostOrderVisit(ret, _visit) if not fail[0]: - begin = tvm.call_extern( + begin = tvm.tir.call_extern( "int32", "VTAUopLoopBegin", stmt.extent, *gemm_offsets) - end = tvm.call_extern("int32", "VTAUopLoopEnd") + end = tvm.tir.call_extern("int32", "VTAUopLoopEnd") return [begin, ret, end] raise ValueError("Failed to fold the GEMM instructions..") @@ -169,7 +170,7 @@ def _post_order(op): return None new_var = rw_info[buffer_var] let_stmt = tvm.tir.LetStmt( - new_var, tvm.call_extern( + new_var, tvm.tir.call_extern( "handle", "VTABufferCPUPtr", env.dev.command_handle, buffer_var), op.body) @@ -181,14 +182,14 @@ def _post_order(op): if isinstance(op, tvm.tir.Load): buffer_var = op.buffer_var if not buffer_var in rw_info: - rw_info[buffer_var] = tvm.var( + rw_info[buffer_var] = te.var( buffer_var.name + "_ptr", "handle") new_var = rw_info[buffer_var] return tvm.tir.Load(op.dtype, new_var, op.index) if isinstance(op, tvm.tir.Store): buffer_var = op.buffer_var if not buffer_var in rw_info: - rw_info[buffer_var] = tvm.var( + rw_info[buffer_var] = te.var( buffer_var.name + "_ptr", "handle") new_var = rw_info[buffer_var] return tvm.tir.Store(new_var, op.value, op.index) @@ -197,7 +198,7 @@ def _post_order(op): stmt_in, None, _post_order, ["Allocate", "Load", "Store"]) for buffer_var, new_var in rw_info.items(): stmt = tvm.tir.LetStmt( - new_var, tvm.call_extern( + new_var, tvm.tir.call_extern( "handle", "VTABufferCPUPtr", env.dev.command_handle, buffer_var), stmt) @@ -333,12 +334,12 @@ def inject_dma_intrin(stmt_in): Transformed statement """ env = get_env() - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod def _check_compact(buf): ndim = len(buf.shape) - size = tvm.const(1, buf.shape[0].dtype) + size = tvm.tir.const(1, buf.shape[0].dtype) for i in reversed(range(ndim)): if not util.equal_const_int(size - buf.strides[i], 0): raise RuntimeError( @@ -494,7 +495,7 @@ def _inject_copy(src, dst, pad_before, pad_after, pad_value): irb = tvm.ir_builder.create() irb.scope_attr(env.dev.vta_axis, "coproc_scope", env.dev.get_task_qid(task_qid)) - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAStoreBuffer2D", env.dev.command_handle, src.access_ptr("r", "int32"), @@ -565,7 +566,7 @@ def _inject_copy(src, dst, pad_before, pad_after, pad_value): irb.scope_attr(env.dev.vta_axis, "coproc_scope", env.dev.get_task_qid(task_qid)) - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTALoadBuffer2D", env.dev.command_handle, src.data, offset, x_size, y_size, x_stride, @@ -594,26 +595,26 @@ def _get_gemm_intrin_buffer(): assert out_lanes == env.BATCH * env.BLOCK_OUT out_shape = (env.BATCH, env.BLOCK_OUT) assert out_shape[0] * out_shape[1] == out_lanes - wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]), - dtype="int%d" % env.WGT_WIDTH, - name=env.wgt_scope) - inp = tvm.placeholder((inp_shape[0], inp_shape[1]), - dtype="int%d" % env.INP_WIDTH, - name=env.inp_scope) - k = tvm.reduce_axis((0, wgt_shape[1]), name="k") + wgt = te.placeholder((wgt_shape[0], wgt_shape[1]), + dtype="int%d" % env.WGT_WIDTH, + name=env.wgt_scope) + inp = te.placeholder((inp_shape[0], inp_shape[1]), + dtype="int%d" % env.INP_WIDTH, + name=env.inp_scope) + k = te.reduce_axis((0, wgt_shape[1]), name="k") out_dtype = "int%d" % env.ACC_WIDTH - out = tvm.compute((out_shape[0], out_shape[1]), - lambda i, j: tvm.sum(inp[i, k].astype(out_dtype) * - wgt[j, k].astype(out_dtype), - axis=[k]), - name="out") - wgt_layout = tvm.decl_buffer( + out = te.compute((out_shape[0], out_shape[1]), + lambda i, j: te.sum(inp[i, k].astype(out_dtype) * + wgt[j, k].astype(out_dtype), + axis=[k]), + name="out") + wgt_layout = tvm.tir.decl_buffer( wgt.shape, wgt.dtype, env.wgt_scope, scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes) - inp_layout = tvm.decl_buffer( + inp_layout = tvm.tir.decl_buffer( inp.shape, inp.dtype, env.inp_scope, scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes) - out_layout = tvm.decl_buffer( + out_layout = tvm.tir.decl_buffer( out.shape, out.dtype, env.acc_scope, scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes) @@ -656,11 +657,11 @@ def _do_fold(op): dev = env.dev irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE)) irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop) - irb.emit(tvm.call_extern("int32", "VTAUopPush", - 0, 1, - dout.access_ptr("rw", "int32"), - 0, 0, - 0, 0, 0)) + irb.emit(tvm.tir.call_extern("int32", "VTAUopPush", + 0, 1, + dout.access_ptr("rw", "int32"), + 0, 0, + 0, 0, 0)) inner = irb.get() # TODO(@tmoreau89): This is only a temporary fix, please take a look. body = op.body.body @@ -671,7 +672,7 @@ def _do_fold(op): tpl = (args[0], 1, args[1], 1, args[2], 1, args[3], 1, 0, 1, 0, env.BLOCK_OUT) inner = tvm.tir.AttrStmt( [dout, res_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) return inner else: conv_call, data_call, kernel_call = calls[-3:] @@ -682,7 +683,7 @@ def _do_fold(op): if selects: condition = selects[0].condition else: - condition = tvm.const(1, 'int') + condition = tvm.tir.const(1, 'int') # create inner most block irb = tvm.ir_builder.create() @@ -690,12 +691,12 @@ def _do_fold(op): dev = env.dev irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE)) irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop) - irb.emit(tvm.call_extern("int32", "VTAUopPush", - 0, 0, - dout.access_ptr("rw", "int32"), - dinp.access_ptr("r", "int32"), - dwgt.access_ptr("r", "int32"), - 0, 0, 0)) + irb.emit(tvm.tir.call_extern("int32", "VTAUopPush", + 0, 0, + dout.access_ptr("rw", "int32"), + dinp.access_ptr("r", "int32"), + dwgt.access_ptr("r", "int32"), + 0, 0, 0)) inner = irb.get() args = conv_call.args @@ -703,19 +704,19 @@ def _do_fold(op): 1, 0, 1, 0, env.BLOCK_OUT) inner = tvm.tir.AttrStmt( [dout, res_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) args = kernel_call.args tpl = (args[0], 1, args[1], 1, args[2], 1, args[3], 1, 0, env.BLOCK_OUT, 0, env.BLOCK_IN) inner = tvm.tir.AttrStmt( [dwgt, kernel_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) args = data_call.args tpl = (args[0], 1, args[1], 1, args[2], 1, args[3], 1, 0, 1, 0, env.BLOCK_IN) inner = tvm.tir.AttrStmt( [dinp, pad_data_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) return inner return None ret = tvm.ir_pass.IRTransform( @@ -770,7 +771,7 @@ def inject_alu_intrin(stmt_in): Transformed statement """ env = get_env() - idxm = tvm.indexmod + idxm = tvm.tir.indexmod def _do_fold(stmt): def _equal(x, y): @@ -864,7 +865,7 @@ def _flatten_loop(src_coeff, dst_coeff, extents): elif isinstance(loop_body.value, tvm.tir.Load): alu_opcode = env.dev.ALU_OPCODE_SHR lhs = loop_body.value - rhs = tvm.const(0, "int32") + rhs = tvm.tir.const(0, "int32") else: raise RuntimeError( "Expression not recognized %s, %s, %s" % ( @@ -955,11 +956,11 @@ def _flatten_loop(src_coeff, dst_coeff, extents): # Insert ALU micro-ops irb = tvm.ir_builder.create() for idx, extent in enumerate(extents): - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopLoopBegin", extent, dst_coeff[idx], src_coeff[idx], 0)) use_imm = int(use_imm) - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopPush", 1, 0, dst_coeff[len(dst_coeff)-1], @@ -967,7 +968,7 @@ def _flatten_loop(src_coeff, dst_coeff, extents): 0, alu_opcode, use_imm, imm_val)) for extent in extents: - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopLoopEnd")) return irb.get() return stmt diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index 0720e2fc97347..0516e839484aa 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -193,7 +193,7 @@ def __init__(self, cfg, proj_root): self.inp_mem_size = 1 << cfg["LOG_INP_BUFF_SIZE"] # bytes self.inp_mem_banks = (inp_mem_bus_width + \ max_bus_width - 1) // \ - max_bus_width + max_bus_width self.inp_mem_width = min(inp_mem_bus_width, max_bus_width) self.inp_mem_depth = self.inp_mem_size * 8 // inp_mem_bus_width self.inp_mem_axi_ratio = self.inp_mem_width // mem_bus_width @@ -204,7 +204,7 @@ def __init__(self, cfg, proj_root): self.wgt_mem_size = 1 << cfg["LOG_WGT_BUFF_SIZE"] # bytes self.wgt_mem_banks = (wgt_mem_bus_width + \ max_bus_width - 1) // \ - max_bus_width + max_bus_width self.wgt_mem_width = min(wgt_mem_bus_width, max_bus_width) self.wgt_mem_depth = self.wgt_mem_size * 8 // wgt_mem_bus_width self.wgt_mem_axi_ratio = self.wgt_mem_width // mem_bus_width @@ -215,7 +215,7 @@ def __init__(self, cfg, proj_root): self.out_mem_size = 1 << cfg["LOG_OUT_BUFF_SIZE"] # bytes self.out_mem_banks = (out_mem_bus_width + \ max_bus_width - 1) // \ - max_bus_width + max_bus_width self.out_mem_width = min(out_mem_bus_width, max_bus_width) self.out_mem_depth = self.out_mem_size * 8 // out_mem_bus_width self.out_mem_axi_ratio = self.out_mem_width // mem_bus_width @@ -235,23 +235,23 @@ def __init__(self, cfg, proj_root): self.macro_defs.append("-DVTA_STORE_ADDR=%s" % (self.store_base_addr)) # IP register offsets self.macro_defs.append("-DVTA_FETCH_INSN_COUNT_OFFSET=%s" % \ - (self.fetch_insn_count_offset)) + (self.fetch_insn_count_offset)) self.macro_defs.append("-DVTA_FETCH_INSN_ADDR_OFFSET=%s" % \ - (self.fetch_insn_addr_offset)) + (self.fetch_insn_addr_offset)) self.macro_defs.append("-DVTA_LOAD_INP_ADDR_OFFSET=%s" % \ - (self.load_inp_addr_offset)) + (self.load_inp_addr_offset)) self.macro_defs.append("-DVTA_LOAD_WGT_ADDR_OFFSET=%s" % \ - (self.load_wgt_addr_offset)) + (self.load_wgt_addr_offset)) self.macro_defs.append("-DVTA_COMPUTE_DONE_WR_OFFSET=%s" % \ - (self.compute_done_wr_offet)) + (self.compute_done_wr_offet)) self.macro_defs.append("-DVTA_COMPUTE_DONE_RD_OFFSET=%s" % \ - (self.compute_done_rd_offet)) + (self.compute_done_rd_offet)) self.macro_defs.append("-DVTA_COMPUTE_UOP_ADDR_OFFSET=%s" % \ - (self.compute_uop_addr_offset)) + (self.compute_uop_addr_offset)) self.macro_defs.append("-DVTA_COMPUTE_BIAS_ADDR_OFFSET=%s" % \ - (self.compute_bias_addr_offset)) + (self.compute_bias_addr_offset)) self.macro_defs.append("-DVTA_STORE_OUT_ADDR_OFFSET=%s" % \ - (self.store_out_addr_offset)) + (self.store_out_addr_offset)) # Coherency if coherent: self.macro_defs.append("-DVTA_COHERENT_ACCESSES=true") diff --git a/vta/python/vta/top/bitpack.py b/vta/python/vta/top/bitpack.py index 6e9d57bc0001c..7a0710053b877 100644 --- a/vta/python/vta/top/bitpack.py +++ b/vta/python/vta/top/bitpack.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from topi import util from tvm.relay.op.op import register_compute, register_injective_schedule @@ -59,7 +60,7 @@ def bitpack(data, bits, pack_type="int8", name="bitpack"): def _bitpack(*indices): ret = None - mask = tvm.const((1 << bits) - 1, pack_type) + mask = tvm.tir.const((1 << bits) - 1, pack_type) for k in range(lanes): idx = list(indices) idx[-1] = idx[-1] * lanes + k @@ -67,11 +68,11 @@ def _bitpack(*indices): if k == 0: ret = elem & mask else: - val = (elem & mask) << tvm.const(k * bits, pack_type) + val = (elem & mask) << tvm.tir.const(k * bits, pack_type) ret = ret | val return ret - return tvm.compute( + return te.compute( oshape, _bitpack, name=name, tag='bitpack') diff --git a/vta/python/vta/top/op.py b/vta/python/vta/top/op.py index 04e14b1e2bddc..fe89341d9df80 100644 --- a/vta/python/vta/top/op.py +++ b/vta/python/vta/top/op.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te import topi from tvm.relay.op import op as reg @@ -42,13 +43,13 @@ def compute_clip_vta(attrs, inputs, output_type): x = inputs[0] a_min = attrs.a_min a_max = attrs.a_max - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) with tvm.tag_scope(topi.tag.ELEMWISE): - x = tvm.compute( - x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute( - x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + x = te.compute( + x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute( + x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return [x] def clip_strategy_vta(attrs, inputs, out_type, target): diff --git a/vta/python/vta/top/vta_conv2d.py b/vta/python/vta/top/vta_conv2d.py index ba93b05ca2323..5b23ddeba1c13 100644 --- a/vta/python/vta/top/vta_conv2d.py +++ b/vta/python/vta/top/vta_conv2d.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi @@ -44,14 +45,14 @@ def conv2d_packed(cfg, data, kernel, strides, padding, dilation, layout, out_dty ishape = topi.util.get_const_tuple(data.shape) kshape = topi.util.get_const_tuple(kernel.shape) - d_i = tvm.reduce_axis((0, kshape[2]), name='d_i') - d_j = tvm.reduce_axis((0, kshape[3]), name='d_j') - k_o = tvm.reduce_axis((0, ishape[1]), name='k_o') - k_i = tvm.reduce_axis((0, ishape[-1]), name='k_i') + d_i = te.reduce_axis((0, kshape[2]), name='d_i') + d_j = te.reduce_axis((0, kshape[3]), name='d_j') + k_o = te.reduce_axis((0, ishape[1]), name='k_o') + k_i = te.reduce_axis((0, ishape[-1]), name='k_i') hstride, wstride = strides - res = tvm.compute( + res = te.compute( oshape, - lambda b_o, c_o, i, j, b_i, c_i: tvm.sum( + lambda b_o, c_o, i, j, b_i, c_i: te.sum( pad_data[b_o, k_o, i*hstride+d_i, j*wstride+d_j, b_i, k_i].astype(out_dtype) * kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype), axis=[k_o, d_i, d_j, k_i]), @@ -81,7 +82,7 @@ def _traverse(op): else: ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -92,7 +93,7 @@ def _traverse(op): _traverse(output.op) assert len(conv2d_res) == 1 conv2d_stage = conv2d_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, x_i, x_j, _, _ = s[conv2d_stage].op.axis @@ -107,7 +108,7 @@ def _traverse(op): ###### space definition end ###### data, kernel = conv2d_stage.op.input_tensors - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: temp = data.op.input_tensors[0] pad_data = data data = temp @@ -160,13 +161,13 @@ def _traverse(op): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) # virtual threading along spatial rows if cfg['h_nthread'].val > 1: _, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis diff --git a/vta/python/vta/top/vta_conv2d_transpose.py b/vta/python/vta/top/vta_conv2d_transpose.py index a3fd7ac92cd3e..4f213f64d0dae 100644 --- a/vta/python/vta/top/vta_conv2d_transpose.py +++ b/vta/python/vta/top/vta_conv2d_transpose.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi from topi.util import get_const_tuple @@ -52,14 +53,14 @@ def conv2d_transpose_packed(cfg, data, kernel, strides, padding, out_dtype): out_h = (i_h - 1) * stride_h - fpad_top - fpad_bottom + k_h out_w = (i_w - 1) * stride_w - fpad_left - fpad_right + k_w oshape = (b, c_o, out_h, out_w, t_b, t_co) - d_c = tvm.reduce_axis((0, c_i), name='d_c') - d_h = tvm.reduce_axis((0, k_h), name='d_h') - d_w = tvm.reduce_axis((0, k_w), name='d_w') - d_ci = tvm.reduce_axis((0, t_ci), name='d_ci') + d_c = te.reduce_axis((0, c_i), name='d_c') + d_h = te.reduce_axis((0, k_h), name='d_h') + d_w = te.reduce_axis((0, k_w), name='d_w') + d_ci = te.reduce_axis((0, t_ci), name='d_ci') - out = tvm.compute( + out = te.compute( oshape, - lambda i_n, i_c, i_h, i_w, j_n, j_c: tvm.sum( + lambda i_n, i_c, i_h, i_w, j_n, j_c: te.sum( data_pad(i_n, d_c, i_h + d_h, i_w + d_w, j_n, d_ci).astype(out_dtype) * kernel[i_c, d_c, d_h, d_w, j_c, d_ci].astype(out_dtype), axis=[d_c, d_h, d_w, d_ci]), @@ -87,7 +88,7 @@ def _traverse(op): if not op.same_as(output.op): ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -98,7 +99,7 @@ def _traverse(op): _traverse(output.op) assert len(conv2d_res) == 1 conv2d_stage = conv2d_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, x_i, x_j, _, c_i = s[conv2d_stage].op.axis @@ -113,7 +114,7 @@ def _traverse(op): ###### space definition end ###### data, kernel = conv2d_stage.op.input_tensors - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: temp = data.op.input_tensors[0] pad_data = data data = temp @@ -162,13 +163,13 @@ def _traverse(op): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) # virtual threading along spatial rows if cfg['h_nthread'].val > 1: _, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis diff --git a/vta/python/vta/top/vta_dense.py b/vta/python/vta/top/vta_dense.py index e23910447ba8f..912f41f30dfb6 100644 --- a/vta/python/vta/top/vta_dense.py +++ b/vta/python/vta/top/vta_dense.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi @@ -48,11 +49,11 @@ def dense_packed(cfg, data, weight, bias=None, out_dtype=None): # Reduction axes (input channel) assert ishape[1] == wshape[1] assert ishape[3] == wshape[3] - k_o = tvm.reduce_axis((0, ishape[1]), name='k_o') - k_i = tvm.reduce_axis((0, ishape[3]), name='k_i') - res = tvm.compute( + k_o = te.reduce_axis((0, ishape[1]), name='k_o') + k_i = te.reduce_axis((0, ishape[3]), name='k_i') + res = te.compute( oshape, - lambda b_o, c_o, b_i, c_i: tvm.sum( + lambda b_o, c_o, b_i, c_i: te.sum( data[b_o, k_o, b_i, k_i].astype(out_dtype) * weight[c_o, k_o, c_i, k_i].astype(out_dtype), axis=[k_o, k_i]), @@ -83,7 +84,7 @@ def _traverse(op): else: ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -94,7 +95,7 @@ def _traverse(op): _traverse(output.op) assert len(dense_res) == 1 dense_stage = dense_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, _, _ = s[dense_stage].op.axis @@ -147,7 +148,7 @@ def _traverse(op): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_bi, _ = s[dense_stage].op.axis k_o, _ = s[dense_stage].op.reduce_axis diff --git a/vta/python/vta/top/vta_group_conv2d.py b/vta/python/vta/top/vta_group_conv2d.py index aa06c61c3ec02..d470fb77038bb 100644 --- a/vta/python/vta/top/vta_group_conv2d.py +++ b/vta/python/vta/top/vta_group_conv2d.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi @@ -54,14 +55,14 @@ def group_conv2d_packed(cfg, kshape = topi.util.get_const_tuple(kernel.shape) assert group * kshape[1] == ishape[1] assert kshape[0] % group == 0 - d_i = tvm.reduce_axis((0, kshape[2]), name='d_i') - d_j = tvm.reduce_axis((0, kshape[3]), name='d_j') - k_o = tvm.reduce_axis((0, kshape[1]), name='k_o') - k_i = tvm.reduce_axis((0, kshape[-1]), name='k_i') + d_i = te.reduce_axis((0, kshape[2]), name='d_i') + d_j = te.reduce_axis((0, kshape[3]), name='d_j') + k_o = te.reduce_axis((0, kshape[1]), name='k_o') + k_i = te.reduce_axis((0, kshape[-1]), name='k_i') hstride, wstride = strides - out = tvm.compute( + out = te.compute( oshape, - lambda b_o, c_o, i, j, b_i, c_i: tvm.sum( + lambda b_o, c_o, i, j, b_i, c_i: te.sum( pad_data[b_o, c_o // (kshape[0] // group) * kshape[1] + k_o, i * hstride + d_i, j * wstride + d_j, b_i, k_i].astype(out_dtype) * kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype), @@ -95,7 +96,7 @@ def _traverse(op): else: ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -106,7 +107,7 @@ def _traverse(op): _traverse(output.op) assert len(conv2d_res) == 1 conv2d_stage = conv2d_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, x_i, x_j, _, _ = s[conv2d_stage].op.axis @@ -121,7 +122,7 @@ def _traverse(op): ###### space definition end ###### data, kernel = conv2d_stage.op.input_tensors - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: temp = data.op.input_tensors[0] pad_data = data data = temp @@ -174,13 +175,13 @@ def _traverse(op): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) # virtual threading along spatial rows if cfg['h_nthread'].val > 1: _, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis diff --git a/vta/scripts/tune_conv2d.py b/vta/scripts/tune_conv2d.py index 265a6392b0546..ff02485b515bd 100644 --- a/vta/scripts/tune_conv2d.py +++ b/vta/scripts/tune_conv2d.py @@ -22,6 +22,7 @@ import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -51,10 +52,10 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation): @@ -62,9 +63,9 @@ def conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation): kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN) bias_shape = (N//env.BATCH, CO//env.BLOCK_OUT, 1, 1, env.BATCH, env.BLOCK_OUT) - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) with tvm.target.vta(): res = topi.nn.conv2d( @@ -83,7 +84,7 @@ def conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_conv2d_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, bias, res] diff --git a/vta/scripts/tune_conv2d_transpose.py b/vta/scripts/tune_conv2d_transpose.py index d6475abff6679..f09ba4d2566f7 100644 --- a/vta/scripts/tune_conv2d_transpose.py +++ b/vta/scripts/tune_conv2d_transpose.py @@ -22,6 +22,7 @@ import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -44,18 +45,18 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding): data_shape = (N//env.BATCH, CI//env.BLOCK_IN, H, W, env.BATCH, env.BLOCK_IN) kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN) - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) with tvm.target.vta(): res = topi.nn.conv2d_transpose_nchw( @@ -71,7 +72,7 @@ def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_conv2d_transpose_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, res] diff --git a/vta/scripts/tune_dense.py b/vta/scripts/tune_dense.py index fa49be7f9b278..d738c99965bd2 100644 --- a/vta/scripts/tune_dense.py +++ b/vta/scripts/tune_dense.py @@ -22,6 +22,7 @@ import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -40,18 +41,18 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def dense(N, CI, CO): data_shape = (N//env.BATCH, CI//env.BLOCK_IN, env.BATCH, env.BLOCK_IN) kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, env.BLOCK_OUT, env.BLOCK_IN) - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) with tvm.target.vta(): res = topi.nn.dense(data, kernel, None, 'int32') @@ -62,7 +63,7 @@ def dense(N, CI, CO): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_dense([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, res] diff --git a/vta/scripts/tune_group_conv2d.py b/vta/scripts/tune_group_conv2d.py index 555154d708fce..1119d273a6b9d 100644 --- a/vta/scripts/tune_group_conv2d.py +++ b/vta/scripts/tune_group_conv2d.py @@ -22,6 +22,7 @@ import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -49,10 +50,10 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def group_conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation, group): @@ -62,9 +63,9 @@ def group_conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation, group): kernel_shape = (CO//env.BLOCK_OUT, CI_G//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN) bias_shape = (N//env.BATCH, CO//env.BLOCK_OUT, 1, 1, env.BATCH, env.BLOCK_OUT) - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) with tvm.target.vta(): res = topi.nn.group_conv2d_nchw( @@ -83,7 +84,7 @@ def group_conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation, group): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_group_conv2d_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, bias, res] diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py index cf6f42654e6e3..10af0ab079878 100644 --- a/vta/scripts/tune_resnet.py +++ b/vta/scripts/tune_resnet.py @@ -24,6 +24,7 @@ import topi import tvm +from tvm import te from tvm import rpc, autotvm, relay from tvm.autotvm.measure.measure_methods import request_remote from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner @@ -63,10 +64,10 @@ def register_vta_tuning_tasks(): @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x # init autotvm env to register VTA operator @@ -87,7 +88,7 @@ def _topi_nn_conv2d(*args, **kwargs): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_conv2d_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [A, W, res] @autotvm.task.register("topi_nn_dense", override=True) @@ -105,7 +106,7 @@ def _topi_nn_dense(*args, **kwargs): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_dense([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [A, W, res] diff --git a/vta/tests/python/integration/test_benchmark_gemm.py b/vta/tests/python/integration/test_benchmark_gemm.py index d4eed91aa1c7c..e023c2017acf9 100644 --- a/vta/tests/python/integration/test_benchmark_gemm.py +++ b/vta/tests/python/integration/test_benchmark_gemm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import util import vta.testing @@ -38,37 +39,37 @@ def run_gemm_packed(env, remote, batch_size, channel, block): # To compute number of ops, use a x2 factor for FMA num_ops = 2 * channel * channel * batch_size - ko = tvm.reduce_axis((0, channel // env.BLOCK_IN), name='ko') - ki = tvm.reduce_axis((0, env.BLOCK_IN), name='ki') + ko = te.reduce_axis((0, channel // env.BLOCK_IN), name='ko') + ki = te.reduce_axis((0, env.BLOCK_IN), name='ki') - data = tvm.placeholder(data_shape, + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) - weight = tvm.placeholder(weight_shape, + weight = te.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype) - data_buf = tvm.compute(data_shape, + data_buf = te.compute(data_shape, lambda *i: data(*i), "data_buf") - weight_buf = tvm.compute(weight_shape, + weight_buf = te.compute(weight_shape, lambda *i: weight(*i), "weight_buf") - res_gem = tvm.compute(res_shape, - lambda bo, co, bi, ci: tvm.sum( + res_gem = te.compute(res_shape, + lambda bo, co, bi, ci: te.sum( data_buf[bo, ko, bi, ki].astype(env.acc_dtype) * weight_buf[co, ko, ci, ki].astype(env.acc_dtype), axis=[ko, ki]), name="res_gem") - res_shf = tvm.compute(res_shape, + res_shf = te.compute(res_shape, lambda *i: res_gem(*i)>>8, name="res_shf") - res_max = tvm.compute(res_shape, - lambda *i: tvm.max(res_shf(*i), 0), + res_max = te.compute(res_shape, + lambda *i: tvm.te.max(res_shf(*i), 0), "res_max") #relu - res_min = tvm.compute(res_shape, - lambda *i: tvm.min(res_max(*i), (1<<(env.INP_WIDTH-1))-1), + res_min = te.compute(res_shape, + lambda *i: tvm.te.min(res_max(*i), (1<<(env.INP_WIDTH-1))-1), "res_min") #relu - res = tvm.compute(res_shape, + res = te.compute(res_shape, lambda *i: res_min(*i).astype(env.inp_dtype), name="res") @@ -128,7 +129,7 @@ def run_schedule(load_inp, store_out, print_ir, check_correctness): - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[data_buf].set_scope(env.inp_scope) s[weight_buf].set_scope(env.wgt_scope) s[res_gem].set_scope(env.acc_scope) diff --git a/vta/tests/python/integration/test_benchmark_topi_conv2d.py b/vta/tests/python/integration/test_benchmark_topi_conv2d.py index 6935e4794c4e7..dcffed9993529 100644 --- a/vta/tests/python/integration/test_benchmark_topi_conv2d.py +++ b/vta/tests/python/integration/test_benchmark_topi_conv2d.py @@ -25,6 +25,7 @@ from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.contrib import util @@ -64,10 +65,10 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def run_conv2d(env, remote, wl, target, @@ -104,9 +105,9 @@ def run_conv2d(env, remote, wl, target, data_shape = a_shape kernel_shape = w_shape bias_shape = b_shape - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad)) # Define base computation schedule diff --git a/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py b/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py index 2d96a7313480d..45a601742dce0 100644 --- a/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py +++ b/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py @@ -25,6 +25,7 @@ from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.contrib import util @@ -56,10 +57,10 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x # Helper function to get factors @@ -102,8 +103,8 @@ def run_conv2d_transpose(env, remote, wl, target, else: data_shape = a_shape kernel_shape = w_shape - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad)) # Define base computation schedule diff --git a/vta/tests/python/integration/test_benchmark_topi_dense.py b/vta/tests/python/integration/test_benchmark_topi_dense.py index a0acdc34acefd..f9451f5f77f00 100644 --- a/vta/tests/python/integration/test_benchmark_topi_dense.py +++ b/vta/tests/python/integration/test_benchmark_topi_dense.py @@ -24,6 +24,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.contrib import util from tvm.contrib.pickle_memoize import memoize @@ -38,10 +39,10 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def run_gemm(env, remote, target, @@ -70,8 +71,8 @@ def run_gemm(env, remote, target, kernel_shape = w_shape fcompute = topi.x86.dense_nopack fschedule = topi.x86.schedule_dense_nopack - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) # Define base computation schedule with target: diff --git a/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py b/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py index 31fef4923328c..3670eb4ec6738 100644 --- a/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py +++ b/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py @@ -25,6 +25,7 @@ from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.contrib import util @@ -60,10 +61,10 @@ @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def run_group_conv2d(env, remote, wl, target, @@ -101,9 +102,9 @@ def run_group_conv2d(env, remote, wl, target, data_shape = a_shape kernel_shape = w_shape bias_shape = b_shape - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad)) # Define base computation schedule diff --git a/vta/tests/python/pynq/test_program_rpc.py b/vta/tests/python/pynq/test_program_rpc.py index 2d8da5acc16a5..fb0873586d442 100644 --- a/vta/tests/python/pynq/test_program_rpc.py +++ b/vta/tests/python/pynq/test_program_rpc.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te from tvm import rpc from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime diff --git a/vta/tests/python/unittest/test_vta_insn.py b/vta/tests/python/unittest/test_vta_insn.py index ef3c45ce58d63..c76636a4d2420 100644 --- a/vta/tests/python/unittest/test_vta_insn.py +++ b/vta/tests/python/unittest/test_vta_insn.py @@ -16,6 +16,7 @@ # under the License. """Unit test VTA's instructions """ import tvm +from tvm import te import numpy as np import topi from tvm.contrib import util @@ -30,22 +31,22 @@ def test_save_load_out(): """Test save/store output command""" def _run(env, remote): n = 6 - x = tvm.placeholder( + x = te.placeholder( (n, n, env.BATCH, env.BLOCK_OUT), name="x", dtype=env.acc_dtype) - x_buf = tvm.compute( + x_buf = te.compute( (n, n, env.BATCH, env.BLOCK_OUT), lambda *i: x(*i), "x_buf") # insert no-op that won't be optimized away - y_buf = tvm.compute( + y_buf = te.compute( (n, n, env.BATCH, env.BLOCK_OUT), lambda *i: x_buf(*i)>>0, "y_buf") - y = tvm.compute( + y = te.compute( (n, n, env.BATCH, env.BLOCK_OUT), lambda *i: y_buf(*i).astype(env.inp_dtype), "y") # schedule - s = tvm.create_schedule(y.op) + s = te.create_schedule(y.op) s[x_buf].set_scope(env.acc_scope) s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy) s[y_buf].set_scope(env.acc_scope) @@ -93,22 +94,22 @@ def check_padded_load(pad_before, pad_after, test_name=None): # declare n = 3 m = 5 - x = tvm.placeholder( + x = te.placeholder( (n, m, env.BATCH, env.BLOCK_OUT), name="x", dtype=env.acc_dtype) x_buf = topi.nn.pad(x, pad_before, pad_after, name="y") # insert no-op that won't be optimized away - y_buf = tvm.compute((n + pad_before[0] + pad_after[0], + y_buf = te.compute((n + pad_before[0] + pad_after[0], m + pad_before[1] + pad_after[1], env.BATCH, env.BLOCK_OUT), lambda *i: x_buf(*i)>>0, "y_buf") - y = tvm.compute((n + pad_before[0] + pad_after[0], + y = te.compute((n + pad_before[0] + pad_after[0], m + pad_before[1] + pad_after[1], env.BATCH, env.BLOCK_OUT), lambda *i: y_buf(*i).astype(env.inp_dtype), "y") # schedule - s = tvm.create_schedule(y.op) + s = te.create_schedule(y.op) s[x_buf].set_scope(env.acc_scope) s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy) s[y_buf].set_scope(env.acc_scope) @@ -167,32 +168,32 @@ def _run(env, remote): o = 4 n = 1 m = 4 - x = tvm.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="x", dtype=env.inp_dtype) - w = tvm.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="w", dtype=env.wgt_dtype) - x_buf = tvm.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: x(*i), "x_buf") - w_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: w(*i), "w_buf") - ko = tvm.reduce_axis((0, n), name="ko") - ki = tvm.reduce_axis((0, env.BLOCK_IN), name="ki") - y_gem = tvm.compute( + x = te.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="x", dtype=env.inp_dtype) + w = te.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="w", dtype=env.wgt_dtype) + x_buf = te.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: x(*i), "x_buf") + w_buf = te.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: w(*i), "w_buf") + ko = te.reduce_axis((0, n), name="ko") + ki = te.reduce_axis((0, env.BLOCK_IN), name="ki") + y_gem = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda bo, co, bi, ci: - tvm.sum(x_buf[bo, ko, bi, ki].astype(env.acc_dtype) * + te.sum(x_buf[bo, ko, bi, ki].astype(env.acc_dtype) * w_buf[co, ko, ci, ki].astype(env.acc_dtype), axis=[ko, ki]), name="y_gem") - y_shf = tvm.compute( + y_shf = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: y_gem(*i)>>8, name="y_shf") - y_max = tvm.compute( + y_max = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.max(y_shf(*i), 0), + lambda *i: tvm.te.max(y_shf(*i), 0), "y_max") #relu - y_min = tvm.compute( + y_min = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.min(y_max(*i), (1<<(env.INP_WIDTH-1))-1), + lambda *i: tvm.te.min(y_max(*i), (1<<(env.INP_WIDTH-1))-1), "y_min") #relu - y = tvm.compute( + y = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: y_min(*i).astype(env.inp_dtype), name="y") @@ -240,7 +241,7 @@ def verify(s, name=None): def test_schedule1(): # default schedule with no smt - s = tvm.create_schedule(y.op) + s = te.create_schedule(y.op) # set the scope of the SRAM buffers s[x_buf].set_scope(env.inp_scope) s[w_buf].set_scope(env.wgt_scope) @@ -270,7 +271,7 @@ def test_schedule1(): def test_smt(): # test smt schedule - s = tvm.create_schedule(y.op) + s = te.create_schedule(y.op) s[x_buf].set_scope(env.inp_scope) s[w_buf].set_scope(env.wgt_scope) s[y_gem].set_scope(env.acc_scope) @@ -279,7 +280,7 @@ def test_smt(): s[y_min].set_scope(env.acc_scope) abo, aco, abi, aci = s[y].op.axis abo1, abo2 = s[y].split(abo, nparts=2) - s[y].bind(abo1, tvm.thread_axis("cthread")) + s[y].bind(abo1, te.thread_axis("cthread")) s[y_gem].compute_at(s[y], abo1) s[y_shf].compute_at(s[y], abo1) s[y_max].compute_at(s[y], abo1) @@ -315,38 +316,38 @@ def check_alu(tvm_op, np_op=None, use_imm=False, test_name=None): n = 8 imm = np.random.randint(1,5) # compute - a = tvm.placeholder( + a = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="a", dtype=env.acc_dtype) - a_buf = tvm.compute( + a_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), "a_buf") #DRAM->SRAM if use_imm: - res_buf = tvm.compute( + res_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: tvm_op(a_buf(*i), imm), "res_buf") #compute else: - b = tvm.placeholder( + b = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="b", dtype=env.acc_dtype) - b_buf = tvm.compute( + b_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: b(*i), "b_buf") #DRAM->SRAM - res_buf = tvm.compute( + res_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: tvm_op(a_buf(*i), b_buf(*i)), "res_buf") #compute5B - res = tvm.compute( + res = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_buf(*i).astype(env.inp_dtype), "res") #SRAM->DRAM # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[a_buf].set_scope(env.acc_scope) # SRAM s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM s[res_buf].set_scope(env.acc_scope) # SRAM @@ -402,8 +403,8 @@ def check_alu(tvm_op, np_op=None, use_imm=False, test_name=None): print("\t{:<16}: {:>16}".format(k, v)) check_alu(lambda x, y: x << y, np.left_shift, use_imm=True, test_name="SHL") - check_alu(tvm.max, np.maximum, use_imm=True, test_name="MAX") - check_alu(tvm.max, np.maximum, test_name="MAX") + check_alu(tvm.te.max, np.maximum, use_imm=True, test_name="MAX") + check_alu(tvm.te.max, np.maximum, test_name="MAX") check_alu(lambda x, y: x + y, use_imm=True, test_name="ADD") check_alu(lambda x, y: x + y, test_name="ADD") check_alu(lambda x, y: x >> y, np.right_shift, use_imm=True, test_name="SHR") @@ -417,28 +418,28 @@ def _run(env, remote): m = 8 n = 10 # compute - a = tvm.placeholder( + a = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="a", dtype=env.acc_dtype) - a_buf = tvm.compute( + a_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), "a_buf") # DRAM->SRAM - max_buf = tvm.compute( + max_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.max(a_buf(*i), 0), + lambda *i: tvm.te.max(a_buf(*i), 0), "res_buf") # relu - min_buf = tvm.compute( + min_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.min(max_buf(*i), (1<<(env.INP_WIDTH-1))-1), + lambda *i: tvm.te.min(max_buf(*i), (1<<(env.INP_WIDTH-1))-1), "max_buf") # relu - res = tvm.compute( + res = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: min_buf(*i).astype(env.inp_dtype), "min_buf") # SRAM->DRAM # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[a_buf].set_scope(env.acc_scope) # SRAM s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM s[max_buf].set_scope(env.acc_scope) # SRAM @@ -488,27 +489,27 @@ def _run(env, remote): imm_shift = np.random.randint(0,8) imm_scale = np.random.randint(1,5) # compute - a = tvm.placeholder( + a = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="a", dtype=env.acc_dtype) - a_buf = tvm.compute( + a_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), "a_buf") # DRAM->SRAM - res_shift = tvm.compute( + res_shift = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a_buf(*i)+imm_shift, "res_shift") # compute - res_scale = tvm.compute( + res_scale = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_shift(*i)>>imm_scale, "res_scale") # compute - res = tvm.compute( + res = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_scale(*i).astype(env.inp_dtype), "res") # SRAM->DRAM # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[a_buf].set_scope(env.acc_scope) # SRAM s[res_shift].set_scope(env.acc_scope) # SRAM s[res_scale].set_scope(env.acc_scope) # SRAM diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py index a20b8ec8d3d30..0aa6343d01c4d 100644 --- a/vta/tutorials/autotvm/tune_relay_vta.py +++ b/vta/tutorials/autotvm/tune_relay_vta.py @@ -60,6 +60,7 @@ import topi import tvm +from tvm import te from tvm import rpc, autotvm, relay from tvm.contrib import graph_runtime, util, download from tvm.autotvm.measure.measure_methods import request_remote @@ -300,10 +301,10 @@ def register_vta_tuning_tasks(): @tvm.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x # init autotvm env to register VTA operator @@ -323,7 +324,7 @@ def _topi_nn_conv2d(*args, **kwargs): if tvm.target.Target.current().device_name == 'vta': s = vta.top.schedule_conv2d_packed([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [A, W, res] diff --git a/vta/tutorials/frontend/deploy_vision_on_vta.py b/vta/tutorials/frontend/deploy_vision_on_vta.py index df02b4842488e..15cba4373056d 100644 --- a/vta/tutorials/frontend/deploy_vision_on_vta.py +++ b/vta/tutorials/frontend/deploy_vision_on_vta.py @@ -50,6 +50,7 @@ from matplotlib import pyplot as plt import tvm +from tvm import te from tvm import rpc, autotvm, relay from tvm.contrib import graph_runtime, util, download from tvm.contrib.debugger import debug_runtime diff --git a/vta/tutorials/matrix_multiply.py b/vta/tutorials/matrix_multiply.py index 3e46b427baf69..444762684bb9b 100644 --- a/vta/tutorials/matrix_multiply.py +++ b/vta/tutorials/matrix_multiply.py @@ -36,6 +36,7 @@ import os import tvm +from tvm import te import vta import numpy as np from tvm import rpc @@ -167,13 +168,13 @@ # Batch factor o (we use single batch inference) o = 1 # A placeholder tensor in tiled data format -A = tvm.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="A", dtype=env.inp_dtype) +A = te.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="A", dtype=env.inp_dtype) # B placeholder tensor in tiled data format -B = tvm.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="B", dtype=env.wgt_dtype) +B = te.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="B", dtype=env.wgt_dtype) # A copy buffer -A_buf = tvm.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: A(*i), "A_buf") +A_buf = te.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: A(*i), "A_buf") # B copy buffer -B_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_buf") +B_buf = te.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_buf") ###################################################################### # Matrix Multiplication @@ -186,8 +187,8 @@ # In order to implement matrix multiplication, the lambda function needs to # include a reduction formula over the input channel dimension axes. # To create a reduction formula, we can declare a reduction axis using -# :code:`tvm.reduce_axis`, which takes in the range of reductions. -# :code:`tvm.sum` takes in the expression to be reduced as well as +# :code:`te.reduce_axis`, which takes in the range of reductions. +# :code:`te.sum` takes in the expression to be reduced as well as # the reduction axes to compute the sum of value over all k in the declared # ranges. # @@ -198,14 +199,14 @@ # the computation should be done. # Outer input feature reduction axis -ko = tvm.reduce_axis((0, n), name="ko") +ko = te.reduce_axis((0, n), name="ko") # Inner input feature reduction axis -ki = tvm.reduce_axis((0, env.BLOCK_IN), name="ki") +ki = te.reduce_axis((0, env.BLOCK_IN), name="ki") # Describe the in-VTA matrix multiplication -C_buf = tvm.compute( +C_buf = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda bo, co, bi, ci: - tvm.sum(A_buf[bo, ko, bi, ki].astype(env.acc_dtype) * + te.sum(A_buf[bo, ko, bi, ki].astype(env.acc_dtype) * B_buf[co, ko, ci, ki].astype(env.acc_dtype), axis=[ko, ki]), name="C_buf") @@ -234,7 +235,7 @@ # input activation data format. # Cast to output type, and send to main memory -C = tvm.compute( +C = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: C_buf(*i).astype(env.inp_dtype), name="C") @@ -265,7 +266,7 @@ # :code:`C` in the following way: # Let's take a look at the generated schedule -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True)) ###################################################################### diff --git a/vta/tutorials/optimize/convolution_opt.py b/vta/tutorials/optimize/convolution_opt.py index e5cf8e595b159..2616fb28c89a9 100644 --- a/vta/tutorials/optimize/convolution_opt.py +++ b/vta/tutorials/optimize/convolution_opt.py @@ -39,6 +39,7 @@ import os import tvm +from tvm import te import vta import numpy as np @@ -167,16 +168,16 @@ env.BLOCK_OUT) # Convolution reduction axes -dy = tvm.reduce_axis((0, kernel_h), name='dy') -dx = tvm.reduce_axis((0, kernel_w), name='dx') -ic = tvm.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') -ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns') +dy = te.reduce_axis((0, kernel_h), name='dy') +dx = te.reduce_axis((0, kernel_w), name='dx') +ic = te.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') +ic_tns = te.reduce_axis((0, env.BLOCK_IN), name='ic_tns') # Input placeholder tensors -data = tvm.placeholder(data_shape, +data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) -kernel = tvm.placeholder(kernel_shape, +kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) @@ -185,33 +186,33 @@ data_buf = topi.nn.pad(data, [0, 0, pad_h, pad_w, 0, 0], name="data_buf") -kernel_buf = tvm.compute(kernel_shape, lambda *i: kernel(*i), "kernel_buf") +kernel_buf = te.compute(kernel_shape, lambda *i: kernel(*i), "kernel_buf") # Declare 2D convolution -res_conv = tvm.compute( +res_conv = te.compute( output_shape, - lambda bo, co, i, j, bi, ci: tvm.sum( + lambda bo, co, i, j, bi, ci: te.sum( data_buf[bo, ic, i*stride_h+dy, j*stride_w+dx, bi, ic_tns].astype(env.acc_dtype) * kernel_buf[co, ic, dy, dx, ci, ic_tns].astype(env.acc_dtype), axis=[ic, dy, dx, ic_tns]), name="res_conv") # Add shift stage for fix-point normalization -res_shr = tvm.compute(output_shape, +res_shr = te.compute(output_shape, lambda *i: res_conv(*i) >> 8, name="res_shr") # Apply clipping between (0, input max value) inp_max = (1 << (env.INP_WIDTH - 1)) - 1 -res_max = tvm.compute(output_shape, - lambda *i: tvm.max(res_shr(*i), 0), +res_max = te.compute(output_shape, + lambda *i: tvm.te.max(res_shr(*i), 0), "res_max") -res_min = tvm.compute(output_shape, - lambda *i: tvm.min(res_max(*i), inp_max), +res_min = te.compute(output_shape, + lambda *i: tvm.te.min(res_max(*i), inp_max), "res_min") # Result Tensor -res = tvm.compute(output_shape, +res = te.compute(output_shape, lambda *i: res_min(*i).astype(env.inp_dtype), name="res") @@ -228,7 +229,7 @@ # - Lowering to VTA hardware intrinsics # Create TVM schedule -s = tvm.create_schedule(res.op) +s = te.create_schedule(res.op) # Let's look at the default TVM schedule print(tvm.lower(s, [data, kernel, res], simple_mode=True)) @@ -306,7 +307,7 @@ # Perform virtual thread split along output channel outer axis _, tx = s[res].split(oc_out, factor=v_threads) s[res].reorder(tx, b_out) -s[res].bind(tx, tvm.thread_axis("cthread")) +s[res].bind(tx, te.thread_axis("cthread")) # Let's look at the current TVM schedule after blocking and virtual threading print(tvm.lower(s, [data, kernel, res], simple_mode=True)) diff --git a/vta/tutorials/optimize/matrix_multiply_opt.py b/vta/tutorials/optimize/matrix_multiply_opt.py index 2722af594c035..597a7e8ecf7f8 100644 --- a/vta/tutorials/optimize/matrix_multiply_opt.py +++ b/vta/tutorials/optimize/matrix_multiply_opt.py @@ -39,6 +39,7 @@ import os import tvm +from tvm import te import vta import numpy as np from tvm import rpc @@ -119,45 +120,45 @@ num_ops = in_channels * out_channels * batch_size * 2 # Reduction axes -ic = tvm.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') -ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns') +ic = te.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') +ic_tns = te.reduce_axis((0, env.BLOCK_IN), name='ic_tns') # Input placeholder tensors -data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) -weight = tvm.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype) +data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) +weight = te.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype) # Copy buffers -data_buf = tvm.compute(data_shape, +data_buf = te.compute(data_shape, lambda *i: data(*i), "data_buf") -weight_buf = tvm.compute(weight_shape, +weight_buf = te.compute(weight_shape, lambda *i: weight(*i), "weight_buf") # Declare matrix multiply computation -res_gemm = tvm.compute(output_shape, - lambda bo, co, bi, ci: tvm.sum( +res_gemm = te.compute(output_shape, + lambda bo, co, bi, ci: te.sum( data_buf[bo, ic, bi, ic_tns].astype(env.acc_dtype) * weight_buf[co, ic, ci, ic_tns].astype(env.acc_dtype), axis=[ic, ic_tns]), name="res_gem") # Add shift stage for fix-point normalization -res_shr = tvm.compute(output_shape, +res_shr = te.compute(output_shape, lambda *i: res_gemm(*i) >> env.INP_WIDTH, name="res_shr") # Apply clipping between (0, input max value) inp_max = (1<<(env.INP_WIDTH-1))-1 -res_max = tvm.compute(output_shape, - lambda *i: tvm.max(res_shr(*i), 0), +res_max = te.compute(output_shape, + lambda *i: tvm.te.max(res_shr(*i), 0), "res_max") -res_min = tvm.compute(output_shape, - lambda *i: tvm.min(res_max(*i), inp_max), +res_min = te.compute(output_shape, + lambda *i: tvm.te.min(res_max(*i), inp_max), "res_min") # Apply typecast to input data type before sending results back -res = tvm.compute(output_shape, +res = te.compute(output_shape, lambda *i: res_min(*i).astype(env.inp_dtype), name="res") @@ -173,7 +174,7 @@ # Create TVM schedule -s = tvm.create_schedule(res.op) +s = te.create_schedule(res.op) # Let's look at the default TVM schedule print(tvm.lower(s, [data, weight, res], simple_mode=True)) diff --git a/vta/tutorials/vta_get_started.py b/vta/tutorials/vta_get_started.py index dd305154a7333..3dd1f8c8753a1 100644 --- a/vta/tutorials/vta_get_started.py +++ b/vta/tutorials/vta_get_started.py @@ -36,6 +36,7 @@ import os import tvm +from tvm import te import vta import numpy as np @@ -137,9 +138,9 @@ # Batch factor o - total 1 x 1 = 1 o = 1 # A placeholder tensor in tiled data format -A = tvm.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="A", dtype=env.acc_dtype) +A = te.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="A", dtype=env.acc_dtype) # B placeholder tensor in tiled data format -B = tvm.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="B", dtype=env.acc_dtype) +B = te.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="B", dtype=env.acc_dtype) ###################################################################### # Copy Buffers @@ -158,9 +159,9 @@ # This can later be interpreted by the compiler as a cached read operation. # A copy buffer -A_buf = tvm.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A(*i), "A_buf") +A_buf = te.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A(*i), "A_buf") # B copy buffer -B_buf = tvm.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: B(*i), "B_buf") +B_buf = te.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: B(*i), "B_buf") ###################################################################### # Vector Addition @@ -174,7 +175,7 @@ # the computation should be done. # Describe the in-VTA vector addition -C_buf = tvm.compute( +C_buf = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A_buf(*i).astype(env.acc_dtype) + B_buf(*i).astype(env.acc_dtype), name="C_buf") @@ -199,7 +200,7 @@ # input activation data format. # Cast to output type, and send to main memory -C = tvm.compute( +C = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: C_buf(*i).astype(env.inp_dtype), name="C") @@ -231,7 +232,7 @@ # :code:`C` in the following way: # Let's take a look at the generated schedule -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True))