diff --git a/python/tvm/relay/adt.py b/python/tvm/relay/adt.py
index 0b1edc96d52d8..372eb34c903d7 100644
--- a/python/tvm/relay/adt.py
+++ b/python/tvm/relay/adt.py
@@ -186,7 +186,7 @@ def __init__(self, lhs, rhs):
 class Match(Expr):
     """Pattern matching expression in Relay."""
 
-    def __init__(self, data, clauses, complete=True):
+    def __init__(self, data, clauses, complete=False):
         """Construct a Match.
 
         Parameters
diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py
index bbc0fec67bf66..ed7182ae5b23d 100644
--- a/python/tvm/relay/frontend/tensorflow.py
+++ b/python/tvm/relay/frontend/tensorflow.py
@@ -24,7 +24,13 @@
 # Numpy support
 import numpy as np
 
 import tvm
+
+from tvm.relay.prelude import Prelude
+from topi.util import get_const_tuple
+
 from .. import analysis
 from .. import expr as _expr
 from .. import op as _op
@@ -506,6 +512,69 @@ def _impl(inputs, attr, params):
         return _op.concatenate(inputs_reshaped, axis)
     return _impl
 
+def _tensor_array():
+    def _impl(inputs, attr, params, prelude):
+        # inputs[0] is the requested size of the tensor array.
+        return prelude.tensor_array(_op.take(inputs[0], tvm.relay.const(0)))
+    return _impl
+
+def _tensor_array_scatter():
+    def _impl(inputs, attr, params, prelude):
+        # Unstack the value tensor along axis 0, using the unstack helper
+        # that matches its rank.
+        if len(inputs[2].type_annotation.shape) == 1:
+            values = prelude.tensor_array_unstack_tensor1(inputs[2])
+        elif len(inputs[2].type_annotation.shape) == 2:
+            values = prelude.tensor_array_unstack_tensor2(inputs[2])
+        return prelude.tensor_array_scatter(inputs[0], inputs[1], values)
+    return _impl
+
+def _tensor_array_gather():
+    def _impl(inputs, attr, params, prelude):
+        return prelude.tensor_array_gather(inputs[2], inputs[1])
+    return _impl
+
+def _tensor_array_size():
+    def _impl(inputs, attr, params, prelude):
+        return prelude.tensor_array_size(inputs[0])
+    return _impl
+
+def _tensor_array_write():
+    def _impl(inputs, attr, params, prelude):
+        # Wrap the value in the tensor ADT constructor matching its rank.
+        if len(inputs[2].type_annotation.shape) == 1:
+            v = prelude.tensor1(inputs[2])
+        elif len(inputs[2].type_annotation.shape) == 2:
+            v = prelude.tensor2(inputs[2])
+        elif len(inputs[2].type_annotation.shape) == 3:
+            v = prelude.tensor3(inputs[2])
+        return prelude.tensor_array_write(inputs[3], _op.take(inputs[1], tvm.relay.const(0)), v)
+    return _impl
+
+def _tensor_array_read():
+    def _impl(inputs, attr, params, prelude):
+        return prelude.tensor_array_read(inputs[2], _op.take(inputs[1], tvm.relay.const(0)))
+    return _impl
+
+def _tensor_array_split():
+    def _impl(inputs, attr, params, prelude):
+        # Wrap the value in the tensor ADT constructor matching its rank.
+        if len(inputs[1].type_annotation.shape) == 2:
+            v = prelude.tensor2(inputs[1])
+        elif len(inputs[1].type_annotation.shape) == 3:
+            v = prelude.tensor3(inputs[1])
+        lengths = _op.cast(inputs[2], 'int32')
+        return prelude.tensor_array_split(inputs[0], v, lengths)
+    return _impl
+
+def _tensor_array_concat():
+    def _impl(inputs, attr, params, prelude):
+        return prelude.tensor_array_concat(inputs[1])
+    return _impl
+
 def _tile():
     def _impl(inputs, attr, params):
         reps = params[inputs.pop().name_hint].asnumpy()
@@ -1285,6 +1355,14 @@ def _impl(inputs, attr, params):
     'Neg'                               : AttrCvt('negative'),
     'NotEqual'                          : _broadcast('not_equal'),
     'Pack'                              : _pack(),
+    'TensorArrayV3'                     : _tensor_array(),
+    'TensorArrayScatterV3'              : _tensor_array_scatter(),
'TensorArrayGatherV3' : _tensor_array_gather(), + 'TensorArraySizeV3' : _tensor_array_size(), + 'TensorArrayWriteV3' : _tensor_array_write(), + 'TensorArrayReadV3' : _tensor_array_read(), + 'TensorArraySplitV3' : _tensor_array_split(), + 'TensorArrayConcatV3' : _tensor_array_concat(), 'Pad' : _pad('Pad'), 'PadV2' : _pad('PadV2'), 'Pow' : _elemwise('power'), @@ -1830,6 +1908,7 @@ def __init__(self): self._loops = {} self._branches = {} self._mod = _module.Module({}) + self._prelude = Prelude(self._mod) def from_tensorflow(self, graph, layout="NHWC", shape=None, outputs=None): """Construct relay nodes from tensorflow graph definition - GraphDef. @@ -2306,7 +2385,11 @@ def _convert_operator(self, op_name, inputs, attrs, if op_name in identity_list: sym = get_relay_op(op_name)(*inputs, **attrs) elif op_name in convert_map: - sym = convert_map[op_name](inputs, attrs, self._params) + if 'TensorArray' in op_name: + sym = convert_map[op_name](inputs, attrs, self._params, self._prelude) + else: + sym = convert_map[op_name](inputs, attrs, self._params) + elif op_name in convert_map_rnn: sym = self._convert_rnn_operator(op_name, inputs, attrs, self._params, graph, diff --git a/python/tvm/relay/op/_tensor.py b/python/tvm/relay/op/_tensor.py index 176def347042c..3832196f26a64 100644 --- a/python/tvm/relay/op/_tensor.py +++ b/python/tvm/relay/op/_tensor.py @@ -20,6 +20,8 @@ import topi from .op import register_compute, register_schedule, register_pattern from .op import schedule_injective, OpPattern +from ...hybrid import script +from ...api import convert schedule_broadcast = schedule_injective schedule_elemwise = schedule_injective @@ -104,3 +106,75 @@ def clip_compute(attrs, inputs, output_type, target): return [topi.clip(inputs[0], attrs.a_min, attrs.a_max)] register_schedule("clip", schedule_elemwise) + +@script +def _cast_shape_function(x): + out_ndim = len(x) + out = output_tensor((out_ndim,), "int64") + for i in const_range(out_ndim): + out[i] = x[i] + return out + +def cast_shape_func(attrs, inputs, out_ndims): + return [_cast_shape_function(*inputs)] + +@script +def _expand_dims_shape_func(x): + ndim = len(x.shape) + out = output_tensor((ndim+1,), "int64") + out[0] = int64(1) + for i in const_range(0, ndim): + out[i+1] = int64(x.shape[i]) + return out + +def expand_dims_shape_func(attrs, inputs, out_ndims): + return [_expand_dims_shape_func(*inputs)] + +# shape func +@script +def _broadcast_shape_func(x, y, ndim): + out = output_tensor((ndim,), "int64") + if len(x.shape) == 0: + for i in const_range(ndim): + out[i] = y[i] + elif len(y.shape) == 0: + for i in const_range(ndim): + out[i] = x[i] + else: + ndim1 = x.shape[0] + ndim2 = y.shape[0] + for i in const_range(1, min(ndim1, ndim2)+1): + if x[ndim1-i] == y[ndim2-i]: + out[ndim-i] = x[ndim1-i] + elif x[ndim1-i] == 1: + out[ndim-i] = y[ndim2-i] + else: + assert y[ndim2 - i] == 1, "Incompatible broadcast type %s and %s" % ( + x[ndim1-i], y[ndim2-i]) + out[ndim-i] = x[ndim1-i] + for i in const_range(min(ndim1, ndim2)+1, ndim+1): + if ndim1 >= ndim2: + out[ndim-i] = x[ndim1-i] + else: + out[ndim-i] = y[ndim2-i] + return out + +def broadcast_shape_func(attrs, inputs, out_ndims): + return [_broadcast_shape_func(*inputs, out_ndims[0])] + +register_shape_func("expand_dims", False, expand_dims_shape_func) +register_shape_func("cast", False, cast_shape_func) + +register_shape_func("add", False, broadcast_shape_func) +register_shape_func("subtract", False, broadcast_shape_func) +register_shape_func("multiply", False, broadcast_shape_func) 
+register_shape_func("divide", False, broadcast_shape_func) +register_shape_func("mod", False, broadcast_shape_func) +register_shape_func("logical_and", False, broadcast_shape_func) +register_shape_func("logical_or", False, broadcast_shape_func) +register_shape_func("equal", False, broadcast_shape_func) +register_shape_func("not_equal", False, broadcast_shape_func) +register_shape_func("less", False, broadcast_shape_func) +register_shape_func("less_equal", False, broadcast_shape_func) +register_shape_func("greater", False, broadcast_shape_func) +register_shape_func("greater_equal", False, broadcast_shape_func) diff --git a/python/tvm/relay/prelude.py b/python/tvm/relay/prelude.py index b5eac75180cf4..0bc5e848a7863 100644 --- a/python/tvm/relay/prelude.py +++ b/python/tvm/relay/prelude.py @@ -17,11 +17,12 @@ # pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name """A prelude containing useful global functions and ADT definitions.""" import os -from .ty import GlobalTypeVar, TypeVar, FuncType, TupleType, scalar_type +from .ty import GlobalTypeVar, TensorType, Any, TypeVar, FuncType, TupleType, scalar_type from .expr import Var, Function, GlobalVar, Let, If, Tuple, TupleGetItem, const from .op.tensor import add, subtract, equal from .adt import Constructor, TypeData, Clause, Match from .adt import PatternConstructor, PatternVar, PatternWildcard +from . import op from .parser import fromtext __PRELUDE_PATH__ = os.path.dirname(os.path.realpath(__file__)) from .module import Module @@ -29,6 +30,51 @@ class Prelude: """Contains standard definitions.""" + def define_tensor_adt(self): + """dynamic tensor + """ + self.tensor_t = GlobalTypeVar("tensor_t") + tensor0_type = TensorType([]) + tensor1_type = TensorType([Any()]) + tensor2_type = TensorType([Any(), Any()]) + tensor3_type = TensorType([Any(), Any(), Any()]) + self.tensor_nil = Constructor("tensor_nil", [], self.tensor_t) + self.tensor0 = Constructor("tensor0", [tensor0_type], self.tensor_t) + self.tensor1 = Constructor("tensor1", [tensor1_type], self.tensor_t) + self.tensor2 = Constructor("tensor2", [tensor2_type], self.tensor_t) + self.tensor3 = Constructor("tensor3", [tensor3_type], self.tensor_t) + self.mod[self.tensor_t] = TypeData(self.tensor_t, [], [self.tensor_nil, self.tensor0, self.tensor1, self.tensor2]) + + def define_tensor_add_one(self): + self.tensor_add_one = GlobalVar("tensor_add_one") + x = Var("x", self.tensor_t()) + t0 = Var("t0") + t1 = Var("t1") + t2 = Var("t2") + tensor0_case = Clause(PatternConstructor(self.tensor0, [PatternVar(t0)]), self.tensor1(op.expand_dims(t0, 0, 1))) + tensor1_case = Clause(PatternConstructor(self.tensor1, [PatternVar(t1)]), self.tensor2(op.expand_dims(t1, 0, 1))) + tensor2_case = Clause(PatternConstructor(self.tensor2, [PatternVar(t2)]), self.tensor3(op.expand_dims(t2, 0, 1))) + self.mod[self.tensor_add_one] = Function([x], Match(x, [tensor0_case, tensor1_case, tensor2_case])) + + def define_tensor_concat(self): + self.tensor_concatenate = GlobalVar("tensor_concatenate") + x = Var("x", self.tensor_t()) + y = Var("y", self.tensor_t()) + + t11 = Var("t11") + t12 = Var("t12") + t21 = Var("t21") + t22 = Var("t22") + tensor1_case = Clause(PatternConstructor(self.tensor1, [PatternVar(t11)]), + Match(y, [Clause(PatternConstructor(self.tensor1, [PatternVar(t12)]), + self.tensor1(op.concatenate([t11, t12], axis=0)) )] + )) + tensor2_case = Clause(PatternConstructor(self.tensor2, [PatternVar(t21)]), + Match(y, [Clause(PatternConstructor(self.tensor2, [PatternVar(t22)]), + 
self.tensor2(op.concatenate([t21, t22], axis=0)) )] + )) + self.mod[self.tensor_concatenate] = Function([x, y], Match(x, [tensor1_case, tensor2_case])) + def define_list_adt(self): """Defines a LISP-style list ADT. An empty list is represented by nil(). A member x can be appended to the @@ -39,6 +85,215 @@ def define_list_adt(self): self.cons = Constructor("cons", [a, self.l(a)], self.l) self.mod[self.l] = TypeData(self.l, [a], [self.nil, self.cons]) + def define_tensor_array(self): + self.tensor_array = GlobalVar("tensor_array") + n = Var("x", scalar_type('int32')) + body = If(equal(n, const(0)), + self.nil(), + self.cons(self.tensor_nil(), self.tensor_array(subtract(n, const(1))))) + self.mod[self.tensor_array] = Function([n], body, self.l(self.tensor_t()), []) + + def define_tensor_array_read(self): + self.tensor_array_read = GlobalVar("tensor_array_read") + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + n = Var("x", scalar_type('int32')) + self.mod[self.tensor_array_read] = Function([tensor_array, n], self.nth(tensor_array, n), self.tensor_t(), []) + + def define_tensor_array_size(self): + self.tensor_array_size = GlobalVar("tensor_array_size") + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + self.mod[self.tensor_array_size] = Function([tensor_array], self.length(tensor_array), scalar_type('int32'), []) + + def define_tensor_array_write(self): + self.tensor_array_write = GlobalVar("tensor_array_write") + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + n = Var("x", scalar_type('int32')) + v = Var("v", self.tensor_t()) + self.mod[self.tensor_array_write] = Function([tensor_array, n, v], self.update(tensor_array, n, v), self.l(self.tensor_t()), []) + + def define_tensor_array_stack(self): + self.tensor_array_stack = GlobalVar("tensor_array_stack") + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + tensor_array_add_one = self.map(self.tensor_add_one, tensor_array) + tensors = self.foldl(self.tensor_concatenate, self.hd(tensor_array_add_one), self.tl(tensor_array_add_one)) + self.mod[self.tensor_array_stack] = Function([tensor_array], tensors, self.tensor_t(), []) + + def define_tensor_array_unstack_tensor1(self): + self.tensor_array_unstack_tensor1_helper = GlobalVar("tensor_array_unstack_tensor1_helper") + tensor = Var("t", TensorType([Any()])) + up = Var("up", scalar_type('int32')) + i = Var("i", scalar_type('int32')) + + helper_body = If(equal(i, up), self.nil(), self.cons(self.tensor0(op.take(tensor, i)), + self.tensor_array_unstack_tensor1_helper(add(i, const(1)), up, tensor)) + ) + self.mod[self.tensor_array_unstack_tensor1_helper] = Function([i, up, tensor], helper_body, self.l(self.tensor_t()), []) + + self.tensor_array_unstack_tensor1 = GlobalVar("tensor_array_unstack_tensor1") + tensor1 = Var("tensor", TensorType([Any()])) + shape = op.shape_of(tensor1) + ndim = op.take(shape, const(0)) + self.mod[self.tensor_array_unstack_tensor1] = Function([tensor1], self.tensor_array_unstack_tensor1_helper(const(0), ndim, tensor1), + self.l(self.tensor_t()), []) + + def define_tensor_array_unstack_tensor2(self): + self.tensor_array_unstack_tensor2_helper = GlobalVar("tensor_array_unstack_tensor2_helper") + tensor = Var("t", TensorType([Any(), Any()])) + up = Var("up", scalar_type('int32')) + i = Var("i", scalar_type('int32')) + + helper_body = If(equal(i, up), + self.nil(), + self.cons(self.tensor1(op.take(tensor, i, axis=0)), + self.tensor_array_unstack_tensor2_helper( + add(i, const(1)), up, tensor))) + 
self.mod[self.tensor_array_unstack_tensor2_helper] = Function([i, up, tensor], helper_body, self.l(self.tensor_t()), []) + + self.tensor_array_unstack_tensor2 = GlobalVar("tensor_array_unstack_tensor2") + tensor2 = Var("tensor", TensorType([Any(), Any()])) + shape = op.shape_of(tensor2) + ndim = op.take(shape, const(0)) + self.mod[self.tensor_array_unstack_tensor2] = Function([tensor2], self.tensor_array_unstack_tensor2_helper(const(0), ndim, tensor2), + self.l(self.tensor_t()), []) + + print(self.mod[self.tensor_array_unstack_tensor2].astext()) + print(self.mod[self.tensor_array_unstack_tensor2_helper].astext()) + + def define_tensor_array_scatter(self): + self.tensor_array_scatter_helper = GlobalVar("tensor_array_scatter_helper") + ta = Var("ta", self.l(self.tensor_t())) + current = Var("current", scalar_type('int32')) + limit = Var("limit", scalar_type('int32')) + indices_ = Var('indices_', TensorType([Any()], 'int32')) + values_ = Var('values_', self.l(self.tensor_t())) + + helper_body = If(equal(current, limit), + ta, + self.tensor_array_scatter_helper( + self.tensor_array_write(ta, op.take(indices_, current), self.tensor_array_read(values_, current)), + add(current, const(1)), + limit, indices_, values_)) + + self.mod[self.tensor_array_scatter_helper] = Function([ta, current, limit, indices_, values_], helper_body, self.l(self.tensor_t()), []) + + self.tensor_array_scatter = GlobalVar("tensor_array_scatter") + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + indices = Var('indices', TensorType([Any()], 'int32')) + values = Var('values', self.l(self.tensor_t())) + + indices_shape = op.shape_of(indices) + limit = op.take(indices_shape, const(0)) + body = self.tensor_array_scatter_helper(tensor_array, const(0), limit, indices, values) + self.mod[self.tensor_array_scatter] = Function([tensor_array, indices, values], body, self.l(self.tensor_t()), []) + + def define_tensor_array_gather(self): + self.tensor_array_gather_helper = GlobalVar("tensor_array_gather_helper") + ta = Var("ta", self.l(self.tensor_t())) + accu = Var("accu", self.l(self.tensor_t())) + current = Var("current", scalar_type('int32')) + limit = Var("limit", scalar_type('int32')) + indices_ = Var('indices_', TensorType([Any()], 'int32')) + + helper_body = If(equal(current, const(0)), + self.tensor_array_stack(accu), + self.tensor_array_gather_helper( + ta, + self.cons(self.tensor_array_read(ta, op.take(indices_, subtract(current, const(1)))), accu), + subtract(current, const(1)), + limit, indices_)) + + self.mod[self.tensor_array_gather_helper] = \ + Function([ta, accu, current, limit, indices_], helper_body, self.tensor_t(), []) + + self.tensor_array_gather = GlobalVar("tensor_array_gather") + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + indices = Var('indices', TensorType([Any()], 'int32')) + + indices_shape = op.shape_of(indices) + limit = op.take(indices_shape, const(0)) + body = self.tensor_array_gather_helper(tensor_array, self.nil(), limit, limit, indices) + self.mod[self.tensor_array_gather] = Function([tensor_array, indices], body, self.tensor_t(), []) + + def define_tensor_take(self): + self.tensor_take = GlobalVar('tensor_take') + t = Var('tensor', self.tensor_t()) + lower = Var('lower', scalar_type('int32')) + upper = Var('upper', scalar_type('int32')) + + t1 = Var('t1') + t2 = Var('t2') + t3 = Var('t3') + tensor1_case = Clause(PatternConstructor(self.tensor1, [PatternVar(t1)]), self.tensor1(op.take(t1, op.arange(lower, upper, dtype='int32'))) + ) + tensor2_case = 
Clause(PatternConstructor(self.tensor2, [PatternVar(t2)]), self.tensor2(op.take(t2, op.arange(lower, upper, dtype='int32'), axis=0)) + ) + tensor3_case = Clause(PatternConstructor(self.tensor3, [PatternVar(t3)]), self.tensor3(op.take(t3, op.arange(lower, upper, dtype='int32'), axis=0)) + ) + self.mod[self.tensor_take] = Function([t, lower, upper], Match(t, [tensor1_case, tensor2_case, tensor3_case]), self.tensor_t(), []) + + def define_tensor_array_split(self): + self.tensor_array_split_helper = GlobalVar('ta_split_helper') + ta1 = Var("tensor_array", self.l(self.tensor_t())) + value1 = Var('value1', self.tensor_t()) + offset1 = Var('offset1', scalar_type('int32')) + current1 = Var('current1', scalar_type('int32')) + limit1 = Var('limit1', scalar_type('int32')) + lengths1 = Var('lengths', TensorType([Any()], 'int32')) + + helper1_body = If(equal(current1, limit1), + ta1, + self.tensor_array_write( + self.tensor_array_split_helper( + ta1, + value1, + add(offset1, op.take(lengths1, current1)), + add(current1, const(1)), + limit1, + lengths1 + ), + current1, + self.tensor_take(value1, + offset1, + add( + op.take(lengths1, current1), + offset1) + ))) + self.mod[self.tensor_array_split_helper] = \ + Function([ta1, value1, offset1, current1, limit1, lengths1], helper1_body, self.l(self.tensor_t()), []) + + + self.tensor_array_split = GlobalVar("tensor_array_split") + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + value = Var('value', self.tensor_t()) + lengths = Var('lengths', TensorType([Any()], 'int32')) + + lengths_shape = op.shape_of(lengths) + lengths_limit = op.take(lengths_shape, const(0)) + body = self.tensor_array_split_helper( + tensor_array, + value, + const(0), + const(0), + lengths_limit, + lengths) + self.mod[self.tensor_array_split] = Function([tensor_array, value, lengths], body, self.l(self.tensor_t()), []) + + def define_tensor_array_concat(self): + self.tensor_array_concat = GlobalVar('tensor_array_concat') + tensor_array = Var("tensor_array", self.l(self.tensor_t())) + hd = Var("hd") + tl = Var("tl") + + nil_case = Clause(PatternConstructor(self.nil), self.tensor_nil()) + cons_case = Clause(PatternConstructor(self.cons, [PatternVar(hd), PatternVar(tl)]), + Match(tl, [ + Clause(PatternConstructor(self.nil), hd), + Clause(PatternWildcard(), self.tensor_concatenate(hd, self.tensor_array_concat(tl))) + ], False) + ) + + self.mod[self.tensor_array_concat] = Function([tensor_array], Match(tensor_array, [nil_case, cons_case], False), self.tensor_t(), []) + def define_list_hd(self): """Defines a function to get the head of a list. Assume the list has at least one element. 
@@ -519,3 +774,19 @@ def __init__(self, mod=None):
         self.define_tree_size()
         self.define_iterate()
+
+        self.define_tensor_adt()
+        self.define_tensor_take()
+        self.define_tensor_add_one()
+        self.define_tensor_concat()
+        self.define_tensor_array()
+        self.define_tensor_array_read()
+        self.define_tensor_array_size()
+        self.define_tensor_array_write()
+        self.define_tensor_array_stack()
+        self.define_tensor_array_unstack_tensor1()
+        self.define_tensor_array_unstack_tensor2()
+        self.define_tensor_array_scatter()
+        self.define_tensor_array_gather()
+        self.define_tensor_array_split()
+        self.define_tensor_array_concat()
diff --git a/src/relay/backend/interpreter.cc b/src/relay/backend/interpreter.cc
index 913d7addea4d2..afbdd7d8f0953 100644
--- a/src/relay/backend/interpreter.cc
+++ b/src/relay/backend/interpreter.cc
@@ -310,7 +310,126 @@ class Interpreter :
     return MakeClosure(func);
   }
 
-  Value InvokePrimitiveOp(Function func,
+  // Invoke the lowered shape function for a primitive whose output shapes
+  // depend on its input values, and return the computed output shapes.
+  Array<Shape> ComputeDynamicShape(const Function& func,
+                                   const Array<Value>& args) {
+    auto key = CCacheKeyNode::make(func, Target::Create("llvm"));
+    auto cfunc = engine_->LowerShapeFunc(key);
+    size_t arity = cfunc->inputs.size() + cfunc->outputs.size();
+
+    std::vector<TVMValue> values(arity);
+    std::vector<int> codes(arity);
+    TVMArgsSetter setter(values.data(), codes.data());
+    std::vector<NDArray> inputs(cfunc->inputs.size());
+    std::vector<NDArray> outputs(cfunc->outputs.size());
+
+    DLContext cpu_ctx;
+    cpu_ctx.device_type = kDLCPU;
+    cpu_ctx.device_id = 0;
+
+    auto fset_input = [&](size_t i, Value val, bool need_shape) {
+      const TensorValueNode* tv = val.as<TensorValueNode>();
+      CHECK(tv != nullptr) << "expect Tensor argument";
+      if (need_shape) {
+        int64_t ndim = tv->data.Shape().size();
+        NDArray shape_arr;
+        if (ndim == 0) {
+          shape_arr = NDArray::Empty({}, Type2TVMType(Int(64)), cpu_ctx);
+        } else {
+          shape_arr = NDArray::Empty({ndim}, Type2TVMType(Int(64)), cpu_ctx);
+          int64_t* data = reinterpret_cast<int64_t*>(shape_arr->data);
+          for (auto j = 0; j < ndim; ++j) {
+            data[j] = tv->data.Shape()[j];
+          }
+        }
+        inputs[i] = shape_arr;
+        setter(i, shape_arr);
+      } else {
+        auto arr = tv->data.CopyTo(cpu_ctx);
+        inputs[i] = arr;
+        setter(i, arr);
+      }
+    };
+
+    size_t arg_counter = 0;
+    for (size_t i = 0; i < args.size(); ++i) {
+      auto arg = args[i];
+      auto param = func->params[i];
+      int state = cfunc->shape_func_param_states[i]->value;
+      if (arg.as<TensorValueNode>()) {
+        if (state & kNeedInputData) {
+          fset_input(arg_counter++, arg, false);
+        }
+        if (state & kNeedInputShape) {
+          fset_input(arg_counter++, arg, true);
+        }
+      } else {
+        const TupleValueNode* tuple = arg.as<TupleValueNode>();
+        CHECK(tuple != nullptr);
+        if (state & kNeedInputData) {
+          for (size_t i = 0; i < tuple->fields.size(); ++i) {
+            fset_input(arg_counter++, tuple->fields[i], false);
+          }
+        }
+        if (state & kNeedInputShape) {
+          for (size_t i = 0; i < tuple->fields.size(); ++i) {
+            fset_input(arg_counter++, tuple->fields[i], true);
+          }
+        }
+      }
+    }
+    CHECK_EQ(arg_counter, cfunc->inputs.size())
+      << "Shape function input sizes mismatch";
+
+    auto fset_shape_output = [&](size_t i, Type val_type) {
+      const TensorTypeNode* rtype = val_type.as<TensorTypeNode>();
+      CHECK(rtype != nullptr);
+      int64_t ndim = rtype->shape.size();
+      auto arr = NDArray::Empty({ndim}, Type2TVMType(Int(64)), cpu_ctx);
+      outputs[i] = arr;
+      setter(arg_counter + i, arr);
+    };
+
+    auto ret_type = func->body->checked_type();
+    size_t out_cnt = 0;
+    if (auto rtype = ret_type.as<TupleTypeNode>()) {
+      out_cnt = rtype->fields.size();
+      for (size_t i = 0; i < out_cnt; ++i) {
+        fset_shape_output(i, rtype->fields[i]);
+      }
+    } else {
+      out_cnt = 1;
+      auto tt = Downcast<TensorType>(ret_type);
+      fset_shape_output(0, tt);
+    }
+    CHECK_EQ(cfunc->outputs.size(), out_cnt)
+      << "Shape function output sizes mismatch";
+
+    PackedFunc shape_func;
+    TVMRetValue rv;
+    if (const auto* f = runtime::Registry::Get("relay.backend.build")) {
+      tvm::runtime::Module m = (*f)(cfunc->funcs, cfunc->target);
+      shape_func = m.GetFunction(cfunc->func_name);
+    } else {
+      LOG(FATAL) << "relay.backend.build is not registered";
+    }
+    shape_func.CallPacked(TVMArgs(values.data(), codes.data(), arity), &rv);
+
+    // Get output shapes
+    Array<Shape> out_shapes;
+    for (auto out_tensor : outputs) {
+      int64_t* shape_data = reinterpret_cast<int64_t*>(out_tensor->data);
+      Shape out_shape;
+      for (int i = 0; i < out_tensor->shape[0]; ++i) {
+        out_shape.push_back(tvm::Integer(shape_data[i]));
+      }
+      out_shapes.push_back(out_shape);
+    }
+    return out_shapes;
+  }
+
+  Value InvokePrimitiveOp(const Function& func,
                           const Array<Value>& args) {
     auto call_node = func->body.as<CallNode>();
diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py
index 7b0bcfb7d584e..efa84004aa008 100644
--- a/tests/python/frontend/tensorflow/test_forward.py
+++ b/tests/python/frontend/tensorflow/test_forward.py
@@ -46,41 +46,70 @@ def convert_to_list(x):
         x = [x]
     return x
 
+def vmobj_to_list(o):
+    # Flatten VM objects / interpreter values into a plain list of numpy data.
+    if isinstance(o, tvm.relay.backend.vmobj.TensorObject):
+        return [o.asnumpy().tolist()]
+    elif isinstance(o, tvm.relay.backend.vmobj.DatatypeObject):
+        result = []
+        for f in o:
+            result.extend(vmobj_to_list(f))
+        return result
+    elif isinstance(o, tvm.relay.backend.interpreter.TupleValue):
+        result = []
+        for f in o.fields:
+            result.append(vmobj_to_list(f))
+        return result
+    elif isinstance(o, tvm.relay.backend.interpreter.ConstructorValue):
+        if o.constructor.name_hint == 'cons':
+            tl = vmobj_to_list(o.fields[1])
+            hd = vmobj_to_list(o.fields[0])
+            hd.extend(tl)
+            return hd
+        elif o.constructor.name_hint == 'nil':
+            return []
+        elif o.constructor.name_hint == 'tensor0':
+            return [o.fields[0].asnumpy()]
+        elif o.constructor.name_hint == 'tensor1':
+            return [o.fields[0].asnumpy()]
+        elif o.constructor.name_hint == 'tensor2':
+            return [o.fields[0].asnumpy()]
+        elif o.constructor.name_hint == 'tensor_nil':
+            return [0]
+        else:
+            raise RuntimeError("Unknown constructor: %s" % o.constructor.name_hint)
+    else:
+        raise RuntimeError("Unknown object type: %s" % type(o))
+
 def run_tvm_graph(graph_def, input_data, input_node, num_output=1,
-                  target='llvm', out_names=None, opt_level=3):
+                  target='llvm', out_names=None, opt_level=3, mode='graph_runtime'):
     """ Generic function to compile on relay and execute on tvm """
     input_data = convert_to_list(input_data)
     input_node = convert_to_list(input_node)
-
     layout = None
     if target == "cuda":
         layout = "NCHW"
     target_host = None
-
     shape_dict = {e: i.shape for e, i in zip(input_node, input_data)}
-
     mod, params = relay.frontend.from_tensorflow(graph_def,
                                                  layout=layout,
                                                  shape=shape_dict,
                                                  outputs=out_names)
-    with relay.build_config(opt_level=opt_level):
-        graph, lib, params = relay.build(mod, target, target_host, params)
-
-    ctx = tvm.context(target, 0)
-    from tvm.contrib import graph_runtime
-    m = graph_runtime.create(graph, lib, ctx)
-    # set inputs
-    for e, i in zip(input_node, input_data):
-        m.set_input(e, tvm.nd.array(i))
-
-    m.set_input(**params)
-    # execute
-    m.run()
-    # get outputs
-    assert out_names is None or num_output == len(out_names), (
-        "out_names: {} num_output: {}".format(out_names, num_output))
-    tvm_output_list = [m.get_output(i).asnumpy() for i in range(num_output)]
-    return tvm_output_list
+
+    if mode == 'interp':
+        # The TensorArray tests rely on prelude ADTs, which the debug
+        # interpreter can execute directly.
+        ex = relay.create_executor("debug", mod=mod, ctx=tvm.cpu(), target="llvm")
+        inputs = []
+        for param in mod['main'].params:
+            inputs.append(tvm.nd.array(params[param.name_hint]))
+        result = ex.evaluate()(*inputs)
+        return vmobj_to_list(result)
+    else:
+        # Default path: build and run on the graph runtime.
+        with relay.build_config(opt_level=opt_level):
+            graph, lib, params = relay.build(mod, target, target_host, params)
+        ctx = tvm.context(target, 0)
+        from tvm.contrib import graph_runtime
+        m = graph_runtime.create(graph, lib, ctx)
+        # set inputs
+        for e, i in zip(input_node, input_data):
+            m.set_input(e, tvm.nd.array(i))
+        m.set_input(**params)
+        # execute
+        m.run()
+        # get outputs
+        assert out_names is None or num_output == len(out_names), (
+            "out_names: {} num_output: {}".format(out_names, num_output))
+        tvm_output_list = [m.get_output(i).asnumpy() for i in range(num_output)]
+        return tvm_output_list
 
 def run_tf_graph(sess, input_data, input_node, output_node):
     """ Generic function to execute tensorflow """
@@ -97,7 +126,7 @@ def run_tf_graph(sess, input_data, input_node, output_node):
 
 def compare_tf_with_tvm(in_data, in_name, out_name, init_global_variables=False,
-                        no_gpu=False, opt_level=3):
+                        no_gpu=False, opt_level=3, mode='graph_runtime'):
     """Generic function to generate and compare tensorflow and TVM output"""
     def name_without_num(name):
         return name.split(':')[0] if ":" in name else name
@@ -128,7 +158,8 @@ def name_without_num(name):
             tvm_output = run_tvm_graph(final_graph_def, in_data,
                                        in_node, target=device, out_names=out_name,
-                                       num_output=len(out_name), opt_level=opt_level)
+                                       num_output=len(out_name), opt_level=opt_level, mode=mode)
             # since the names from tensorflow and relay runs are not exactly same,
             # first len(tf_output) will be compared
             for i in range(len(tf_output)):
@@ -529,6 +560,74 @@ def test_forward_squeeze():
     _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5])
     _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5, -1])
 
+def test_tensor_array_constructor():
+    with tf.Graph().as_default():
+        dtype = 'float32'
+        t = tf.constant([[1.0, 2.0], [3.0, 4.0]])
+        t2 = tf.constant([[[1.0, 2.0], [3.0, 4.0]]])
+        ta1 = tf.TensorArray(dtype=tf.float32, size=2, infer_shape=False, dynamic_size=False)
+        ta2 = ta1.write(0, t)
+        ta3 = ta2.write(1, t2)
+        out = ta3.read(0)
+        g = tf.get_default_graph()
+        compare_tf_with_tvm([], [], 'TensorArrayReadV3:0', mode='interp')
+
+def test_tensor_array_scatter():
+    with tf.Graph().as_default():
+        dtype = 'float32'
+        t = tf.constant([[1.0], [2.0], [3.0]])
+        indices = tf.constant([2, 1, 0])
+        ta1 = tf.TensorArray(dtype=tf.float32, size=3, infer_shape=False, dynamic_size=False)
+        ta2 = ta1.scatter(indices, t)
+        out0 = ta2.read(0)
+        out1 = ta2.read(1)
+        out2 = ta2.read(2)
+        g = tf.get_default_graph()
+        compare_tf_with_tvm([], [], ['TensorArrayReadV3:0'], mode='interp')
+        compare_tf_with_tvm([], [], ['TensorArrayReadV3_1:0'], mode='interp')
+        compare_tf_with_tvm([], [], ['TensorArrayReadV3_2:0'], mode='interp')
+
+def test_tensor_array_gather():
+    with tf.Graph().as_default():
+        dtype = 'float32'
+        t = tf.constant([[1.0], [2.0], [3.0]])
+        scatter_indices =
tf.constant([2, 1, 0]) + gather_indices = tf.constant([1, 2]) + ta1 = tf.TensorArray(dtype=tf.float32, size=3, infer_shape=False, dynamic_size=False) + ta2 = ta1.scatter(scatter_indices, t) + t1 = ta2.gather(gather_indices) + g = tf.get_default_graph() + compare_tf_with_tvm([], [], ['TensorArrayGatherV3:0'], mode='interp') + +def test_tensor_array_split(): + with tf.Graph().as_default(): + dtype = 'float32' + t = tf.constant([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]) + split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32) + ta1 = tf.TensorArray(dtype=tf.float32, size=4, infer_shape=False, dynamic_size=False) + ta2 = ta1.split(t, split_length) + out0 = ta2.read(0) + out1 = ta2.read(1) + out2 = ta2.read(2) + out3 = ta2.read(3) + g = tf.get_default_graph() + compare_tf_with_tvm([], [], ['TensorArrayReadV3:0'], mode='interp') + compare_tf_with_tvm([], [], ['TensorArrayReadV3_1:0'], mode='interp') + compare_tf_with_tvm([], [], ['TensorArrayReadV3_2:0'], mode='interp') + compare_tf_with_tvm([], [], ['TensorArrayReadV3_3:0'], mode='interp') + +def test_tensor_array_concat(): + with tf.Graph().as_default(): + dtype = 'float32' + t = tf.constant([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]) + split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32) + ta1 = tf.TensorArray(dtype=tf.float32, size=4, infer_shape=False, dynamic_size=False) + ta2 = ta1.split(t, split_length) + t = ta2.concat() + g = tf.get_default_graph() + print("Graph is {}".format(g.as_graph_def())) + compare_tf_with_tvm([], [], ['TensorArrayConcatV3:0'], mode='interp') + ####################################################################### # ConcatV2 # -------- diff --git a/tests/python/relay/test_adt.py b/tests/python/relay/test_adt.py index 7be7c75dfe644..295bdb9f45767 100644 --- a/tests/python/relay/test_adt.py +++ b/tests/python/relay/test_adt.py @@ -21,6 +21,8 @@ from tvm.relay.prelude import Prelude from tvm.relay.testing import add_nat_definitions, count as count_, make_nat_value, make_nat_expr +import numpy as np + mod = relay.Module() p = Prelude(mod) add_nat_definitions(p) @@ -683,6 +685,136 @@ def test_iterate(): res = intrp.evaluate(relay.Function([], expr)()) assert count(res) == 12 +def test_tensor_array_add_one(): + x = relay.var('x') + mod = relay.Module() + p = Prelude(mod) + mod["main"] = relay.Function([x], p.tensor_add_one(p.tensor1(x))) + for kind in ["debug"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + x_np = np.random.uniform(size=(1,)).astype('float32') + result = ex.evaluate()(x_np) + print(result) + +def test_tensor_array_constructor(): + x = relay.var('x') + mod = relay.Module() + p = Prelude(mod) + mod["main"] = relay.Function([x], p.tensor_array(x)) + for kind in ["debug"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + result = ex.evaluate()(5) + print("\n{}".format(result)) + +def test_tensor_array_read(): + mod = relay.Module() + p = Prelude(mod) + l = relay.var('l') + i = relay.var('i') + mod["main"] = relay.Function([l, i], p.tensor_array_read(p.tensor_array(l), i)) + for kind in ["debug"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + result = ex.evaluate()(10, 5) + print("\n{}".format(result)) + +def vmobj_to_list(o): + if isinstance(o, tvm.relay.backend.vmobj.TensorObject): + return [o.asnumpy().tolist()] + elif isinstance(o, tvm.relay.backend.interpreter.TensorValue): + return [o] + elif isinstance(o, tvm.relay.backend.vmobj.DatatypeObject): + result = [] + for f in o: + 
result.extend(vmobj_to_list(f)) + return result + elif isinstance(o, tvm.relay.backend.interpreter.ConstructorValue): + if o.constructor.name_hint == 'cons': + tl = vmobj_to_list(o.fields[1]) + hd = vmobj_to_list(o.fields[0]) + hd.extend(tl) + return hd + elif o.constructor.name_hint == 'nil': + return [] + elif o.constructor.name_hint == 'tensor0': + return [o.fields[0]] + elif o.constructor.name_hint == 'tensor1': + return [o.fields[0]] + elif o.constructor.name_hint == 'tensor2': + return [o.fields[0]] + elif o.constructor.name_hint == 'tensor_nil': + return [0] + else: + import pdb + pdb.set_trace() + else: + raise RuntimeError("Unknown object type: %s" % type(o)) + +def test_tensor_array_stack(): + mod = relay.Module() + p = Prelude(mod) + l = relay.var('l') + v = relay.var('v') + init_tensor_array = p.tensor_array(relay.const(3)) + tensor_array1 = p.tensor_array_write(init_tensor_array, relay.const(0), p.tensor1(v)) + tensor_array2 = p.tensor_array_write(tensor_array1, relay.const(1), p.tensor1(v)) + tensor_array3 = p.tensor_array_write(tensor_array2, relay.const(2), p.tensor1(v)) + tensor_array4 = p.tensor_array_stack(tensor_array3) + mod["main"] = relay.Function([v], tensor_array4) + for kind in ["debug"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + t = np.random.uniform(size=(1,)).astype('float32') + result = ex.evaluate()(t) + res = vmobj_to_list(result) + import pdb + # pdb.set_trace() + print("\n{}".format([x.data.shape for x in res])) + +def test_tensor_array_unstack(): + mod = relay.Module() + p = Prelude(mod) + v = relay.var('v') + mod["main"] = relay.Function([v], p.tensor_array_unstack_tensor1(v)) + for kind in ["debug"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + t = np.random.uniform(size=(1,)).astype('float32') + result = ex.evaluate()(t) + res = vmobj_to_list(result) + import pdb + # pdb.set_trace() + print("t is {}\n{}".format(t, res)) + +def test_tensor_take(): + mod = relay.Module() + p = Prelude(mod) + v = relay.var('v') + lower = relay.var('lower') + upper = relay.var('upper') + + mod["main"] = relay.Function([v, lower, upper], p.tensor_take(p.tensor2(v), lower, upper)) + + for kind in ["debug"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + t = np.random.uniform(size=(10, 10)).astype('float32') + result = ex.evaluate()(t, 2, 5) + res = vmobj_to_list(result) + print("t is {}\n{}".format(t, res)) + +def test_any_take(): + mod = relay.Module() + p = Prelude(mod) + v = relay.var('v', relay.ty.TensorType([relay.ty.Any(), relay.ty.Any()])) + lower = relay.var('lower', 'int32') + upper = relay.var('upper', 'int32') + + t1 = relay.op.take(v, relay.op.arange(lower, upper, dtype='int32'), axis=0) + + mod["main"] = relay.Function([v, lower, upper], t1) + for kind in ["debug"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + t = np.random.uniform(size=(10, 10)).astype('float32') + result = ex.evaluate()(t, 2, 5) + res = vmobj_to_list(result) + print("t is {}\n{}".format(t, res)) if __name__ == "__main__": test_nat_constructor() @@ -707,3 +839,9 @@ def test_iterate(): test_size() test_compose() test_iterate() + + test_tensor_array_add_one() + test_tensor_array_constructor() + test_tensor_array_read() + test_tensor_array_stack() + test_tensor_array_unstack()
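For reference, the prelude-level tensor array API introduced in this patch can also be driven directly from Python, without going through the TensorFlow importer. The snippet below is a minimal sketch that mirrors `test_tensor_array_stack` above; it assumes a TVM build containing the prelude definitions from this patch and uses only calls that already appear in the tests (`Prelude`, `tensor_array`, `tensor_array_write`, `tensor1`, `tensor_array_stack`, and the debug executor).

```python
import numpy as np
import tvm
from tvm import relay
from tvm.relay.prelude import Prelude

# Build a module whose prelude carries the tensor ADT and tensor_array helpers.
mod = relay.Module()
p = Prelude(mod)

# Write the same rank-1 tensor into slots 0..2 of a size-3 tensor array,
# then stack the array back into a single rank-2 tensor.
v = relay.var('v')
ta = p.tensor_array(relay.const(3))
ta = p.tensor_array_write(ta, relay.const(0), p.tensor1(v))
ta = p.tensor_array_write(ta, relay.const(1), p.tensor1(v))
ta = p.tensor_array_write(ta, relay.const(2), p.tensor1(v))
mod["main"] = relay.Function([v], p.tensor_array_stack(ta))

# The tests above only exercise the debug interpreter for these ADTs.
ex = relay.create_executor("debug", mod=mod, ctx=tvm.cpu(), target="llvm")
data = np.random.uniform(size=(4,)).astype('float32')
result = ex.evaluate()(data)
print(result)
```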