From 037a55a99cdaf1b14fd5275ca42269b7b7a3b2e8 Mon Sep 17 00:00:00 2001 From: seanlatias Date: Thu, 13 Feb 2020 13:55:40 -0500 Subject: [PATCH 1/7] initial attempt for introducing struct --- python/heterocl/api.py | 7 +-- python/heterocl/compute_api.py | 27 ++++++++-- python/heterocl/dsl.py | 14 ++--- python/heterocl/nparray.py | 4 +- python/heterocl/schedule.py | 3 +- python/heterocl/tensor.py | 76 ++++++++++++++++++++++++---- python/heterocl/types.py | 25 ++++++++- python/heterocl/util.py | 5 +- tvm/HalideIR/src/ir/IR.cpp | 1 + tvm/HalideIR/src/ir/IR.h | 1 + tvm/src/codegen/llvm/codegen_llvm.cc | 9 ++++ 11 files changed, 144 insertions(+), 28 deletions(-) diff --git a/python/heterocl/api.py b/python/heterocl/api.py index f3e2151c8..b0c9351b5 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -90,11 +90,12 @@ def placeholder(shape, name=None, dtype=None): """ name = util.get_name("placeholder", name) dtype = util.get_dtype(dtype) - + tvm_dtype = types.dtype_to_str(dtype) + if shape == (): - return Scalar(tvm_api._Var(name, dtype)) + return Scalar(tvm_api._Var(name, tvm_dtype)) tensor = Tensor(shape, dtype, name) - tensor.tensor = tvm_api._Placeholder(tensor.buf.shape, dtype, name) + tensor.tensor = tvm_api._Placeholder(tensor.buf.shape, tvm_dtype, name) # placeholder is also a stage stage = Stage(name) diff --git a/python/heterocl/compute_api.py b/python/heterocl/compute_api.py index 304642d39..ce68859e3 100644 --- a/python/heterocl/compute_api.py +++ b/python/heterocl/compute_api.py @@ -5,8 +5,9 @@ from collections import OrderedDict from .tvm import expr as _expr, stmt as _stmt, make as _make from .tvm.api import _IterVar, min_value -from .util import get_index, get_name, get_type, get_dtype, make_for, CastRemover +from .util import get_index, get_name, get_type, get_tvm_dtype, make_for, CastRemover from .tensor import Scalar, Tensor, TensorSlice +from .types import Struct, dtype_to_str from .schedule import Stage from .debug import APIError from .dsl import if_, for_ @@ -117,7 +118,7 @@ def compute_body(name, if not return_tensor: stage.input_stages.add(tensor.last_update) else: - tensor = Tensor(shape, stage._dtype, name, stage._buf) + tensor = Tensor(shape, stage._hcl_dtype, name, stage._buf) buffer_var = tensor._buf.data dtype = tensor.dtype shape = tensor.shape @@ -137,6 +138,24 @@ def compute_body(name, stmt = stage.pop_stmt() stmt = ReplaceReturn(buffer_var, dtype, index).mutate(stmt) stmt = make_for(indices, stmt, 0) + elif isinstance(ret, (tuple, list)): + indices = lambda_ivs + index, _, _ = get_index(shape, indices, 0) + hcl_dtype = tensor.hcl_dtype + if not isinstance(hcl_dtype, Struct): + raise TensorError("Cannot assign a tuple/list to a non-struct-type tensor") + start = 0 + end = 0 + for sdtype, expr in zip(hcl_dtype.dtype_dict.values(), ret): + end = start + sdtype.bits + sdtype = dtype_to_str(sdtype) + load = _make.Load(dtype, buffer_var, index) + expr = _make.SetSlice(load, expr, end, start) + stage.emit(_make.Store(buffer_var, + _make.Cast(dtype, expr), + index)) + start = end + stmt = make_for(indices, stage.pop_stmt(), 0) elif isinstance(ret, (TensorSlice, Scalar, _expr.Expr, numbers.Number)): indices = lambda_ivs index, _, _ = get_index(shape, indices, 0) @@ -539,7 +558,7 @@ def unpack_A(A): # to do so, we will need the name name_ = name if Stage.get_len() == 0 \ else Stage.get_current().name_with_prefix + "." + name - dtype = get_dtype(dtype, name_) + dtype = get_tvm_dtype(dtype, name_) ret = get_type(dtype) factor = tensor.type.bits // ret[1] bitwidth = ret[1] @@ -612,7 +631,7 @@ def pack(tensor, axis=0, factor=None, name=None, dtype=None): # to do so, we will need the name name_ = name if Stage.get_len() == 0 \ else Stage.get_current().name_with_prefix + "." + name - dtype = get_dtype(dtype, name_) + dtype = get_tvm_dtype(dtype, name_) ret = get_type(dtype) factor = ret[1] // tensor.type.bits bitwidth = tensor.type.bits diff --git a/python/heterocl/dsl.py b/python/heterocl/dsl.py index b226cb0ab..2916dee97 100644 --- a/python/heterocl/dsl.py +++ b/python/heterocl/dsl.py @@ -399,19 +399,19 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n if dtypes is None: dtypes = [] for name_ in new_names: - dtypes.append(util.get_dtype(None, name_)) + dtypes.append(util.get_tvm_dtype(None, name_)) elif isinstance(dtypes, list): if len(dtypes) != nargs: raise APIError("The number of data types does not match the of arguments") for (name_, dtype_) in zip(new_names, dtypes): - dtypes.append(util.get_dtype(dtype_, name_)) + dtypes.append(util.get_tvm_dtype(dtype_, name_)) dtypes = dtypes[int(len(dtypes)/2):] else: - dtype = util.get_dtype(dtypes) + dtype = util.get_tvm_dtype(dtypes) dtypes = [] for name_ in new_names: - dtypes.append(util.get_dtype(dtype, name_)) - ret_dtype = util.get_dtype(ret_dtype, s.name_with_prefix) + dtypes.append(util.get_tvm_dtype(dtype, name_)) + ret_dtype = util.get_tvm_dtype(ret_dtype, s.name_with_prefix) # prepare inputs for IR generation inputs = [] inputs_tvm = [] @@ -441,7 +441,7 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n ret_void = _make.UIntImm("uint1", 0) if s.has_return else _make.UIntImm("uint1", 1) body = s.pop_stmt() s.stmt_stack.append([]) - s.emit(_make.KernelDef(inputs_tvm, arg_shapes, arg_dtypes, + s.emit(_make.KernelDef(inputs_tvm, arg_shapes, arg_dtypes, body, ret_void, ret_dtype, name, [])) for name_, i in zip(names, inputs): s.var_dict[name_] = i @@ -499,6 +499,6 @@ def compute_out(A, x): if not Stage.get_len(): raise DSLError("Imperative DSL must be used with other compute APIs") stage = Stage.get_current() - dtype = util.get_dtype(stage.ret_dtype) + dtype = util.get_tvm_dtype(stage.ret_dtype) stage.emit(_make.Return(_make.Cast(dtype, val))) stage.has_return = True diff --git a/python/heterocl/nparray.py b/python/heterocl/nparray.py index be7f57dbf..36a4fb696 100644 --- a/python/heterocl/nparray.py +++ b/python/heterocl/nparray.py @@ -2,7 +2,7 @@ #pylint: disable=missing-docstring import numpy as np from .tvm.ndarray import array, cpu -from .util import get_dtype +from .util import get_tvm_dtype from . import types def cast_np(np_in, dtype): @@ -79,7 +79,7 @@ def asarray(arr, dtype=None, ctx=cpu(0)): np_A = numpy.zeros(10) hcl_A = np_A.asarray() """ - dtype = get_dtype(dtype) + dtype = get_tvm_dtype(dtype) return array(arr, dtype, ctx) def pack_np(np_in, dtype_in, dtype_out): diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index 2cf6eeb6e..da98f0a27 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -324,7 +324,8 @@ def __init__(self, name=None, dtype=None, shape=()): else Stage.get_current().name_with_prefix + "." + self.name # Private attributes for building a stage self._op = None - self._dtype = util.get_dtype(dtype, self.name_with_prefix) + self._hcl_dtype = util.get_dtype(dtype, self.name_with_prefix) + self._dtype = util.get_tvm_dtype(dtype, self.name_with_prefix) self._buf = tvm_api.decl_buffer(shape, self._dtype, self.name) self._shape = self._buf.shape diff --git a/python/heterocl/tensor.py b/python/heterocl/tensor.py index f95d48811..f437e7a76 100644 --- a/python/heterocl/tensor.py +++ b/python/heterocl/tensor.py @@ -113,11 +113,12 @@ class TensorSlice(NodeGeneric, _expr.ExprOp): # not allowed: A[5:7] """ - def __init__(self, tensor, indices): + def __init__(self, tensor, indices, dtype=None): if not isinstance(indices, tuple): indices = (indices,) self.tensor = tensor self.indices = indices + self._dtype = dtype if dtype is not None else self.tensor.dtype def __getitem__(self, indices): if not isinstance(indices, tuple): @@ -134,10 +135,15 @@ def __setitem__(self, indices, expr): builder = Stage.get_current() if bit is None: builder.emit(_make.Store(self.tensor.buf.data, - _make.Cast(self.tensor.dtype, expr), + _make.Cast(self._dtype, expr), index)) elif isinstance(bit, slice): + print(self._dtype) load = _make.Load(self.tensor.dtype, self.tensor.buf.data, index) + # special handle for struct + if util.get_type(self._dtype) != "uint": + ty = "uint" + str(util.get_type(self._dtype)[1]) + expr = _make.Call(ty, "bitcast", [expr], _expr.Call.PureIntrinsic, None, 0) expr = _make.SetSlice(load, expr, bit.start, bit.stop) builder.emit(_make.Store(self.tensor.buf.data, _make.Cast(self.tensor.dtype, expr), @@ -146,9 +152,51 @@ def __setitem__(self, indices, expr): load = _make.Load(self.tensor.dtype, self.tensor.buf.data, index) expr = _make.SetBit(load, expr, bit) builder.emit(_make.Store(self.tensor.buf.data, - _make.Cast(self.tensor.dtype, expr), + _make.Cast(self._dtype, expr), index)) + def __getattr__(self, key): + hcl_dtype = self.tensor.hcl_dtype + if not isinstance(hcl_dtype, types.Struct): + raise TensorError("Cannot access attribute if the data type is not struct") + try: + start = 0 + end = 0 + dtype = None + for dkey, dval in hcl_dtype.dtype_dict.items(): + if dkey == key: + end = start + dval.bits + dtype = types.dtype_to_str(dval) + break + else: + start += dval.bits + indices = (slice(end, start),) + return TensorSlice(self.tensor, self.indices + indices, dtype) + except KeyError: + raise DTypeError("Field " + key + " is not in struct " + str(hcl_dtype)) + + def __setattr__(self, key, expr): + if key in ("tensor", "indices", "_dtype"): + super().__setattr__(key, expr) + else: + hcl_dtype = self.tensor.hcl_dtype + if not isinstance(hcl_dtype, types.Struct): + raise TensorError("Cannot access attribute if the data type is not struct") + try: + start = 0 + end = 0 + for dkey, dval in hcl_dtype.dtype_dict.items(): + if dkey == key: + end = start + dval.bits + self._dtype = types.dtype_to_str(dval) + break + else: + start += dval.bits + indices = (slice(end, start),) + self.__setitem__(indices, expr) + except KeyError: + raise DTypeError("Field " + key + " is not in struct " + str(hcl_dtype)) + @property def dtype(self): return self.tensor.dtype @@ -158,12 +206,21 @@ def asnode(self): raise TensorError("Accessing a slice of tensor is not allowed") index, bit, _ = util.get_index(self.tensor.shape, self.indices, 0) if bit is None: - return _make.Load(self.tensor.dtype, self.tensor.buf.data, index) + return _make.Load(self._dtype, self.tensor.buf.data, index) elif isinstance(bit, slice): - return _make.GetSlice(_make.Load(self.tensor.dtype, self.tensor.buf.data, index), + load = _make.GetSlice(_make.Load(self.tensor.dtype, self.tensor.buf.data, index), bit.start, bit.stop) - return _make.GetBit(_make.Load(self.tensor.dtype, self.tensor.buf.data, index), bit) + if self.tensor.dtype != self._dtype: + bw_from = types.get_bitwidth(self.tensor.dtype) + bw_to = types.get_bitwidth(self._dtype) + if bw_from != bw_to: + ty = util.get_type(self.tensor.dtype)[0] + str(bw_to) + load = _make.Cast(ty, load) + return _make.Call(self._dtype, "bitcast", [load], _expr.Call.PureIntrinsic, None, 0) + else: + return load + return _make.GetBit(_make.Load(self._dtype, self.tensor.buf.data, index), bit) class Tensor(NodeGeneric, _expr.ExprOp): """A HeteroCL tensor. @@ -230,14 +287,15 @@ class Tensor(NodeGeneric, _expr.ExprOp): def __init__(self, shape, dtype="int32", name="tensor", buf=None): self._tensor = None self._buf = buf - self.dtype = dtype + self.hcl_dtype = dtype + self.dtype = types.dtype_to_str(dtype) self.shape = shape self.name = name self.var_dict = {} self.first_update = None self.last_update = None if buf is None: - self._buf = decl_buffer(shape, dtype, name) + self._buf = decl_buffer(shape, self.dtype, name) def __repr__(self): return "Tensor('" + self.name + "', " + str(self.shape) + ", " + str(self.dtype) + ")" @@ -291,7 +349,7 @@ def buf(self): @property def type(self): - return types.dtype_to_hcl(self.dtype) + return self.hcl_dtype @property def op(self): diff --git a/python/heterocl/types.py b/python/heterocl/types.py index 5f22a87c6..9f73d8b95 100644 --- a/python/heterocl/types.py +++ b/python/heterocl/types.py @@ -1,6 +1,7 @@ """Define HeteroCL data types""" #pylint: disable=too-few-public-methods, too-many-return-statements import numbers +from collections import OrderedDict from .debug import DTypeError class Type(object): @@ -48,6 +49,27 @@ class UFixed(Type): def __repr__(self): return "UFixed(" + str(self.bits) + ", " + str(self.fracs) + ")" +class Struct(Type): + """A C-like struct""" + def __init__(self, dtype_dict): + self.dtype_dict = OrderedDict(dtype_dict) + self.bits = 0 + for dtype in dtype_dict.values(): + self.bits += dtype.bits + Type.__init__(self, self.bits, 0) + + def __repr__(self): + return "Struct(" + str(self.dtype_dict) + ")" + + def __getattr__(self, key): + try: + return self.dtype_dict[key] + except KeyError: + raise DTypeError(key + " is not in struct") + + def __getitem__(self, key): + return self.__getattr__(key) + def dtype_to_str(dtype): """Convert a data type to string format. @@ -66,7 +88,8 @@ def dtype_to_str(dtype): if isinstance(dtype, Type): if isinstance(dtype, Int): return "int" + str(dtype.bits) - elif isinstance(dtype, UInt): + # struct is treated as uint + elif isinstance(dtype, (UInt, Struct)): return "uint" + str(dtype.bits) elif isinstance(dtype, Fixed): bits = dtype.bits diff --git a/python/heterocl/util.py b/python/heterocl/util.py index 704b774cb..5d9e9b5a8 100644 --- a/python/heterocl/util.py +++ b/python/heterocl/util.py @@ -75,7 +75,10 @@ def get_dtype(dtype, name=None): dtype_ = Scheme.current.dtype_dict.get(name) dtype = dtype if dtype_ is None else dtype_ dtype = config.init_dtype if dtype is None else dtype - return types.dtype_to_str(dtype) + return dtype + +def get_tvm_dtype(dtype, name=None): + return types.dtype_to_str(get_dtype(dtype, name)) def true(): return _make.UIntImm("uint1", 1) diff --git a/tvm/HalideIR/src/ir/IR.cpp b/tvm/HalideIR/src/ir/IR.cpp index a604b6fd2..33ae584ce 100644 --- a/tvm/HalideIR/src/ir/IR.cpp +++ b/tvm/HalideIR/src/ir/IR.cpp @@ -963,6 +963,7 @@ Call::ConstString Call::shift_left = "shift_left"; Call::ConstString Call::shift_right = "shift_right"; Call::ConstString Call::abs = "abs"; Call::ConstString Call::absd = "absd"; +Call::ConstString Call::bitcast = "bitcast"; Call::ConstString Call::lerp = "lerp"; Call::ConstString Call::random = "random"; Call::ConstString Call::popcount = "popcount"; diff --git a/tvm/HalideIR/src/ir/IR.h b/tvm/HalideIR/src/ir/IR.h index e8a8835bf..232a8364d 100644 --- a/tvm/HalideIR/src/ir/IR.h +++ b/tvm/HalideIR/src/ir/IR.h @@ -707,6 +707,7 @@ struct Call : public ExprNode { shift_right, abs, absd, + bitcast, rewrite_buffer, random, lerp, diff --git a/tvm/src/codegen/llvm/codegen_llvm.cc b/tvm/src/codegen/llvm/codegen_llvm.cc index a5c38154e..1abca591e 100644 --- a/tvm/src/codegen/llvm/codegen_llvm.cc +++ b/tvm/src/codegen/llvm/codegen_llvm.cc @@ -690,6 +690,15 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) { } else { return builder_->CreateLShr(a, b_new); } + } else if (op->is_intrinsic(Call::bitcast)) { + llvm::Value* v = MakeValue(op->args[0]); + Type tv = op->args[0].type(); + Type to = op->type; + if (tv.bits() != to.bits()) { + // To be implemented + } else { + return builder_->CreateBitCast(v, LLVMType(to)); + } } else if (op->is_intrinsic(intrinsic::tvm_storage_sync)) { return CreateStorageSync(op); } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { From 06bf94aba89cf4a5df29cccba27fd84933909da3 Mon Sep 17 00:00:00 2001 From: seanlatias Date: Thu, 13 Feb 2020 15:00:13 -0500 Subject: [PATCH 2/7] add a simple test for struct; more are coming ... --- python/heterocl/compute_api.py | 4 ++++ python/heterocl/tensor.py | 3 ++- tests/test_dtype.py | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/python/heterocl/compute_api.py b/python/heterocl/compute_api.py index ce68859e3..05539e26f 100644 --- a/python/heterocl/compute_api.py +++ b/python/heterocl/compute_api.py @@ -150,6 +150,10 @@ def compute_body(name, end = start + sdtype.bits sdtype = dtype_to_str(sdtype) load = _make.Load(dtype, buffer_var, index) + expr = _make.Cast(sdtype, expr) + if get_type(sdtype) != "uint": + ty = "uint" + str(get_type(sdtype)[1]) + expr = _make.Call(ty, "bitcast", [expr], _expr.Call.PureIntrinsic, None, 0) expr = _make.SetSlice(load, expr, end, start) stage.emit(_make.Store(buffer_var, _make.Cast(dtype, expr), diff --git a/python/heterocl/tensor.py b/python/heterocl/tensor.py index f437e7a76..de9a4def8 100644 --- a/python/heterocl/tensor.py +++ b/python/heterocl/tensor.py @@ -141,7 +141,8 @@ def __setitem__(self, indices, expr): print(self._dtype) load = _make.Load(self.tensor.dtype, self.tensor.buf.data, index) # special handle for struct - if util.get_type(self._dtype) != "uint": + if (isinstance(self.tensor.type, types.Struct) + and util.get_type(self._dtype) != "uint"): ty = "uint" + str(util.get_type(self._dtype)[1]) expr = _make.Call(ty, "bitcast", [expr], _expr.Call.PureIntrinsic, None, 0) expr = _make.SetSlice(load, expr, bit.start, bit.stop) diff --git a/tests/test_dtype.py b/tests/test_dtype.py index 1d9d00279..b441269f5 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype.py @@ -205,3 +205,38 @@ def kernel(A): f(hcl_A, hcl_C) assert np.array_equal(np_A, hcl_C.asnumpy()) + +def test_dtype_struct(): + hcl.init() + A = hcl.placeholder((100,), dtype=hcl.Int(8)) + B = hcl.placeholder((100,), dtype=hcl.Fixed(13, 11)) + C = hcl.placeholder((100,), dtype=hcl.Float()) + + def kernel(A, B, C): + stype = hcl.Struct({"fa": hcl.Int(8), "fb": hcl.Fixed(13, 11), "fc": hcl.Float()}) + D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=stype) + E = hcl.compute(A.shape, lambda x: D[x].fa, dtype=hcl.Int(8)) + F = hcl.compute(A.shape, lambda x: D[x].fb, dtype=hcl.Fixed(13, 11)) + G = hcl.compute(A.shape, lambda x: D[x].fc, dtype=hcl.Float()) + return E, F, G + + s = hcl.create_schedule([A, B, C], kernel) + f = hcl.build(s) + np_A = np.random.randint(0, 500, size=100) - 250 + np_B = np.random.rand(100) - 0.5 + np_C = np.random.rand(100) - 0.5 + np_E = np.zeros(100) + np_F = np.zeros(100) + np_G = np.zeros(100) + hcl_A = hcl.asarray(np_A, dtype=hcl.Int(8)) + hcl_B = hcl.asarray(np_B, dtype=hcl.Fixed(13, 11)) + hcl_C = hcl.asarray(np_C, dtype=hcl.Float()) + hcl_E = hcl.asarray(np_E, dtype=hcl.Int(8)) + hcl_F = hcl.asarray(np_F, dtype=hcl.Fixed(13, 11)) + hcl_G = hcl.asarray(np_G, dtype=hcl.Float()) + f(hcl_A, hcl_B, hcl_C, hcl_E, hcl_F, hcl_G) + + assert np.allclose(hcl_A.asnumpy(), hcl_E.asnumpy()) + assert np.allclose(hcl_B.asnumpy(), hcl_F.asnumpy()) + assert np.allclose(hcl_C.asnumpy(), hcl_G.asnumpy()) + From 5f7cd56b16c206226b4c6ff2b066073a93cb8a6c Mon Sep 17 00:00:00 2001 From: seanlatias Date: Thu, 13 Feb 2020 15:35:47 -0500 Subject: [PATCH 3/7] add complex test --- python/heterocl/tensor.py | 1 - tests/test_dtype.py | 52 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/python/heterocl/tensor.py b/python/heterocl/tensor.py index de9a4def8..7815e4dcb 100644 --- a/python/heterocl/tensor.py +++ b/python/heterocl/tensor.py @@ -138,7 +138,6 @@ def __setitem__(self, indices, expr): _make.Cast(self._dtype, expr), index)) elif isinstance(bit, slice): - print(self._dtype) load = _make.Load(self.tensor.dtype, self.tensor.buf.data, index) # special handle for struct if (isinstance(self.tensor.type, types.Struct) diff --git a/tests/test_dtype.py b/tests/test_dtype.py index b441269f5..7276e8a3d 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype.py @@ -240,3 +240,55 @@ def kernel(A, B, C): assert np.allclose(hcl_B.asnumpy(), hcl_F.asnumpy()) assert np.allclose(hcl_C.asnumpy(), hcl_G.asnumpy()) +def test_dtye_strcut_complex(): + hcl.init() + A = hcl.placeholder((100,)) + B = hcl.placeholder((100,)) + C = hcl.placeholder((100,)) + O = hcl.placeholder((100, 6)) + + def kernel(A, B, C, O): + dtype_xyz = hcl.Struct({"x": hcl.Int(), "y": hcl.Int(), "z": hcl.Int()}) + dtype_out = hcl.Struct({"v0": hcl.Int(), + "v1": hcl.Int(), + "v2": hcl.Int(), + "v3": hcl.Int(), + "v4": hcl.Int(), + "v5": hcl.Int()}) + + D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=dtype_xyz) + E = hcl.compute(A.shape, lambda x: (D[x].x * D[x].x, + D[x].y * D[x].y, + D[x].z * D[x].z, + D[x].x * D[x].y, + D[x].y * D[x].z, + D[x].x * D[x].z), dtype=dtype_out) + with hcl.Stage(): + with hcl.for_(0, 100) as i: + for j in range(0, 6): + O[i][j] = E[i].__getattr__("v" + str(j)) + + s = hcl.create_schedule([A, B, C, O], kernel) + f = hcl.build(s) + + np_A = np.random.randint(10, size=100) + np_B = np.random.randint(10, size=100) + np_C = np.random.randint(10, size=100) + np_O = np.zeros((100, 6)) + + np_G = np.zeros((100, 6)).astype("int") + for i in range(0, 100): + np_G[i][0] = np_A[i] * np_A[i] + np_G[i][1] = np_B[i] * np_B[i] + np_G[i][2] = np_C[i] * np_C[i] + np_G[i][3] = np_A[i] * np_B[i] + np_G[i][4] = np_B[i] * np_C[i] + np_G[i][5] = np_A[i] * np_C[i] + + hcl_A = hcl.asarray(np_A) + hcl_B = hcl.asarray(np_B) + hcl_C = hcl.asarray(np_C) + hcl_O = hcl.asarray(np_O) + f(hcl_A, hcl_B, hcl_C, hcl_O) + + assert np.array_equal(hcl_O.asnumpy(), np_G) From c9ad7e4ba7769ac2effc75eb69c2bb1afeb5cbbc Mon Sep 17 00:00:00 2001 From: seanlatias Date: Thu, 13 Feb 2020 16:01:52 -0500 Subject: [PATCH 4/7] errors from nn.py; need to look into that ... --- hlib/python/hlib/op/nn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hlib/python/hlib/op/nn.py b/hlib/python/hlib/op/nn.py index da5393196..25c71f226 100644 --- a/hlib/python/hlib/op/nn.py +++ b/hlib/python/hlib/op/nn.py @@ -173,6 +173,7 @@ def conv2d( d = [] for i in range(len(padding)): p.append(tvm_to_primitive(padding[i])) + for i in range(len(strides)): s.append(tvm_to_primitive(strides[i])) d.append(tvm_to_primitive(dilation[i])) strides = s From c46550583f102b91a8d4a886af452bbe8f5dc8a1 Mon Sep 17 00:00:00 2001 From: seanlatias Date: Fri, 14 Feb 2020 14:56:48 -0500 Subject: [PATCH 5/7] add C codegen for struct --- tvm/src/codegen/codegen_c.cc | 27 +++++++++++++++++++++++++++ tvm/src/codegen/hlsc/codegen_vhls.cc | 4 +++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 006edf933..b6b222c0e 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -663,6 +663,33 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) PrintBinaryIntrinsitc(op, " << ", os, this); } else if (op->is_intrinsic(Call::shift_right)) { PrintBinaryIntrinsitc(op, " >> ", os, this); + } else if (op->is_intrinsic(Call::bitcast)) { + this->PrintIndent(); + std::string conv_name = GetUniqueName("_converter"); + int bits = op->args[0].type().bits(); + if (op->args[0].type().code() == Type::Float) { + if (bits == 32) { + stream << "union { float from; uint32_t to;} " << conv_name << ";\n"; + } else if (bits == 64) { + stream << "union { double from; uint64_t to;} " << conv_name << ";\n"; + } else { + LOG(FATAL) << "Unsupported bitwidth" + << op->args[0].type().bits() << "for floating point"; + } + this->PrintIndent(); + stream << conv_name << ".from = "; + this->PrintExpr(op->args[0], stream); + stream << ";\n"; + os << conv_name << ".to"; + } else { + this->PrintType(op->type, stream); + stream << " " << conv_name << ";\n"; + this->PrintIndent(); + stream << conv_name << "(" << bits-1 << ", 0) = "; + this->PrintExpr(op->args[0], stream); + stream << ";\n"; + os << conv_name; + } } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { os << "("; PrintExpr(op->args[0], os); diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index f944bef83..ac4d9e899 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -98,6 +98,7 @@ void CodeGenVivadoHLS::AddFunction(LoweredFunc f, this->decl_stream << "#include \n"; this->decl_stream << "#include \n"; this->decl_stream << "#include \n\n"; + this->decl_stream << "#include \n\n"; CodeGenHLSC::AddFunction(f, map_arg_type); if (soda_header_.is_open()) soda_header_.close(); @@ -137,10 +138,11 @@ void CodeGenVivadoHLS::VisitStmt_(const Store* op) { Type t = op->value.type(); Expr new_index_left = ir::Simplify(ss->index_left - 1); std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); + std::string rhs = PrintExpr(ss->value); PrintIndent(); this->stream << ref << "(" << PrintExpr(new_index_left) << ", " << PrintExpr(ss->index_right) - << ") = " << PrintExpr(ss->value) << ";\n"; + << ") = " << rhs << ";\n"; } else if (const SetBit* sb = op->value.as()) { Type t = op->value.type(); std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); From e9bf89ed44295975a5004f62bed8b78cac2d2e19 Mon Sep 17 00:00:00 2001 From: seanlatias Date: Fri, 14 Feb 2020 15:20:55 -0500 Subject: [PATCH 6/7] the previous C codegen was not complete, the other conversion direction was not taken care of --- tvm/src/codegen/codegen_c.cc | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index b6b222c0e..d28fac208 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -667,15 +667,19 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) this->PrintIndent(); std::string conv_name = GetUniqueName("_converter"); int bits = op->args[0].type().bits(); - if (op->args[0].type().code() == Type::Float) { - if (bits == 32) { - stream << "union { float from; uint32_t to;} " << conv_name << ";\n"; - } else if (bits == 64) { - stream << "union { double from; uint64_t to;} " << conv_name << ";\n"; - } else { - LOG(FATAL) << "Unsupported bitwidth" - << op->args[0].type().bits() << "for floating point"; - } + if (op->args[0].type().code() == Type::Float || + op->type.code() == Type::Float) { + CHECK(bits == 32 || bits == 64); + std::string ty_from = bits == 32 ? "float" : "double"; + std::string ty_to = bits == 32 ? "uint32_t" : "uint64_t"; + bool from_float = op->args[0].type().code() == Type::Float; + stream << "union { "; + if (from_float) stream << ty_from; + else stream << ty_to; + stream << " from; "; + if (from_float) stream << ty_to; + else stream << ty_from; + stream << " to;} " << conv_name << ";\n"; this->PrintIndent(); stream << conv_name << ".from = "; this->PrintExpr(op->args[0], stream); @@ -687,6 +691,7 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) this->PrintIndent(); stream << conv_name << "(" << bits-1 << ", 0) = "; this->PrintExpr(op->args[0], stream); + stream << "(" << bits-1 << ", 0)"; stream << ";\n"; os << conv_name; } From a032667706d834695b3418fe1423ea8c993871d8 Mon Sep 17 00:00:00 2001 From: seanlatias Date: Fri, 14 Feb 2020 15:37:50 -0500 Subject: [PATCH 7/7] clean up the code --- python/heterocl/tensor.py | 76 +++++++++++++++------------- python/heterocl/types.py | 5 +- tvm/src/codegen/llvm/codegen_llvm.cc | 7 +-- 3 files changed, 48 insertions(+), 40 deletions(-) diff --git a/python/heterocl/tensor.py b/python/heterocl/tensor.py index 7815e4dcb..10387a044 100644 --- a/python/heterocl/tensor.py +++ b/python/heterocl/tensor.py @@ -139,11 +139,13 @@ def __setitem__(self, indices, expr): index)) elif isinstance(bit, slice): load = _make.Load(self.tensor.dtype, self.tensor.buf.data, index) - # special handle for struct + # special handle for struct: we need to make sure the bitwidths + # are the same before and after bitcast if (isinstance(self.tensor.type, types.Struct) and util.get_type(self._dtype) != "uint"): ty = "uint" + str(util.get_type(self._dtype)[1]) - expr = _make.Call(ty, "bitcast", [expr], _expr.Call.PureIntrinsic, None, 0) + expr = _make.Call(ty, "bitcast", + [expr], _expr.Call.PureIntrinsic, None, 0) expr = _make.SetSlice(load, expr, bit.start, bit.stop) builder.emit(_make.Store(self.tensor.buf.data, _make.Cast(self.tensor.dtype, expr), @@ -158,44 +160,46 @@ def __setitem__(self, indices, expr): def __getattr__(self, key): hcl_dtype = self.tensor.hcl_dtype if not isinstance(hcl_dtype, types.Struct): - raise TensorError("Cannot access attribute if the data type is not struct") - try: + raise TensorError( + "Cannot access attribute if type is not struct") + start = 0 + end = 0 + dtype = None + for dkey, dval in hcl_dtype.dtype_dict.items(): + if dkey == key: + end = start + dval.bits + dtype = types.dtype_to_str(dval) + break + else: + start += dval.bits + if dtype is None: + raise DTypeError("Field " + key + + " is not in struct " + str(hcl_dtype)) + indices = (slice(end, start),) + return TensorSlice(self.tensor, self.indices + indices, dtype) + + def __setattr__(self, key, expr): + if key in ("tensor", "indices", "_dtype"): + super().__setattr__(key, expr) + else: + hcl_dtype = self.tensor.hcl_dtype + if not isinstance(hcl_dtype, types.Struct): + raise TensorError( + "Cannot access attribute if type is not struct") start = 0 end = 0 - dtype = None for dkey, dval in hcl_dtype.dtype_dict.items(): if dkey == key: end = start + dval.bits - dtype = types.dtype_to_str(dval) + self._dtype = types.dtype_to_str(dval) break else: start += dval.bits + if start == end: + raise DTypeError("Field " + key + + " is not in struct " + str(hcl_dtype)) indices = (slice(end, start),) - return TensorSlice(self.tensor, self.indices + indices, dtype) - except KeyError: - raise DTypeError("Field " + key + " is not in struct " + str(hcl_dtype)) - - def __setattr__(self, key, expr): - if key in ("tensor", "indices", "_dtype"): - super().__setattr__(key, expr) - else: - hcl_dtype = self.tensor.hcl_dtype - if not isinstance(hcl_dtype, types.Struct): - raise TensorError("Cannot access attribute if the data type is not struct") - try: - start = 0 - end = 0 - for dkey, dval in hcl_dtype.dtype_dict.items(): - if dkey == key: - end = start + dval.bits - self._dtype = types.dtype_to_str(dval) - break - else: - start += dval.bits - indices = (slice(end, start),) - self.__setitem__(indices, expr) - except KeyError: - raise DTypeError("Field " + key + " is not in struct " + str(hcl_dtype)) + self.__setitem__(indices, expr) @property def dtype(self): @@ -208,7 +212,8 @@ def asnode(self): if bit is None: return _make.Load(self._dtype, self.tensor.buf.data, index) elif isinstance(bit, slice): - load = _make.GetSlice(_make.Load(self.tensor.dtype, self.tensor.buf.data, index), + load = _make.GetSlice(_make.Load(self.tensor.dtype, + self.tensor.buf.data, index), bit.start, bit.stop) if self.tensor.dtype != self._dtype: @@ -217,10 +222,13 @@ def asnode(self): if bw_from != bw_to: ty = util.get_type(self.tensor.dtype)[0] + str(bw_to) load = _make.Cast(ty, load) - return _make.Call(self._dtype, "bitcast", [load], _expr.Call.PureIntrinsic, None, 0) + return _make.Call(self._dtype, "bitcast", + [load], _expr.Call.PureIntrinsic, None, 0) else: return load - return _make.GetBit(_make.Load(self._dtype, self.tensor.buf.data, index), bit) + return _make.GetBit(_make.Load(self._dtype, + self.tensor.buf.data, + index), bit) class Tensor(NodeGeneric, _expr.ExprOp): """A HeteroCL tensor. diff --git a/python/heterocl/types.py b/python/heterocl/types.py index 9f73d8b95..44e3fd9a8 100644 --- a/python/heterocl/types.py +++ b/python/heterocl/types.py @@ -50,7 +50,10 @@ def __repr__(self): return "UFixed(" + str(self.bits) + ", " + str(self.fracs) + ")" class Struct(Type): - """A C-like struct""" + """A C-like struct + + The struct members are defined with a Python dictionary + """ def __init__(self, dtype_dict): self.dtype_dict = OrderedDict(dtype_dict) self.bits = 0 diff --git a/tvm/src/codegen/llvm/codegen_llvm.cc b/tvm/src/codegen/llvm/codegen_llvm.cc index 1abca591e..6c8d257e7 100644 --- a/tvm/src/codegen/llvm/codegen_llvm.cc +++ b/tvm/src/codegen/llvm/codegen_llvm.cc @@ -694,11 +694,8 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) { llvm::Value* v = MakeValue(op->args[0]); Type tv = op->args[0].type(); Type to = op->type; - if (tv.bits() != to.bits()) { - // To be implemented - } else { - return builder_->CreateBitCast(v, LLVMType(to)); - } + CHECK(tv.bits() == to.bits()); + return builder_->CreateBitCast(v, LLVMType(to)); } else if (op->is_intrinsic(intrinsic::tvm_storage_sync)) { return CreateStorageSync(op); } else if (op->is_intrinsic(intrinsic::tvm_address_of)) {