diff --git a/.circleci/config.yml b/.circleci/config.yml index 4fa87539c..ba119097d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,6 @@ test: &test key: v1.03-libhcl- - run: make build-python - run: pip install --user pytest - - run: pip install --user future - run: python -m pytest tests - run: pip install --user mxnet - run: python -m pytest samples diff --git a/.gitignore b/.gitignore index 65f3dfcf8..a70651d15 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ tags docs/source/samples docs/source/tutorials soda_* +*.cpp +*.h out # Downloaded files diff --git a/HISTORY b/HISTORY deleted file mode 100644 index e08d564bc..000000000 --- a/HISTORY +++ /dev/null @@ -1,11 +0,0 @@ -### 2019-12-09 - * fixed issue of zc706 simulation - * remove kernel-name variable allocation before KernelDef - * change multi-dimension array access to row-major single-dimension access - * create local buffer for each on-device variable - * updated the `KernelUpdater` class (using position index instead of name) - * added `stream_arg_pos` map in `CodeGenC` to facilitate codegen with streaming - * fixed test cases - * changed tvm `build` function to support legacy string type target - * fixed opencl aocl data type mismatching issue - * fixed kernel def data type conversion issue diff --git a/Makefile b/Makefile index 9508b9171..88c653d77 100644 --- a/Makefile +++ b/Makefile @@ -12,15 +12,15 @@ build-tvm: build-pkgs build-hcl: build-tvm cd python; \ - python setup.py develop --user; \ + python setup.py install --user; \ cd ../hlib/python; \ - python setup.py develop --user; + python setup.py install --user; build-python: cd python; \ - python setup.py develop --user; \ + python setup.py install --user; \ cd ../hlib/python; \ - python setup.py develop --user; + python setup.py install --user; clean: rm -rf build diff --git a/Makefile.config b/Makefile.config index 60d1cfd3e..2060d201c 100644 --- a/Makefile.config +++ b/Makefile.config @@ -12,9 +12,6 @@ 
CMAKE_OK = no # set whether to use vivado hls runtime USE_VIVADO_HLS = 1 -# set whether to use sdaccel opencl runtime -USE_SDACCEL_HLS = 1 - # Specify current directory level with respect to CLAY_ROOT ifndef LEVEL LEVEL := . diff --git a/hlib/python/hlib/nn.py b/hlib/python/hlib/nn.py index 8f1c4d0e8..c8fa146a8 100644 --- a/hlib/python/hlib/nn.py +++ b/hlib/python/hlib/nn.py @@ -32,17 +32,6 @@ def _pad(*indices): return data[tuple(index_tuple)] return hcl.compute(out_shape, _pad, name='pad') -def conv2d_nchw_imp(Input, Filter, Output, stride=[1,1], padding=[[0,0],[0,0]]): - with hcl.for_(0,Output.shape[0]) as n: - with hcl.for_(0,Output.shape[1]) as c: - with hcl.for_(0,Output.shape[2]) as h: - with hcl.for_(0,Output.shape[3]) as w: - partial = hcl.scalar(0) - with hcl.for_(0,Filter.shape[-2]) as x: - with hcl.for_(0,Filter.shape[-1]) as y: - partial.v += Input[n][c][h+x][w+y] * Filter[0][0][x][y] - Output[n,c,h,w] = partial - def conv2d_nchw(Input, Filter, name="conv2d", stride=[1,1], padding=[[0,0],[0,0]]): out_dtype = Input.dtype batch, in_channel, in_height, in_width = Input.shape diff --git a/hlib/rocc-ppac b/hlib/rocc-ppac deleted file mode 160000 index 40d323d0c..000000000 --- a/hlib/rocc-ppac +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 40d323d0c81e2f64dbfb63afb5eb5d6ccf7c5e48 diff --git a/python/heterocl/__init__.py b/python/heterocl/__init__.py index 4b90160f0..588196177 100644 --- a/python/heterocl/__init__.py +++ b/python/heterocl/__init__.py @@ -3,7 +3,6 @@ from .compute_api import * from .dsl import * from .types import * -from .devices import * from .nparray import * from .debug import hcl_excepthook from .tvm.intrin import * diff --git a/python/heterocl/api.py b/python/heterocl/api.py index f3e2151c8..4da52786f 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -53,7 +53,7 @@ def app2(A, B, C): # execute f2 """ # set the configurations - config.init_dtype = init_dtype + config.init_dtype = init_dtype # initialize global variables 
Schedule.stage_ops = [] Schedule.last_stages = OrderedSet([]) @@ -90,7 +90,7 @@ def placeholder(shape, name=None, dtype=None): """ name = util.get_name("placeholder", name) dtype = util.get_dtype(dtype) - + if shape == (): return Scalar(tvm_api._Var(name, dtype)) tensor = Tensor(shape, dtype, name) diff --git a/python/heterocl/debug.py b/python/heterocl/debug.py index a885d2e0b..cba313e23 100644 --- a/python/heterocl/debug.py +++ b/python/heterocl/debug.py @@ -45,11 +45,6 @@ class TensorError(HCLError): def __init__(self, msg): HCLError.__init__(self, msg, "\33[1;31m[Tensor]\33[0m ") -class DeviceError(HCLError): - """A subclass for specifying device related exception""" - def __init__(self, msg): - HCLError.__init__(self, msg, "\33[1;31m[Device]\33[0m ") - def hcl_excepthook(etype, value, tb): """Customized excepthook diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py deleted file mode 100644 index a5d81df86..000000000 --- a/python/heterocl/devices.py +++ /dev/null @@ -1,278 +0,0 @@ -"""Define HeteroCL device types""" -#pylint: disable=too-few-public-methods, too-many-return-statements -from .debug import DeviceError -from .tools import option_table, model_table -from future.utils import with_metaclass - -class tooling(type): - def __getattr__(cls, key): - if key in option_table: - return cls(key, *option_table[key]) - else: # unsupported device - raise DeviceError("not supported") - -class tool(with_metaclass(tooling, object)): - """The base class for all device tooling - - mode (sim/impl) is decided by tool configuration - e.g. 
run sw emulation by passing gcc / vivado_hls arg - and actual impl by passing sdaccel / aocl arg - - Parameters - ---------- - types: str - Device of device to place data - model: str - Model of device to place date - """ - def __init__(self, name, mode, kwargs): - self.name = name - self.mode = mode - self.options = kwargs - - def __getattr__(self, entry): - return self.mapping[entry] - - def __call__(self, mode, setting={}): - self.mode = mode - self.options = setting - return self - - def __str__(self): - return str(self.name) + "-" + \ - str(self.mode) + ":\n" + \ - str(self.options) - - def __repr__(self): - return str(self.name) + "-" + \ - str(self.mode) + ":\n" + \ - str(self.options) - -tool_table = { - "aws_f1" : tool("sdaccel", *option_table["sdaccel"]), - "zc706" : tool("vivado_hls", *option_table["vivado_hls"]), - "ppac" : tool("rocket", *option_table["rocket"]), - "stratix10_sx": tool("aocl", *option_table["aocl"]), - "llvm" : tool("llvm", *option_table["llvm"]) -} - -class Device(object): - """The base class for all device types - - The default data placement is on CPU. 
- - Parameters - ---------- - types: str - Device of device to place data - model: str - Model of device to place date - """ - def __init__(self, types, vendor, - model, **kwargs): - self.vendor = vendor - self.types = types - self.model = model - self.impls = {"lang": ""} - for key, value in kwargs.items(): - self.impls[key] = value - - def __getattr__(self, key): - """ device hierarchy """ - return self.impls[key] - - def set_lang(self, lang): - assert lang in \ - ["opencl", "hlsc", "c", "opengl", "merlinc", "cuda", "metal"], \ - "unsupported lang sepc " + lang - self.impls["lang"] = lang - return self - -class CPU(Device): - """cpu device with different models""" - def __init__(self, vendor, model, **kwargs): - if vendor not in ["riscv", "arm", "intel", "sparc", "powerpc"]: - raise DeviceError(vendor + " not supported yet") - assert "cpu_" + model in model_table[vendor], \ - model + " not supported yet" - super(CPU, self).__init__("CPU", vendor, model, **kwargs) - def __repr__(self): - return "cpu-" + self.vendor + "-" + str(self.model) + \ - ":" + self.impls["lang"] - -class FPGA(Device): - """fpga device with different models""" - def __init__(self, vendor, model, **kwargs): - if vendor not in ["xilinx", "intel"]: - raise DeviceError(vendor + " not supported yet") - assert "fpga_" + model in model_table[vendor], \ - model + " not supported yet" - super(FPGA, self).__init__("FPGA", vendor, model, **kwargs) - def __repr__(self): - return "fpga-" + self.vendor + "-" + str(self.model) + \ - ":" + self.impls["lang"] - -class GPU(Device): - """gpu device with different models""" - def __init__(self, vendor, model, **kwargs): - if vendor not in ["nvidia", "amd"]: - raise DeviceError(vendor + " not supported yet") - assert "gpu_" + model in model_table[vendor], \ - model + " not supported yet" - super(GPU, self).__init__("GPU", vendor, model, **kwargs) - def __repr__(self): - return "gpu-" + self.vendor + "-" + str(self.model) + \ - ":" + self.impls["lang"] - -class 
PIM(Device): - """cpu device with different models""" - def __init__(self, vendor, model, **kwargs): - if model not in ["ppac"]: - raise DeviceError(model + " not supported yet") - super(PIM, self).__init__("PIM", vendor, model, **kwargs) - def __repr__(self): - return "pim-" + str(self.model) - -dev_table = { - "aws_f1" : [CPU("intel", "e5"), FPGA("xilinx", "xcvu19p")], - "zc706" : [CPU("arm", "a9"), FPGA("xilinx", "xc7z045")], - "rocc-ppac" : [CPU("riscv", "riscv"), PIM("ppac", "ppac")], - "stratix10_sx": [CPU("arm", "a53"), FPGA("intel", "stratix10_gx")] -} - -class env(type): - """The platform class for compute environment setups - - serves as meta-class for attr getting - default platform: aws_f1, zynq, ppac - - Parameters - ---------- - host: str - Device of device to place data - model: str - Model of device to place date - """ - def __getattr__(cls, key): - if key == "aws_f1": - devs = dev_table[key] - host = devs[0].set_lang("opencl") - xcel = devs[1].set_lang("hlsc") - elif key == "zc706": - devs = dev_table[key] - host = devs[0].set_lang("hlsc") - xcel = devs[1].set_lang("hlsc") - elif key == "llvm": - devs = None - host = None - xcel = None - elif key == "ppac": - devs = dev_table["rocc-ppac"] - host = devs[0].set_lang("c") - xcel = None - else: # unsupported device - raise DeviceError("not supported") - tool = tool_table[key] - return cls(key, devs, host, xcel, tool) - -class platform(with_metaclass(env, object)): - def __init__(self, name, devs, host, xcel, tool): - self.name = name - self.devs = devs - self.host = host - self.xcel = xcel - self.tool = tool - - if isinstance(host, CPU): - self.cpu = host - if isinstance(xcel, FPGA): - self.fpga = xcel - elif isinstance(xcel, PIM) and \ - xcel.model == "ppac": - self.ppac = xcel - - def __getattr__(self, key): - """ return tool options """ - return self.tool.__getattr__(key) - - def __call__(self, tooling=None): - if tooling: # check and update - assert isinstance(tooling, tool) - self.tool = tooling - 
return self - - def __str__(self): - return str(self.name) + "(" + \ - str(self.host) + " : " + \ - str(self.xcel) + ")" - - def __repr__(self): - return str(self.name) + "(" + \ - str(self.host) + " : " + \ - str(self.xcel) + ")" - -def device_to_str(dtype): - """Convert a device type to string format. - - Parameters - ---------- - dtype : Device or str - The device type to be converted - - Returns - ------- - str - The converted device type in string format. - """ - if isinstance(dtype, Device): - if isinstance(dtype, CPU): - return "cpu_" + str(dtype.model) - elif isinstance(dtype, FPGA): - return "fpga_" + str(dtype.model) - else: - if not isinstance(dtype, str): - raise DeviceError("Unsupported device type format") - return dtype - -def device_to_hcl(dtype): - """Convert a device type to Heterocl type. - - Parameters - ---------- - dtype : Device or str - The device type to be converted - - Returns - ------- - Device - """ - if isinstance(dtype, Device): - return dtype - elif isinstance(dtype, str): - device, model = dtype.split("_") - if device == "cpu": - return CPU(model) - elif device == "gpu": - return GPU(model) - elif device == "fpga": - return FPGA(model) - else: - raise DeviceError("Unrecognized device type") - else: - raise DeviceError("Unrecognized device type format") - -def get_model(dtype): - """Get the model of a given device type. 
- - Parameters - ---------- - dtype : Device or str - The given device type - - Returns - ------- - str - """ - dtype = dtype_to_hcl(dtype) - return dtype.types, dtype.model - diff --git a/python/heterocl/dsl.py b/python/heterocl/dsl.py index b226cb0ab..6d42031f1 100644 --- a/python/heterocl/dsl.py +++ b/python/heterocl/dsl.py @@ -405,7 +405,6 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n raise APIError("The number of data types does not match the of arguments") for (name_, dtype_) in zip(new_names, dtypes): dtypes.append(util.get_dtype(dtype_, name_)) - dtypes = dtypes[int(len(dtypes)/2):] else: dtype = util.get_dtype(dtypes) dtypes = [] @@ -415,20 +414,15 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n # prepare inputs for IR generation inputs = [] inputs_tvm = [] - arg_shapes, arg_dtypes = [], [] for shape, name_, dtype in zip(shapes, new_names, dtypes): if shape == (): var_ = placeholder((), name_, dtype) inputs.append(var_) inputs_tvm.append(var_.var) - arg_shapes.append([1]) - arg_dtypes.append(dtype) - else: # tensor inputs (new bufs) + else: placeholder_ = placeholder(shape, name_, dtype) inputs.append(placeholder_) inputs_tvm.append(placeholder_.buf.data) - arg_shapes.append(list(shape)) - arg_dtypes.append(dtype) s.ret_dtype = ret_dtype fmodule(*inputs) @@ -441,8 +435,7 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n ret_void = _make.UIntImm("uint1", 0) if s.has_return else _make.UIntImm("uint1", 1) body = s.pop_stmt() s.stmt_stack.append([]) - s.emit(_make.KernelDef(inputs_tvm, arg_shapes, arg_dtypes, - body, ret_void, ret_dtype, name, [])) + s.emit(_make.KernelDef(inputs_tvm, body, ret_void, ret_dtype, name)) for name_, i in zip(names, inputs): s.var_dict[name_] = i s.input_stages.clear() diff --git a/python/heterocl/mutator.py b/python/heterocl/mutator.py index 7d49f1e76..88ca42788 100644 --- a/python/heterocl/mutator.py +++ 
b/python/heterocl/mutator.py @@ -77,8 +77,6 @@ def mutate(self, node): return self.mutate_SetSlice(node) elif isinstance(node, _expr.KernelExpr): return self.mutate_KernelExpr(node) - elif isinstance(node, _expr.StreamExpr): - return self.mutate_StreamExpr(node) else: return node elif isinstance(node, _stmt.Stmt): @@ -114,8 +112,6 @@ def mutate(self, node): return self.mutate_Break(node) elif isinstance(node, _stmt.While): return self.mutate_While(node) - elif isinstance(node, _stmt.StreamStmt): - return self.mutate_StreamStmt(node) else: return node elif isinstance(node, tuple): @@ -252,10 +248,6 @@ def mutate_KernelExpr(self, node): args = self.mutate(node.args) return _make.KernelExpr(node.dtype, args, node.name) - def mutate_StreamExpr(self, node): - args = self.mutate(node.args) - return _make.StreamExpr(node.dtype, args, node.name) - # statements def mutate_LetStmt(self, node): var = self.mutate(node.var) @@ -328,10 +320,6 @@ def mutate_KernelStmt(self, node): args = self.mutate(node.args) return _make.KernelStmt(args, node.name) - def mutate_StreamStmt(self, node): - args = self.mutate(node.args) - return _make.StreamStmt(node.dtype, args, node.name) - def mutate_Return(self, node): value = self.mutate(node.value) return _make.Return(value) diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index 03af1cf3e..abd74acdc 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -5,7 +5,6 @@ from ordered_set import OrderedSet from .tvm import make as _make from .tvm import stmt as _stmt -from .tvm import expr as _expr from .tvm import api as tvm_api from .tvm import _api_internal from .tvm._api_internal import _ExternOp @@ -135,42 +134,6 @@ def reuse_at(self, target, parent, axis, name=None): name = target.name + ".reuse" return self.sch.reuse_at(target, parent, axis, name) - def to(self, tensors, dst, src=None, - stream_type=_expr.StreamExpr.Channel, depth=10, name=None): - """Stream a list of Tensors to dst devices - - 
Parameters - ---------- - tensors : list of Tensor - The tensors to be moved - - dst : device or module - The tensors to be moved - - stream_type : {FIFO, Channel, Burst}, optional - The stream type - """ - if stream_type > 2: - raise APIError("Invalid channel type") - rets = [] - if not isinstance(tensors, list): - tensors = [tensors] - for tensor in tensors: - try: - target = tensor.tensor - except (AttributeError, ValueError): - try: - target = tensor._op - except AttributeError: - target = tensor - if name is None: - name = target.name + ".stream" - ret = self.sch.to(target, dst, src, - stream_type, depth, name) - name = None - rets.append(ret) - return rets - def partition(self, target, partition_type=_stmt.Partition.Complete, dim=0, factor=0): """Partition a Tensor into smaller Tensors or even registers @@ -339,7 +302,7 @@ def __exit__(self, ptype, value, trace): # create the output operation input_ops = [i._op for i in self.input_stages] input_bufs = [i._buf for i in self.input_stages] - output_bufs = [self._buf] + output_bufs = [self._buf] body = self.pop_stmt() Stage._current.pop() op = _ExternOp(self.name, "", self.axis_list, input_ops, @@ -368,7 +331,8 @@ def __exit__(self, ptype, value, trace): superstage.var_dict[self.name] = self # update prefix self.name_with_prefix = superstage.name_with_prefix + "." 
+ self.name - else: # otherwise update the list of stages globally + # Otherwise update the list of stages globally + else: Schedule.stage_ops.append(self) Schedule.last_stages.add(self) Schedule.last_stages -= self.input_stages diff --git a/python/heterocl/tools.py b/python/heterocl/tools.py deleted file mode 100644 index bf47753fa..000000000 --- a/python/heterocl/tools.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Define HeteroCL default tool settings""" -#pylint: disable=too-few-public-methods, too-many-return-statements - -model_table = { - "xilinx" : ["fpga_xc7z045", "fpga_xcvu19p"], - "intel" : ["cpu_e5", "cpu_i7", "fpga_stratix10_gx", - "fpga_stratix10_dx", "fpga_stratix10_mx"], - "arm" : ["cpu_a7", "cpu_a9", "cpu_a53"], - "riscv" : ["cpu_riscv"] -} - -option_table = { - "llvm" : ("llvm_sim", {"version" : "6.0.0"}), - "sdaccel" : ("sw_emu", {"version" : "2017.1", "clock" : "1"}), - "vivado_hls" : ("csim", {"version" : "2017.1"}), - "rocket" : ("source", {"RISCV" : ""}), - - # refer to xilinx2016_1/ug904-vivado-implementation.pdf - "vivado" : ("pnr", - {"version" : "2017.1", - "logic" : ["Default", "Explore", "ExploreSequentialArea", "AddRemap", "ExploreArea"], - "placement" : ["Default", "Explore", "ExtraNetDelay_high", "ExtraNetDelay_medium", "ExtraNetDelay_low", "ExtraPostPlacementOpt", "WLDrivenBlockPlacement", "LateBlockPlacement", "AltSpreadLogic_low", "AltSpreadLogic_medium", "AltSpreadLogic_high"], - "routing" : ["Default", "Explore", "HigherDelayCost"], - "fanout_opt" : ["on", "off"], - "placement_opt" : ["on", "off"], - "critical_cell_opt" : ["on", "off"], - "critical_pin_opt" : ["on", "off"], - "retime" : ["on", "off"], - "rewire" : ["on", "off"], - }), - - "quartus" : ("pnr", - {"version" : "17.1", - "auto_dsp_recognition" : ['On', 'Off'], - "disable_register_merging_across_hierarchies": ['On', 'Off', 'Auto'], - "mux_restructure" : ['On', 'Off', 'Auto'], - "optimization_technique" : ['Area', 'Speed', 'Balanced'], - "synthesis_effort" : ['Auto', 'Fast'], 
- "synth_timing_driven_synthesis" : ['On', 'Off'], - "fitter_aggressive_routability_optimization" : ['Always', 'Automatically', 'Never'], - "fitter_effort" : ['Standard Fit', 'Auto Fit'], - "remove_duplicate_registers" : ['On', 'Off'], - "physical_synthesis" : ['On', 'Off'], - "adv_netlist_opt_synth_wysiwyg_remap" : ['On', 'Off'], - "allow_any_ram_size_for_recognition" : ['On', 'Off'], - "allow_any_rom_size_for_recognition" : ['On', 'Off'], - "allow_any_shift_register_size_for_recognition" : ['On', 'Off'], - "allow_power_up_dont_care" : ['On', 'Off'], - "allow_shift_register_merging_across_hierarchies" : ["Always", "Auto", "Off"], - "allow_synch_ctrl_usage" : ['On', 'Off'], - "auto_carry_chains" : ['On', 'Off'], - "auto_clock_enable_recognition" : ['On', 'Off'], - "auto_dsp_recognition" : ['On', 'Off'], - "auto_enable_smart_compile" : ['On', 'Off'], - "auto_open_drain_pins" : ['On', 'Off'], - "auto_ram_recognition" : ['On', 'Off'], - "auto_resource_sharing" : ['On', 'Off'], - "auto_rom_recognition" : ['On', 'Off'], - "auto_shift_register_recognition" : ["Always", "Auto", "Off"], - "disable_register_merging_across_hierarchies" : ["Auto", "On", "Off"], - "enable_state_machine_inference" : ['On', 'Off'], - "force_synch_clear" : ['On', 'Off'], - "ignore_carry_buffers" : ['On', 'Off'], - "ignore_cascade_buffers" : ['On', 'Off'], - "ignore_max_fanout_assignments" : ['On', 'Off'], - "infer_rams_from_raw_logic" : ['On', 'Off'], - "mux_restructure" : ["Auto", "On", "Off"], - "optimization_technique" : ["Area", "Balanced", "Speed"], - "optimize_power_during_synthesis" : ["Extra effort", "Normal compilation", "Off"], - "remove_duplicate_registers" : ['On', 'Off'], - "shift_register_recognition_aclr_signal" : ['On', 'Off'], - "state_machine_processing" : - ["Auto", "Gray", "Johnson, Minimal Bits", "One-Hot", "Sequential", "User-Encoded"], - "strict_ram_recognition" : ['On', 'Off'], - "synthesis_effort" : ["Auto", "Fast"], - 
"synthesis_keep_synch_clear_preset_behavior_in_unmapper" : ['On', 'Off'], - "synth_resource_aware_inference_for_block_ram" : ['On', 'Off'], - "synth_timing_driven_synthesis" : ['On', 'Off'], - "alm_register_packing_effort" : ["High", "Low", "Medium"], - "auto_delay_chains" : ['On', 'Off'], - "auto_delay_chains_for_high_fanout_input_pins" : ["On", "Off"], - "eco_optimize_timing" : ["On", "Off"], - "final_placement_optimization" : ["Always", "Automatically", "Never"], - "fitter_aggressive_routability_optimization" : ["Always", "Automatically", "Never"], - "fitter_effort" : ["Standard Fit", "Auto Fit"], - "optimize_for_metastability" : ["On", "Off"], - "optimize_hold_timing" : ["All Paths", "IO Paths and Minimum TPD Paths", "Off"], - "optimize_ioc_register_placement_for_timing" : - ["Normal", "Off", "Pack All IO Registers"], - "optimize_multi_corner_timing" : ['On', 'Off'], - "optimize_power_during_fitting" : ["Extra effort", "Normal compilation", "Off"], - "physical_synthesis" : ['On', 'Off'], - "placement_effort_multiplier" : [0.2, 0.5, 1.0, 2.0, 3.0, 4.0], - "programmable_power_technology_setting" : ["Automatic", "Force All Tiles with Failing Timing Paths to High Speed", "Force All Used Tiles to High Speed", "Minimize Power Only"], - "qii_auto_packed_registers" : ["Auto", "Minimize Area", "Minimize Area with Chains", "Normal", "Off", "Sparse", "Sparse Auto"], - "router_clocking_topology_analysis" : ['On', 'Off'], - "router_lcell_insertion_and_logic_duplication" : ["Auto", "On", "Off"], - "router_register_duplication" : ["Auto", "On", "Off"], - "router_timing_optimization_level" : ["MINIMUM", "Normal", "MAXIMUM"], - "seed" : (1, 5), - "tdc_aggressive_hold_closure_effort" : ['On', 'Off'], - "allow_register_retiming" : ['On', 'Off']}), - - "aocl" : ("emu", {"version" : "17.0", - "clokc" : 1.5, - }) -} - diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 47b4e31ae..c8dcc91f2 100755 --- a/python/heterocl/tvm/build_module.py +++ 
b/python/heterocl/tvm/build_module.py @@ -6,10 +6,8 @@ from __future__ import absolute_import as _abs import warnings import types -import os from ._ffi.node import NodeBase, register_node -from ._ffi.function import register_func from ._ffi.base import _RUNTIME_ONLY from . import api from . import tensor @@ -23,48 +21,6 @@ from . import ndarray from . import target as _target from . import make -from ..devices import platform - -# test build sim -@register_func -def tvm_callback_syn_postproc(code): - return "test" - -@register_func -def get_util_path(platform): - if platform == "aws_f1": - return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/sdaccel/" - elif platform == "rocket": - ppac = "/work/zhang-x1/users/sx233/heterocl/hlib/rocc-ppac" - emulator = os.path.join(ppac, "rocket/emulator/emulator-freechips." + \ - "rocketchip.system-RoccExampleConfig-debug") - # build emulator if not exist - if not os.path.isfile(emulator): - cmd = "cd " + ppac + ";" - cmd += "cp src/Ppac.v rocket/src/main/resources/vsrc;" + \ - "cp src/PpacRoCC.scala rocket/src/main/scala/tile;" + \ - "cd rocket && git apply ../src/rocc-ppac.patch;" + \ - "cd emulator && make CONFIG=RoccExampleConfig debug" - # create subprocess to check - subprocess.Popen(cmd, shell=True, stdout=open("build.log", "w")).wait() - - # re-build proxy kernel - if not os.path.isfile(ppac + "/rocket/riscv-pk/build/pk"): - cmd = "cd " + ppac + "/rocket/riscv-pk;" - cmd += "git apply ../../tests/patches/riscv-pk.patch;" - cmd += "mkdir build; cd build;" - cmd += " ../configure --prefix=$RISCV/riscv64-unknown-elf --host=riscv64-unknown-elf;" - cmd += "make -j8; make install" - subprocess.Popen(cmd, shell=True, stdout=open("build.log", "w")).wait() - # return util folder needed to compile generated test files - return "/work/zhang-x1/users/sx233/heterocl/rocc-ppac/tests" - - # copy tcl and testbench - elif platform == "vivado_hls": - return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/vivado" - - else: # 
unrecognized platform - assert False, "unsupported platform" class DumpIR(object): """ @@ -384,7 +340,6 @@ def lower(sch, stmt = f(stmt) # Phase 1 stmt = ir_pass.StorageFlatten(stmt, binds, 64) - stmt = ir_pass.InferStream(stmt, 32) #stmt = ir_pass.CanonicalSimplify(stmt) #TODO: SOLVE THIS!! stmt = ir_pass.LiftAllocateAttrs(stmt) if cfg.generate_reuse_buffer: @@ -423,7 +378,7 @@ def lower(sch, else: return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func) -def build_fpga_kernel(sch, args, target, name="default_function"): +def build_fpga_kernel(sch, args, target_name, name="default_function"): """Build an FPGA kernel. Parameters @@ -452,66 +407,20 @@ def build_fpga_kernel(sch, args, target, name="default_function"): if args is None: raise ValueError("args must be given for build from schedule") - # generate host (device) code / function - if target == "merlinc": + if target_name == "merlinc": BuildConfig.current = build_config(generate_reuse_buffer=False) else: BuildConfig.current = build_config() - flist = lower(sch, args, kernel_only=True, name=name) if isinstance(flist, container.LoweredFunc): flist = [flist] - fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] - - if isinstance(target, str): # string type - builder = getattr(codegen, "build_{0}".format(target)) - ret = builder(fdevice) - if isinstance(ret, str): - decl = ret[:ret.find("{device}")] - start = ret.find("{host}") - end = ret.rfind("{host}") - ret = decl + "\n" + ret[start+6:end] - ret = ret.strip("\n").lstrip("\n") + "\n\n" - return ret - - try: # generate and split code - host, xcel = None, None - if target.tool.name == "sdaccel": - host = target.host.lang.replace("opencl", "aocl") - xcel = target.xcel.lang.replace("hlsc", "vhls") - elif target.tool.name == "vivado_hls": - host = target.host.lang.replace("hlsc", "vhls") - xcel = target.xcel.lang.replace("hlsc", "vhls") - elif target.tool.name == "rocket": - host = target.host.lang.replace("c", "rv64_ppac") - - # return 
simulation built function - mode = str(target.tool.mode) - if "emu" in mode or "sim" in mode: - builder = getattr(codegen, "build_{0}".format("sim")) - keys = [k for k in target.tool.options.keys()] - vals = [v for v in target.tool.options.values()] - keys.insert(0, "name") - vals.insert(0, target.tool.name) - return builder(fdevice, keys, vals) - elif mode != "debug": # impl mode - pass - else: # return source code only - host_code, xcel_code = "", "" - if host: # src mode generate host code - builder = getattr(codegen, "build_{0}".format(host)) - host_code = builder(fdevice) - findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") - host_code = host_code[findex + 6 : rindex] - if xcel: # src mode generate xcel code - builder = getattr(codegen, "build_{0}".format(xcel)) - xcel_code = builder(fdevice) - findex, rindex = xcel_code.find("{device}"), xcel_code.rfind("{device}") - xcel_code = xcel_code[findex + 8 : rindex] - return xcel_code + host_code + fdevice = [ir_pass.LowerIntrin(x, target_name) for x in flist] + try: + builder = getattr(codegen, "build_{0}".format(target_name)) + return builder(fdevice) except AttributeError: - raise AttributeError("Cannot find the target builder %s" % target) + raise AttributeError("Cannot find the target builder %s" % target_name) return None def build(sch, @@ -559,13 +468,11 @@ def build(sch, ---- See the note on :any:`tvm.target` on target string format. 
""" - if isinstance(target, platform): - return build_fpga_kernel(sch, args, target, name=name) - else: # default string type target - target = _target.current_target() if target is None else target - target = _target.create(target) if target else _target.create("llvm") - if "fpga" in target.keys: - return build_fpga_kernel(sch, args, target.target_name, name=name) + target = _target.current_target() if target is None else target + target = _target.create(target) if target else _target.create("llvm") + + if "fpga" in target.keys: + return build_fpga_kernel(sch, args, target.target_name, name=name) BuildConfig.current = build_config() if isinstance(sch, schedule._Schedule): diff --git a/python/heterocl/tvm/expr.py b/python/heterocl/tvm/expr.py index d1ea4ae75..d71307e8f 100644 --- a/python/heterocl/tvm/expr.py +++ b/python/heterocl/tvm/expr.py @@ -382,9 +382,3 @@ class Quantize(Expr): @register_node class KernelExpr(Expr): pass - -@register_node -class StreamExpr(Expr): - Channel = 0 - Pipe = 1 - FIFO = 2 diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 36ead39de..21905b443 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -3,7 +3,6 @@ from ._ffi.base import string_types from ._ffi.node import NodeBase, register_node from ._ffi.function import _init_api -from ..devices import Device from . import _api_internal from . import tensor as _tensor from . import expr as _expr @@ -333,53 +332,6 @@ def reuse_at(self, target, parent, axis, name): def partition(self, target, partition_type, dim, factor): return _api_internal._SchedulePartition(self, target, dim, factor, partition_type) - def to(self, tensor, dst, src, - types=_expr.StreamExpr.Channel, - depth=1, name=None): - """ Stream data to devices or on-chip module - - Parameters - ---------- - tensor : list of Tensors - Tensor to be streamed. 
- dst : hcl device or dst stage - The device or module for streaming - type : channel type - The streaming type (e.g. fifo or pipe) - - Returns - ------- - outer : IterVar - The outer variable of iteration. - """ - # create producer and consumer for stream - if isinstance(dst, Device): - dst = 1 if 'fpga' in str(dst) else 0 - return _api_internal._ScheduleMove(self, tensor, dst, - types, depth, name) - else: # connect kernel - assert isinstance(dst, _Stage), "dst not a stage " - if src: # remove buffer between kernels - assert isinstance(src, _Stage), \ - "destination should be a stage but " + str(type(src)) - try: - self.remove_args.append(tensor.op.output(0)) - except: - self.remove_args = [] - self.remove_args.append(tensor.op.output(0)) - _api_internal._ScheduleStream(self, tensor, dst, src, - types, depth, name) - else: # from externop buffer to kernel - shape = [_.value for _ in tensor.shape] - index, match = 0, [] - for s in dst.op.body.api_args: - arg_shape = [_.value for _ in s] - if shape == arg_shape: match.append(index) - index = index + 1 - assert len(match) > 0, "wrong kernel or tensor (shape not matching)" - _api_internal._ScheduleMoveToStage(self, tensor, dst, match[0], - types, depth, name) - @register_node("Stage") class _Stage(NodeBase): """A Stage represents schedule for one operation. @@ -702,7 +654,7 @@ def pragma(self, var, pragma_type): - **parallel_stride_pattern** Hint parallel loop to execute in strided pattern. 
- :code:`for (int i = task_id; i < end; i += num_task)` + :code:`for (int i = task_id; i < end; i += num_task)` """ _api_internal._StagePragma(self, var, pragma_type) diff --git a/python/heterocl/tvm/stmt.py b/python/heterocl/tvm/stmt.py index d5c2d0a18..4db84970f 100644 --- a/python/heterocl/tvm/stmt.py +++ b/python/heterocl/tvm/stmt.py @@ -112,7 +112,3 @@ class Partition(Stmt): @register_node class Stencil(Stmt): pass - -@register_node -class StreamStmt(Stmt): - pass diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 5687953ca..12235d95d 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -1,3 +1,43 @@ +"""Target management API of TVM. + +TVM's target string is in format ``target_name [-option=value]...``. + +Note +---- +The list of options includes: + +- **-device=** + + The device name. + +- **-mtriple=** or **-target** + + Specify the target triple, which is useful for cross + compilation. + +- **-mcpu=** + + Specify a specific chip in the current architecture to + generate code for. By default this is inferred from the + target triple and autodetected to the current architecture. + +- **-mattr=a1,+a2,-a3,...** + + Override or control specific attributes of the target, + such as whether SIMD operations are enabled or not. The + default set of attributes is set by the current CPU. + +- **-system-lib** + + Build TVM system library module. System lib is a global module that contains + self registered functions in program startup. User can get the module using + :any:`tvm.module.system_lib`. + It is useful in environments where dynamic loading api like dlopen is banned. + The system lib will be available as long as the result code is linked by the program. + +We can use :any:`tvm.target.create` to create a tvm.target.Target from the target string. +We can also use other specific functions in this module to create specific targets. 
+""" from __future__ import absolute_import import warnings @@ -10,8 +50,7 @@ if _LIB_NAME != "libhcl_runtime.so": raise err_msg -FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', - 'opencl', 'sdaccel', 'sdaccel_csim', 'aocl', 'aocl_csim', 'rv64_ppac'] +FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim'] def _merge_opts(opts, new_opts): """Helper function to merge options""" @@ -29,7 +68,7 @@ class Target(object): Parameters ---------- - target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "opengl", "ext_dev", "rv64_ppac"} + target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "opengl", "ext_dev"} The major target name. {"merlinc", "soda", "soda_xhls", "vhls"} diff --git a/python/heterocl/util.py b/python/heterocl/util.py index 704b774cb..996201105 100644 --- a/python/heterocl/util.py +++ b/python/heterocl/util.py @@ -4,7 +4,6 @@ from .tvm.expr import Var, Call from .tvm.api import _IterVar, decl_buffer from . import types -from . import devices from . 
import config from .scheme import Scheme from .debug import DTypeError diff --git a/samples/conv/conv.py b/samples/conv/conv.py deleted file mode 100644 index ca41a50a1..000000000 --- a/samples/conv/conv.py +++ /dev/null @@ -1,70 +0,0 @@ -import heterocl as hcl -import hlib -import numpy as np -from PIL import Image -from urllib.request import urlopen - -batch_size = 1 -hcl.init(hcl.UInt(32)) -dtype = hcl.UInt(32) -image_size = () -kernel_size = 3 - -# setup target using vivado -tool = hcl.tool.vivado("csim") -target = hcl.platform.zc706 - -def conv(): - image = hcl.placeholder((batch_size, 1, 256, 256), "input_image") - k1 = hcl.placeholder((1, 1, 3, 3), "kernel_1") - k2 = hcl.placeholder((1, 1, 3, 3), "kernel_2") - - def kernel(input_image, kernel_1, kernel_2): - - # return tensor required (cannot do def_()) - interm_shape = (1,1,254,254) - output_shape = (1,1,252,252) - - # make compute wrapped in hcl def - module1 = hcl.def_([input_image.shape, kernel_1.shape, interm_shape], name="conv1")(hlib.nn.conv2d_nchw_imp) - module2 = hcl.def_([interm_shape, kernel_2.shape, output_shape], name="conv2")(hlib.nn.conv2d_nchw_imp) - conv1 = hcl.compute(interm_shape, lambda *args: 0) - conv2 = hcl.compute(output_shape, lambda *args: 0) - module1(input_image, kernel_1, conv1) - module2(conv1, kernel_2, conv2) - - # derivative module for normalization - return hcl.compute(output_shape, lambda *args: conv2[args], name="derv") - - s = hcl.create_schedule([image, k1, k2], kernel) - - # data moved to local - i0, k10, k20 = s.to([image, k1, k2], target.fpga) - # s.to([i0, k10], s[kernel.conv1]) - # s.to([k20], s[kernel.conv2]) - s.to(kernel.derv, target.cpu) - - # create stream channel between modules - print(type(target.fpga), hcl.lower(s)) - return hcl.build(s, target) - -# Load sample data -img = Image.open(urlopen('http://i.stack.imgur.com/8zINU.gif')) -kernel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) -kernel_y = np.flip(kernel_x.T.T, axis=0) -img = np.array(img) - -img 
= img[np.newaxis, ...] -img = img[np.newaxis, ...] -kernel_x = kernel_x[np.newaxis, ...] -kernel_x = kernel_x[np.newaxis, ...] -kernel_y = kernel_y[np.newaxis, ...] -kernel_y = kernel_y[np.newaxis, ...] - -hcl_input = hcl.asarray(img, dtype) -kernel_x = hcl.asarray(kernel_x, dtype) -kernel_y = hcl.asarray(kernel_y, dtype) -hcl_output = hcl.asarray(np.zeros((1,1,254,254)), dtype) - -f = conv() -f(hcl_input, kernel_x, kernel_y, hcl_output) diff --git a/samples/digitrec/digitrec_stream.py b/samples/digitrec/digitrec_stream.py deleted file mode 100644 index 4c0da096a..000000000 --- a/samples/digitrec/digitrec_stream.py +++ /dev/null @@ -1,150 +0,0 @@ -import heterocl as hcl -import time -import numpy as np -import math -from digitrec_data import read_digitrec_data - -N = 8 * 8 -max_bit = int(math.ceil(math.log(N, 2))) -test_size = (180, ) -data_size = (10, 1800) - -dtype_image = hcl.UInt(N) -dtype_knnmat = hcl.UInt(max_bit) - -setting = { - "version" : "2019.1", - "clock" : "10" -} -tool = hcl.tool.vivado("csim", setting) -target = hcl.platform.aws_f1 - -def knn(test_images, train_images): - - def popcount(num): - out = hcl.local(0, "out") - with hcl.for_(0, train_images.type.bits) as i: - out.v += num[i] - return out.v - - def update_knn(dist, knn_mat, i, j): - max_id = hcl.local(0, "max_id") - with hcl.for_(0, 3) as k: - with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id.v]): - max_id.v = k - with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]): - knn_mat[i][max_id.v] = dist[i][j] - - def sort_knn(knn_mat, i, j): - val = hcl.local(0, "val") - with hcl.if_( j == 1 ): - with hcl.if_( knn_mat[i][1] > knn_mat[i][2] ): - val.v = knn_mat[i][1] - knn_mat[i][1] = knn_mat[i][2] - knn_mat[i][2] = val.v - with hcl.else_(): - with hcl.if_( knn_mat[i][0] > knn_mat[i][1] ): - val.v = knn_mat[i][0] - knn_mat[i][0] = knn_mat[i][1] - knn_mat[i][1] = val.v - - def knn_vote(knn_mat, j): - id0 = hcl.local(0, "id0") - id1 = hcl.local(0, "id1") - id2 = hcl.local(0, "id2") - count = hcl.local(0, 
"count") - with hcl.for_(0, 10) as n: - with hcl.if_(knn_mat[n][0] < knn_mat[id0.v][0]): - id0.v = n - with hcl.for_(0, 10) as m: - with hcl.if_(knn_mat[m][0] < knn_mat[id1.v][0]): - id1.v = m - with hcl.for_(0, 10) as k: - with hcl.if_(knn_mat[k][0] < knn_mat[id2.v][0]): - id2.v = k - with hcl.if_(j == id0.v): - count.v += 1 - with hcl.elif_(j == id1.v): - count.v += 1 - with hcl.elif_(j == id2.v): - count.v += 1 - with hcl.else_(): - count.v += 0 - return count.v - - # support hcl.compute in hcl def - @hcl.def_([(), data_size, (10,3)]) - def knn_dist(test_image, train_images, pred_matrix): - pass - - with hcl.for_(0, 180) as index: - test_image = test_images[index] - diff = hcl.compute(train_images.shape, - lambda x, y: train_images[x][y] ^ test_image, - "diff") - dist = hcl.compute(diff.shape, - lambda x, y: popcount(diff[x][y]), - "dist") - knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") - hcl.mutate(dist.shape, - lambda x, y: update_knn(dist, knn_mat, x, y), - "knn_update") - hcl.mutate((10, 3), lambda x, y: sort_knn(knn_mat, x, y), "sort") - knn_new = hcl.compute(knn_mat.shape, - lambda x, y: knn_mat[x][y], "copy") - knn_pred = hcl.compute((10,), - lambda x: knn_vote(knn_mat, x), "vote") - return knn_pred - -test_image = hcl.placeholder(test_size, "test_image", dtype_image) -train_images = hcl.placeholder(data_size, "train_images", dtype_image) - -scheme = hcl.create_scheme([test_image, train_images], knn) -scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat) - -s = hcl.create_schedule_from_scheme(scheme) - -diff = knn.diff -dist = knn.dist -vote = knn.copy -knn_update = knn.knn_update - -s.to([test_images, train_images], target.xcel) -s.to(vote, target.host) - -# merge loop nests -s[diff].compute_at(s[dist], dist.axis[1]) -s[dist].compute_at(s[knn_update], knn_update.axis[1]) - -# reorder loop to expose more parallelism -s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) - -# parallel outer loop and pipeline inner loop 
-s[knn_update].parallel(knn_update.axis[1]) -s[knn_update].pipeline(knn_update.axis[0]) - -# at the end, we build the whole offloaded function. -# print(hcl.lower(s)) -f = hcl.build(s, target) - -train_images, _, test_images, test_labels = read_digitrec_data() -total = len(test_images) -total_time = 0 - -# read returned prediction from streaming pipe -hcl_train_images = hcl.asarray(train_images, dtype_image) -hcl_knn_pred = hcl.asarray(np.zeros((total, 10)), dtype_knnmat) - -start = time.time() -f(test_images, hcl_train_images, hcl_knn_pred) -total_time = total_time + (time.time() - start) - -knn_result = hcl_knn_pred.asnumpy() - -correct = 0.0 -for i in range(total): - if np.argmax(knn_result[i]) == test_labels[i]: - correct += 1 - -print("Average kernel time (s): {:.2f}".format(total_time/total)) -print("Accuracy (%): {:.2f}".format(100*correct/1)) diff --git a/samples/digitrec/kernel.cpp b/samples/digitrec/kernel.cpp new file mode 100644 index 000000000..21b550c8b --- /dev/null +++ b/samples/digitrec/kernel.cpp @@ -0,0 +1,38 @@ +#include +#include +#include +#pragma ACCEL kernel +void default_function(unsigned long test_image, unsigned long* train_images, unsigned char* knn_mat) { + for (int x = 0; x < 10; ++x) { + for (int y = 0; y < 3; ++y) { + knn_mat[(y + (x * 3))] = (unsigned char)50; + } + } + unsigned long knn_update; +#pragma ACCEL parallel + for (int y1 = 0; y1 < 1800; ++y1) { +#pragma ACCEL pipeline + for (int x1 = 0; x1 < 10; ++x1) { + unsigned char dist; + unsigned long diff; + diff = (train_images[(y1 + (x1 * 1800))] ^ test_image); + unsigned char out; + out = (unsigned char)0; + for (int i = 0; i < 49; ++i) { + out = ((unsigned char)(((unsigned long)out) + ((unsigned long)((diff & (1L << i)) >> i)))); + } + dist = out; + unsigned long max_id; + max_id = (unsigned long)0; + for (int i1 = 0; i1 < 3; ++i1) { + if (knn_mat[(((long)max_id) + ((long)(x1 * 3)))] < knn_mat[(i1 + (x1 * 3))]) { + max_id = ((unsigned long)i1); + } + } + if (dist < 
knn_mat[(((long)max_id) + ((long)(x1 * 3)))]) { + knn_mat[(((long)max_id) + ((long)(x1 * 3)))] = dist; + } + } + } +} + diff --git a/samples/gemm/common/common.mk b/samples/gemm/common/common.mk deleted file mode 100644 index 3409e4aa5..000000000 --- a/samples/gemm/common/common.mk +++ /dev/null @@ -1,55 +0,0 @@ -SHELL = /bin/bash -VPATH = ./ -CC = xcpp -CLCC = xocc -ifeq ($(XDEVICE_REPO_PATH),) - DEVICE_REPO_OPT = -else -DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} -endif -HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 -HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread -CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} -ifeq (${KEEP_TEMP},1) - CLCC_OPT += -s -endif -ifeq (${KERNEL_DEBUG},1) - CLCC_OPT += -g -endif -CLCC_OPT += --kernel ${KERNEL_NAME} -OBJECTS := $(HOST_SRCS:.cpp=.o) -.PHONY: all -all: run -host: ${HOST_EXE_DIR}/${HOST_EXE} -xbin_cpu_em: - make SDA_FLOW=cpu_emu xbin -f sdaccel.mk -xbin_hw_em: - make SDA_FLOW=hw_emu xbin -f sdaccel.mk -xbin_hw : - make SDA_FLOW=hw xbin -f sdaccel.mk -xbin: ${XCLBIN} -run_cpu_em: - make SDA_FLOW=cpu_emu run_em -f sdaccel.mk -run_hw_em: - make SDA_FLOW=hw_emu run_em -f sdaccel.mk -run_hw : - make SDA_FLOW=hw run_hw_int -f sdaccel.mk -run_em: xconfig host xbin - XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} -run_hw_int : host xbin_hw - source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} -estimate : - ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS} -xconfig : emconfig.json -emconfig.json : - emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . 
-${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} - ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ -${XCLBIN}: - ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} -%.o: %.cpp - ${CC} ${HOST_CFLAGS} -c $< -o $@ -clean: - ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil -cleanall: clean - ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou diff --git a/samples/gemm/gemm_aocl.cl b/samples/gemm/gemm_aocl.cl deleted file mode 100644 index 198757823..000000000 --- a/samples/gemm/gemm_aocl.cl +++ /dev/null @@ -1,14 +0,0 @@ -#include "ihc_apint.h" -__kernel void default_function(__global int* restrict placeholder0, __global int* restrict placeholder1, __global int* restrict matrix_3) { - for (int x = 0; x < 10; ++x) { - for (int y = 0; y < 10; ++y) { - int sum; - sum = 0; - for (int k = 0; k < 10; ++k) { - sum = ((int)(((int64_t)(((long)placeholder0[(k + (x * 10))]) * ((long)placeholder1[(y + (k * 10))]))) + ((int64_t)sum))); - } - matrix_3[(y + (x * 10))] = sum; - } - } -} - diff --git a/samples/gemm/gemm_main.py b/samples/gemm/gemm_main.py index 4796bf2fb..fb05a094d 100644 --- a/samples/gemm/gemm_main.py +++ b/samples/gemm/gemm_main.py @@ -52,6 +52,5 @@ def time_gemm(dtype, m=1024, n=1024, k=1024, target=None): ############################################################################### # Test the algorithm with different data types dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] - -# for dtype in dtypes: -# time_gemm(hcl.Float(), 10, 10, 10, 'sdaccel') +for dtype in dtypes: + time_gemm(dtype) diff --git a/samples/gemm/gemm_runtime.py b/samples/gemm/gemm_runtime.py deleted file mode 100644 index 49947fa4c..000000000 --- a/samples/gemm/gemm_runtime.py +++ /dev/null @@ -1,86 +0,0 @@ -# Yang.Bai -# yb269@cornell.edu - -import heterocl as hcl -import numpy as np - -hcl.init() - -# matrix_size = (16, 16) -# def add_compute(A, B): -# C = hcl.compute(A.shape, lambda x, y: A[x, y] + B[x, y], "C") -# return C - -# def add_compute_2(A, B): 
-# C = hcl.compute(A.shape, lambda x: A[x] + B[x], "C") -# return C - -# A = hcl.placeholder(matrix_size, "A") -# B = hcl.placeholder(matrix_size, "B") - -# s = hcl.create_schedule([A, B], add_compute) -# # f2 = hcl.build(s, target='sdaccel') -# f2 = hcl.build(s, target='aocl') -# print (f2) - -# hcl_A = hcl.asarray(np.random.random_sample(matrix_size), dtype = hcl.Float()) -# hcl_B = hcl.asarray(np.random.random_sample(matrix_size), dtype = hcl.Float()) -# hcl_C = hcl.asarray(np.zeros(matrix_size), dtype = hcl.Float()) -# hcl_C2 = hcl.asarray(np.zeros(matrix_size), dtype = hcl.Float()) -# f3 = hcl.build(s) - -# A = hcl.placeholder((10, ), "A") -# B = hcl.placeholder((10, ), "B") -# s = hcl.create_schedule([A, B], add_compute_2) -# f4 = hcl.build(s, target='sdaccel') -# print (f4) -# print (hcl_A, hcl_B, hcl_C) - -matrix_1_size = (10, 10) -matrix_2_size = (10, 10) -matrix_3_size = (matrix_1_size[0], matrix_2_size[1]) - -def gemm_compute(matrix_1, matrix_2): - m = matrix_1.shape[0]; - k = matrix_1.shape[1]; - n = matrix_2.shape[1]; - r = hcl.reduce_axis(0, k, 'k') - temp = hcl.compute((m, n), - lambda x, y: hcl.sum(matrix_1[x, r] * matrix_2[r, y], - axis = r), name='matrix_3') - return temp - -matrix_1 = hcl.placeholder(matrix_1_size) -matrix_2 = hcl.placeholder(matrix_2_size) - -s = hcl.create_schedule([matrix_1, matrix_2], gemm_compute) -f = hcl.build(s, target='sdaccel_csim') -code = hcl.build(s, target='aocl') -with open('gemm_aocl.cl', 'w') as fin: - fin.write(code) - -code2 = hcl.build(s, target='sdaccel') -with open('gemm_sdaccel.cl', 'w') as fin2: - fin2.write(code2) - - -matrix_1_np = np.random.randint(10, size=matrix_1_size) -matrix_2_np = np.random.randint(10, size=matrix_2_size) -matrix_3_np = np.random.randint(10, size=matrix_3_size) - -hcl_matrix_1 = hcl.asarray(matrix_1_np) -hcl_matrix_2 = hcl.asarray(matrix_2_np) -hcl_matrix_3 = hcl.asarray(matrix_3_np) - -# f(hcl_matrix_1, hcl_matrix_2, hcl_matrix_3) - - - - - -# with open('sdaccel.cl', 'w') as f: 
-# f.write(code) - - - - diff --git a/samples/gemm/gemm_sdaccel.cl b/samples/gemm/gemm_sdaccel.cl deleted file mode 100644 index f46a88426..000000000 --- a/samples/gemm/gemm_sdaccel.cl +++ /dev/null @@ -1,13 +0,0 @@ -__kernel void default_function(__global int* placeholder0, __global int* placeholder1, __global int* matrix_3) { - for (int x = 0; x < 10; ++x) { - for (int y = 0; y < 10; ++y) { - __local int sum; - sum = 0; - for (int k = 0; k < 10; ++k) { - sum = ((int)(((long)(((long)placeholder0[(k + (x * 10))]) * ((long)placeholder1[(y + (k * 10))]))) + ((long)sum))); - } - matrix_3[(y + (x * 10))] = sum; - } - } -} - diff --git a/samples/gemm/gemm_sdaccel.py b/samples/gemm/gemm_sdaccel.py deleted file mode 100644 index 85c318120..000000000 --- a/samples/gemm/gemm_sdaccel.py +++ /dev/null @@ -1,8 +0,0 @@ -import heterocl as hcl -import numpy as np -from gemm_main import * - -#dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] -#for dtype in dtypes: -time_gemm(hcl.Int(32), 15, 15, 15, 'sdaccel_sw_emu') -# time_gemm(hcl.Float(), 100, 100, 100, 'sdaccel_sw_emu') diff --git a/samples/gemm/gemm_vhls.py b/samples/gemm/gemm_vhls.py index 8edd84bdd..e27fa155e 100644 --- a/samples/gemm/gemm_vhls.py +++ b/samples/gemm/gemm_vhls.py @@ -2,6 +2,6 @@ import numpy as np from gemm_main import * -#dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] -#for dtype in dtypes: -time_gemm(hcl.Int(32), 10, 10, 10, 'vhls_csim') +dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] +for dtype in dtypes: + time_gemm(dtype, 10, 10, 10, 'vhls_csim') diff --git a/samples/gemm/host.cpp b/samples/gemm/host.cpp deleted file mode 100644 index 914b2aa26..000000000 --- a/samples/gemm/host.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#define CL_HPP_CL_1_2_DEFAULT_BUILD -#define CL_HPP_TARGET_OPENCL_VERSION 120 -#define CL_HPP_MINIMUM_OPENCL_VERSION 120 -#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#pragma once - - - - -int main(void) { -#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE) - #define STR_VALUE(arg) #arg - #define GET_STRING(name) STR_VALUE(name) - #define TARGET_DEVICE GET_STRING(SDX_PLATFORM) -#endif - char* xclbinFilename = argv[1]; - - std::vector source_0(6 * 2); - std::vector source_1(2 * 7); - std::vector source_2(6 * 7); - - size_t vector_size_bytes_0 = sizeof(int) * 6 * 2; - size_t vector_size_bytes_1 = sizeof(int) * 2 * 7; - size_t vector_size_bytes_2 = sizeof(int) * 6 * 7; - - int* arg_0 = (int*)shmat(4849666, nullptr, 0); - for (size_t i0 = 0; i0 < 6; i0++) { - for (size_t i1 = 0; i1 < 2; i1++) { - source_0[i1 + i0*2] = arg_0[i1 + i0*2]; - } - } - int* arg_1 = (int*)shmat(7667712, nullptr, 0); - for (size_t i0 = 0; i0 < 2; i0++) { - for (size_t i1 = 0; i1 < 7; i1++) { - source_1[i1 + i0*7] = arg_1[i1 + i0*7]; - } - } - int* arg_2 = (int*)shmat(7667713, nullptr, 0); - for (size_t i0 = 0; i0 < 6; i0++) { - for (size_t i1 = 0; i1 < 7; i1++) { - source_2[i1 + i0*7] = arg_2[i1 + i0*7]; - } - } - std::vector platforms; - cl::Platform::get(&platforms); - cl::Platform platform = platforms[0]; - - std::vector devices; - platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices); - cl::Device device = devices[0]; - - cl::Context context(device); - cl::CommandQueue q(context, device); - - std::ifstream bin_file(xclbinFilename, std::ifstream::binary); - bin_file.seekg (0, bin_file.end); - unsigned nb = bin_file.tellg(); - bin_file.seekg (0, bin_file.beg); - char *buf = new char [nb]; - bin_file.read(buf, nb); - - cl::Program::Binaries bins; - bins.push_back({buf,nb}); - devices.resize(1); - cl::Program program(context, devices, bins); - - int err1; - cl::Kernel kernel(program, "default_function", &err1); - auto default_function = cl::KernelFunctor(kernel); - - cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0); - cl::Buffer buffer_1(context, 
CL_MEM_READ_WRITE, vector_size_bytes_1); - cl::Buffer buffer_2(context, CL_MEM_READ_WRITE, vector_size_bytes_2); - - q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); - q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); - q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - - default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2); - q.finish(); - - q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); - q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); - q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - - for (size_t i0 = 0; i0 < 6; i0++) { - for (size_t i1 = 0; i1 < 2; i1++) { - arg_0[i1 + i0*2] = source_0[i1 + i0*2]; - } - } - shmdt(arg_0); - for (size_t i0 = 0; i0 < 2; i0++) { - for (size_t i1 = 0; i1 < 7; i1++) { - arg_1[i1 + i0*7] = source_1[i1 + i0*7]; - } - } - shmdt(arg_1); - for (size_t i0 = 0; i0 < 6; i0++) { - for (size_t i1 = 0; i1 < 7; i1++) { - arg_2[i1 + i0*7] = source_2[i1 + i0*7]; - } - } - shmdt(arg_2); -} diff --git a/samples/gemm/sdaccel.mk b/samples/gemm/sdaccel.mk deleted file mode 100644 index 9cf0dafd7..000000000 --- a/samples/gemm/sdaccel.mk +++ /dev/null @@ -1,33 +0,0 @@ -ifndef XILINX_SDX -$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) -endif -SDA_FLOW = cpu_emu -HOST_SRCS = host.cpp -HOST_EXE_DIR=. 
-HOST_EXE = host -HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL -HOST_LFLAGS = -KERNEL_SRCS = default_function.cl -KERNEL_NAME = default_function -KERNEL_DEFS = -KERNEL_INCS = -XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 -XDEVICE_REPO_PATH= -KEEP_TEMP=1 -KERNEL_DEBUG= -XCLBIN_NAME=bin_krnl -HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" -BOARD_SETUP_FILE=setup.sh -ifeq (${SDA_FLOW},cpu_emu) - CLCC_OPT += -t sw_emu - XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin -else ifeq (${SDA_FLOW},hw_emu) - CLCC_OPT += -t hw_emu - XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin -else ifeq (${SDA_FLOW},hw) - XCLBIN = ${XCLBIN_NAME}_hw.xclbin -CLCC_OPT += -t hw -endif -HOST_ARGS = ${XCLBIN} -COMMON_DIR = ./common -include ${COMMON_DIR}/common.mk diff --git a/samples/kmeans/kmeans_aocl.cl b/samples/kmeans/kmeans_aocl.cl deleted file mode 100644 index e64b116f4..000000000 --- a/samples/kmeans/kmeans_aocl.cl +++ /dev/null @@ -1,49 +0,0 @@ -#include "ihc_apint.h" -__kernel void default_function(__global int* restrict placeholder2, __global int* restrict placeholder3, __global int* restrict compute3) { - for (int x = 0; x < 32; ++x) { - compute3[x] = 0; - } - int main_loop; - for (int _1 = 0; _1 < 10; ++_1) { - #pragma ii 1 - for (int N = 0; N < 32; ++N) { - int local2; - local2 = 100000; - for (int i = 0; i < 6; ++i) { - int local3; - local3 = 0; - for (int i1 = 0; i1 < 3; ++i1) { - local3 = ((int)(((int64_t)local3) + ((int64_t)(((int64_t)((int33_t)(placeholder2[(i1 + (N * 3))] - placeholder3[(i1 + (i * 3))]))) * ((int64_t)((int33_t)(placeholder2[(i1 + (N * 3))] - placeholder3[(i1 + (i * 3))]))))))); - } - if (local3 < local2) { - local2 = local3; - compute3[N] = i; - } - } - } - int compute4[6]; - for (int x1 = 0; x1 < 6; ++x1) { - compute4[x1] = 0; - } - int compute5[18]; - for (int x2 = 0; x2 < 6; ++x2) { - for (int y = 0; y < 3; ++y) { - compute5[(y + (x2 * 3))] = 0; - } - } - int calc_sum; - #pragma unroll - for (int n = 0; n < 32; ++n) { - compute4[compute3[n]] = (compute4[compute3[n]] + 1); - for (int 
i2 = 0; i2 < 3; ++i2) { - compute5[(i2 + (compute3[n] * 3))] = ((int)(((int33_t)compute5[(i2 + (compute3[n] * 3))]) + ((int33_t)placeholder2[(i2 + (n * 3))]))); - } - } - int update_mean; - #pragma unroll - for (int k_d_fused = 0; k_d_fused < 18; ++k_d_fused) { - placeholder3[k_d_fused] = (compute5[k_d_fused] / compute4[(k_d_fused / 3)]); - } - } -} - diff --git a/samples/kmeans/kmeans_sdaccel.py b/samples/kmeans/kmeans_sdaccel.py deleted file mode 100644 index c204c592e..000000000 --- a/samples/kmeans/kmeans_sdaccel.py +++ /dev/null @@ -1,27 +0,0 @@ -import numpy as np -import random -import heterocl as hcl -from kmeans_main import top - -K = 16 -N = 320 -dim = 32 - -f1 = top('sdaccel_sw_emu') -#f2 = top() -points_np = np.random.randint(100, size=(N, dim)) -labels_np = np.zeros(N) -means_np = points_np[random.sample(range(N), K),:] - -hcl_points1 = hcl.asarray(points_np) -hcl_means1 = hcl.asarray(means_np) -hcl_labels1 = hcl.asarray(labels_np) - -hcl_points2 = hcl.asarray(points_np) -hcl_means2 = hcl.asarray(means_np) -hcl_labels2 = hcl.asarray(labels_np) - -f1(hcl_points1, hcl_means1, hcl_labels1) -#f2(hcl_points2, hcl_means2, hcl_labels2) - -#assert np.array_equal(hcl_labels1.asnumpy(), hcl_labels2.asnumpy()) diff --git a/samples/kmeans/merlinc_code.cl b/samples/kmeans/merlinc_code.cl deleted file mode 100644 index ea672313d..000000000 --- a/samples/kmeans/merlinc_code.cl +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include -#pragma ACCEL kernel -void default_function(int* placeholder2, int* placeholder3, int* compute3) { - for (int x = 0; x < 320; ++x) { - compute3[x] = 0; - } - int main_loop; - for (int _1 = 0; _1 < 200; ++_1) { -#pragma ACCEL pipeline - for (int N = 0; N < 320; ++N) { - int local2; - local2 = 100000; - for (int i = 0; i < 16; ++i) { - int local3; - local3 = 0; - for (int i1 = 0; i1 < 32; ++i1) { - local3 = ((int)(((long)local3) + ((long)(((long)((long)(placeholder2[(i1 + (N * 32))] - placeholder3[(i1 + (i * 32))]))) * 
((long)((long)(placeholder2[(i1 + (N * 32))] - placeholder3[(i1 + (i * 32))]))))))); - } - if (local3 < local2) { - local2 = local3; - compute3[N] = i; - } - } - } - int compute4[16]; - for (int x1 = 0; x1 < 16; ++x1) { - compute4[x1] = 0; - } - int compute5[512]; - for (int x2 = 0; x2 < 16; ++x2) { - for (int y = 0; y < 32; ++y) { - compute5[(y + (x2 * 32))] = 0; - } - } - int calc_sum; -#pragma ACCEL parallel flatten - for (int n = 0; n < 320; ++n) { - compute4[compute3[n]] = (compute4[compute3[n]] + 1); - for (int i2 = 0; i2 < 32; ++i2) { - compute5[(i2 + (compute3[n] * 32))] = ((int)(((long)compute5[(i2 + (compute3[n] * 32))]) + ((long)placeholder2[(i2 + (n * 32))]))); - } - } - int update_mean; -#pragma ACCEL parallel flatten - for (int k_d_fused = 0; k_d_fused < 512; ++k_d_fused) { - placeholder3[k_d_fused] = (compute5[k_d_fused] / compute4[(k_d_fused / 32)]); - } - } -} - diff --git a/samples/kmeans/sdaccel_code.cl b/samples/kmeans/sdaccel_code.cl deleted file mode 100644 index 196f96257..000000000 --- a/samples/kmeans/sdaccel_code.cl +++ /dev/null @@ -1,48 +0,0 @@ -__kernel void default_function(__global int* placeholder4, __global int* placeholder5, __global int* compute6) { - for (int x = 0; x < 320; ++x) { - compute6[x] = 0; - } - __local int main_loop; - for (int _1 = 0; _1 < 200; ++_1) { - __attribute__((xcl_pipeline_loop(1))) - for (int N = 0; N < 320; ++N) { - __local int local4; - local4 = 100000; - for (int i = 0; i < 16; ++i) { - __local int local5; - local5 = 0; - for (int i1 = 0; i1 < 32; ++i1) { - local5 = ((int)(((long)local5) + ((long)(((long)((long)(placeholder4[(i1 + (N * 32))] - placeholder5[(i1 + (i * 32))]))) * ((long)((long)(placeholder4[(i1 + (N * 32))] - placeholder5[(i1 + (i * 32))]))))))); - } - if (local5 < local4) { - local4 = local5; - compute6[N] = i; - } - } - } - __local int compute7[16]; - for (int x1 = 0; x1 < 16; ++x1) { - compute7[x1] = 0; - } - __local int compute8[512]; - for (int x2 = 0; x2 < 16; ++x2) { - for (int y = 
0; y < 32; ++y) { - compute8[(y + (x2 * 32))] = 0; - } - } - __local int calc_sum; - - for (int n = 0; n < 320; ++n) { - compute7[compute6[n]] = (compute7[compute6[n]] + 1); - for (int i2 = 0; i2 < 32; ++i2) { - compute8[(i2 + (compute6[n] * 32))] = ((int)(((long)compute8[(i2 + (compute6[n] * 32))]) + ((long)placeholder4[(i2 + (n * 32))]))); - } - } - __local int update_mean; - - for (int k_d_fused = 0; k_d_fused < 512; ++k_d_fused) { - placeholder5[k_d_fused] = (compute8[k_d_fused] / compute7[(k_d_fused / 32)]); - } - } -} - diff --git a/samples/kmeans/submit.sh b/samples/kmeans/submit.sh deleted file mode 100644 index a4345a542..000000000 --- a/samples/kmeans/submit.sh +++ /dev/null @@ -1,3 +0,0 @@ -unset DISPLAY -aoc -board=a10gx -time time.out -time-passes -regtest_mode -v -fpc -fp-relaxed --opt-arg -nocaching -regtest_mode -report -I $INTELFPGAOCLSDKROOT/include/kernel_headers kmeans_aocl.cl - diff --git a/samples/kmeans/vhls_code.cl b/samples/kmeans/vhls_code.cl deleted file mode 100644 index b651dd8bf..000000000 --- a/samples/kmeans/vhls_code.cl +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include - -void default_function(ap_int<32> placeholder6[320][32], ap_int<32> placeholder7[16][32], ap_int<32> compute9[320]) { - for (ap_int<32> x = 0; x < 320; ++x) { - compute9[x] = 0; - } - ap_int<32> main_loop; - for (ap_int<32> _ = 0; _ < 200; ++_) { - for (ap_int<32> N = 0; N < 320; ++N) { - #pragma HLS pipeline - ap_int<32> local6; - local6 = 100000; - for (ap_int<32> i = 0; i < 16; ++i) { - ap_int<32> local7; - local7 = 0; - for (ap_int<32> i1 = 0; i1 < 32; ++i1) { - local7 = ((ap_int<32>)(((ap_int<67>)local7) + ((ap_int<67>)(((ap_int<66>)((ap_int<33>)(placeholder6[N][i1] - placeholder7[i][i1]))) * ((ap_int<66>)((ap_int<33>)(placeholder6[N][i1] - placeholder7[i][i1]))))))); - } - if (local7 < local6) { - local6 = local7; - compute9[N] = i; - } - } - } - ap_int<32> compute10[16]; - for (ap_int<32> x1 = 0; x1 < 16; ++x1) { - compute10[x1] = 0; - } - 
ap_int<32> compute11[16][32]; - for (ap_int<32> x2 = 0; x2 < 16; ++x2) { - for (ap_int<32> y = 0; y < 32; ++y) { - compute11[x2][y] = 0; - } - } - ap_int<32> calc_sum; - for (ap_int<32> n = 0; n < 320; ++n) { - #pragma HLS unroll - compute10[compute9[n]] = (compute10[compute9[n]] + 1); - for (ap_int<32> i2 = 0; i2 < 32; ++i2) { - compute11[compute9[n]][i2] = ((ap_int<32>)(((ap_int<33>)compute11[compute9[n]][i2]) + ((ap_int<33>)placeholder6[n][i2]))); - } - } - ap_int<32> update_mean; - for (ap_int<32> k_d_fused = 0; k_d_fused < 512; ++k_d_fused) { - #pragma HLS unroll - placeholder7[(k_d_fused / 32)][(k_d_fused % 32)] = (compute11[(k_d_fused / 32)][(k_d_fused % 32)] / compute10[(k_d_fused / 32)]); - } - } -} - diff --git a/samples/lenet/common/common.mk b/samples/lenet/common/common.mk deleted file mode 100644 index 3409e4aa5..000000000 --- a/samples/lenet/common/common.mk +++ /dev/null @@ -1,55 +0,0 @@ -SHELL = /bin/bash -VPATH = ./ -CC = xcpp -CLCC = xocc -ifeq ($(XDEVICE_REPO_PATH),) - DEVICE_REPO_OPT = -else -DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} -endif -HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 -HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread -CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} -ifeq (${KEEP_TEMP},1) - CLCC_OPT += -s -endif -ifeq (${KERNEL_DEBUG},1) - CLCC_OPT += -g -endif -CLCC_OPT += --kernel ${KERNEL_NAME} -OBJECTS := $(HOST_SRCS:.cpp=.o) -.PHONY: all -all: run -host: ${HOST_EXE_DIR}/${HOST_EXE} -xbin_cpu_em: - make SDA_FLOW=cpu_emu xbin -f sdaccel.mk -xbin_hw_em: - make SDA_FLOW=hw_emu xbin -f sdaccel.mk -xbin_hw : - make SDA_FLOW=hw xbin -f sdaccel.mk -xbin: ${XCLBIN} -run_cpu_em: - make SDA_FLOW=cpu_emu run_em -f sdaccel.mk -run_hw_em: - make SDA_FLOW=hw_emu run_em -f sdaccel.mk -run_hw : - make SDA_FLOW=hw run_hw_int -f sdaccel.mk -run_em: xconfig host xbin - XCL_EMULATION_MODE=true 
${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} -run_hw_int : host xbin_hw - source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} -estimate : - ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS} -xconfig : emconfig.json -emconfig.json : - emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . -${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} - ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ -${XCLBIN}: - ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} -%.o: %.cpp - ${CC} ${HOST_CFLAGS} -c $< -o $@ -clean: - ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil -cleanall: clean - ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou diff --git a/samples/lenet/lenet_aocl.cl b/samples/lenet/lenet_aocl.cl deleted file mode 100644 index 9b2a200f8..000000000 --- a/samples/lenet/lenet_aocl.cl +++ /dev/null @@ -1,138 +0,0 @@ -#include "ihc_apint.h" -__kernel void default_function(__global float* restrict input_image, __global float* restrict weight_conv1, __global float* restrict weight_conv2, __global float* restrict weight_fc1, __global float* restrict weight_fc2, __global float* restrict lenet) { - float conv2d; - for (int nn = 0; nn < 1; ++nn) { - for (int yy = 0; yy < -1; ++yy) { - for (int xx = 0; xx < -1; ++xx) { - float reducer0; - reducer0 = 0.000000e+00f; - for (int ra1 = 0; ra1 < 5; ++ra1) { - for (int ra2 = 0; ra2 < 5; ++ra2) { - reducer0 = ((input_image[(((xx + ra2) + ((yy + ra1) * 3)) + (nn * 9))] * weight_conv1[(ra2 + (ra1 * 5))]) + reducer0); - } - } - conv2d = reducer0; - } - } - } - float tanh1; - for (int args = 0; args < 1; ++args) { - for (int args1 = 0; args1 < -1; ++args1) { - for (int args2 = 0; args2 < -1; ++args2) { - tanh1 = ((float)tanh(((float)conv2d))); - } - } - } - float max_pool; - for (int i = 0; i < 1; ++i) { - for (int h = 0; h < -1; ++h) { - for (int w = 0; w < -1; ++w) { - float reducer1; - reducer1 = -1.000000e+00f; - for (int ra3 = 0; ra3 < 2; ++ra3) { - for (int ra4 = 0; 
ra4 < 2; ++ra4) { - reducer1 = max(tanh1, reducer1); - } - } - max_pool = reducer1; - } - } - } - float conv2d1[250]; - for (int nn1 = 0; nn1 < 1; ++nn1) { - for (int ff = 0; ff < 10; ++ff) { - for (int yy1 = 0; yy1 < -5; ++yy1) { - for (int xx1 = 0; xx1 < -5; ++xx1) { - float reducer2; - reducer2 = 0.000000e+00f; - for (int ra6 = 0; ra6 < 5; ++ra6) { - for (int ra7 = 0; ra7 < 5; ++ra7) { - reducer2 = ((max_pool * weight_conv2[((ra7 + (ra6 * 5)) + (ff * 25))]) + reducer2); - } - } - conv2d1[(((xx1 - (yy1 * 5)) + (ff * 25)) + (nn1 * 250))] = reducer2; - } - } - } - } - float tanh2[250]; - for (int args3 = 0; args3 < 1; ++args3) { - for (int args0 = 0; args0 < 10; ++args0) { - for (int args11 = 0; args11 < -5; ++args11) { - for (int args21 = 0; args21 < -5; ++args21) { - tanh2[(((args21 - (args11 * 5)) + (args0 * 25)) + (args3 * 250))] = ((float)tanh(((float)conv2d1[(((args21 - (args11 * 5)) + (args0 * 25)) + (args3 * 250))]))); - } - } - } - } - float max_pool1[90]; - for (int i1 = 0; i1 < 1; ++i1) { - for (int c = 0; c < 10; ++c) { - for (int h1 = 0; h1 < -3; ++h1) { - for (int w1 = 0; w1 < -3; ++w1) { - float reducer3; - reducer3 = -1.000000e+00f; - for (int ra8 = 0; ra8 < 2; ++ra8) { - for (int ra9 = 0; ra9 < 2; ++ra9) { - reducer3 = max(tanh2[(((((w1 * 2) - (((h1 * 2) + ra8) * 5)) + ra9) + (c * 25)) + (i1 * 250))], reducer3); - } - } - max_pool1[(((w1 - (h1 * 3)) + (c * 9)) + (i1 * 90))] = reducer3; - } - } - } - } - float compute0[90]; - for (int i2 = 0; i2 < 1; ++i2) { - for (int j = 0; j < 90; ++j) { - compute0[(j + (i2 * 90))] = max_pool1[((((j % -3) - (((j / -3) % -3) * 3)) + ((((j / -3) / -3) % 10) * 9)) + (i2 * 90))]; - } - } - float dense[25]; - for (int i3 = 0; i3 < 1; ++i3) { - for (int j1 = 0; j1 < 25; ++j1) { - float reducer4; - reducer4 = 0.000000e+00f; - for (int ra10 = 0; ra10 < 90; ++ra10) { - reducer4 = ((compute0[(ra10 + (i3 * 90))] * weight_fc1[(ra10 + (j1 * 40))]) + reducer4); - } - dense[(j1 + (i3 * 25))] = reducer4; - } - } - float 
tanh3[25]; - for (int args4 = 0; args4 < 1; ++args4) { - for (int args01 = 0; args01 < 25; ++args01) { - tanh3[(args01 + (args4 * 25))] = ((float)tanh(((float)dense[(args01 + (args4 * 25))]))); - } - } - float dense1[10]; - for (int i4 = 0; i4 < 1; ++i4) { - for (int j2 = 0; j2 < 10; ++j2) { - float reducer5; - reducer5 = 0.000000e+00f; - for (int ra11 = 0; ra11 < 25; ++ra11) { - reducer5 = ((tanh3[(ra11 + (i4 * 25))] * weight_fc2[(ra11 + (j2 * 25))]) + reducer5); - } - dense1[(j2 + (i4 * 10))] = reducer5; - } - } - float compute1; - int max1; - max1 = 0; - for (int ra12 = 0; ra12 < 10; ++ra12) { - max1 = ((int)max(dense1[ra12], ((float)max1))); - } - compute1 = ((float)max1); - float compute2; - int sum; - sum = 0; - for (int ra13 = 0; ra13 < 10; ++ra13) { - sum = ((int)(exp(((float)(dense1[ra13] - compute1))) + ((float)sum))); - } - compute2 = ((float)sum); - float update0; - for (int j3 = 0; j3 < 10; ++j3) { - lenet[j3] = ((float)(exp(((float)(dense1[j3] - compute1))) / ((float)compute2))); - } -} - diff --git a/samples/lenet/lenet_main_withoutq.py b/samples/lenet/lenet_main_withoutq.py deleted file mode 100644 index b16bdd6c3..000000000 --- a/samples/lenet/lenet_main_withoutq.py +++ /dev/null @@ -1,125 +0,0 @@ -import heterocl as hcl -import hlib -import numpy as np - -hcl.init() - -def softmax(out, x): - assert len(x.shape) == 2, "only support 2-dim softmax" - m, n = x.shape - k = hcl.reduce_axis(0, n) - max_elem = hcl.compute((m,), lambda i: hcl.max(x[i, k], axis=k)) - k = hcl.reduce_axis(0, n) - expsum = hcl.compute((m,), - lambda i: hcl.sum(hcl.exp(x[i, k] - max_elem[i]), axis=k)) - return hcl.update(out, - lambda i, j: hcl.exp(x[i, j] - max_elem[i]) / expsum[i]) - -def build_lenet(input_image, weight_conv1, weight_conv2, - weight_fc1, weight_fc2, lenet): - # first conv - conv1 = hlib.nn.conv2d_nchw(input_image, weight_conv1) - tanh1 = hlib.nn.tanh(conv1, "tanh1") - pool1 = hlib.nn.max_pool(tanh1, kernel=(2,2), stride=(2,2)) - # second conv - conv2 = 
hlib.nn.conv2d_nchw(pool1, weight_conv2) - tanh2 = hlib.nn.tanh(conv2, "tanh2") - pool2 = hlib.nn.max_pool(tanh2, kernel=(2,2), stride=(2,2)) - # first fc - flat = hlib.nn.flatten(pool2) - fc1 = hlib.nn.dense(flat, weight_fc1) - tanh3 = hlib.nn.tanh(fc1, "tanh3") - # second fc - fc2 = hlib.nn.dense(tanh3, weight_fc2) - # loss - return softmax(lenet, fc2) - - -import mxnet as mx -# download pretrained lenet model -mx.gluon.utils.download('https://gist.githubusercontent.com/Huyuwei/dc00ce83f537914c64a204133d23b019/raw/79af41e7c8ba9120ea7f35fb1d0484b65bccd54f/lenet-0010.params') -mx.gluon.utils.download('https://gist.githubusercontent.com/Huyuwei/dc00ce83f537914c64a204133d23b019/raw/79af41e7c8ba9120ea7f35fb1d0484b65bccd54f/lenet-symbol.json') -sym, arg_params, aux_params = mx.model.load_checkpoint('lenet', 10) -# get weights -weight_conv1_np = arg_params['convolution0_weight'].asnumpy() -weight_conv2_np = arg_params['convolution1_weight'].asnumpy() -weight_fc1_np = arg_params['fullyconnected0_weight'].asnumpy() -weight_fc2_np = arg_params['fullyconnected1_weight'].asnumpy() - - -# qtype1 = hcl.Fixed(16, 14) -# qtype2 = hcl.Fixed(16, 14) - -# qtype1 = hcl.Fixed(16, 12) -# qtype2 = hcl.Fixed(16, 12) - - - -correct_sum = 0 -batch_size = 1000 -mnist = mx.test_utils.get_mnist() - - -def build_lenet_inf(batch_size=batch_size, target=None): - # set up input/output placeholders - input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image") - # weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1", qtype1) - # weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2", qtype1) - # weight_fc1 = hcl.placeholder((500, 800), "weight_fc1", qtype1) - # weight_fc2 = hcl.placeholder((10, 500), "weight_fc2", qtype1) - weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1") - weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2") - weight_fc1 = hcl.placeholder((500, 800), "weight_fc1") - weight_fc2 = hcl.placeholder((10, 500), "weight_fc2") - lenet = 
hcl.placeholder((batch_size, 10), "lenet") - # create a quantization scheme - # scheme = hcl.create_scheme( - # [input_image, weight_conv1, weight_conv2, - # weight_fc1, weight_fc2, lenet], build_lenet) - # # quantize the three activation layers - # scheme.quantize( - # [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3], qtype2) - # s = hcl.create_schedule_from_scheme(scheme) - s = hcl.create_schedule([input_image, weight_conv1, weight_conv2, weight_fc1, weight_fc2, lenet], build_lenet) - return hcl.build(s, target=target) - -code1 = build_lenet_inf(batch_size, 'merlinc') -# print (code1) -with open('merlinc_code.cl', 'w') as f: - f.write(code1) - -code2 = build_lenet_inf(batch_size, 'sdaccel') - -with open('sdaccel_code.cl', 'w') as f: - f.write(code2) - -code3 = build_lenet_inf(batch_size, 'vhls') -with open('vhls_code.cl', 'w') as f: - f.write(code3) - -f = build_lenet_inf(batch_size, 'sdaccel_sw_emu') - -# weight_conv1_hcl = hcl.asarray(weight_conv1_np, dtype=qtype1) -# weight_conv2_hcl = hcl.asarray(weight_conv2_np, dtype=qtype1) -# weight_fc1_hcl = hcl.asarray(weight_fc1_np, dtype=qtype1) -# weight_fc2_hcl = hcl.asarray(weight_fc2_np, dtype=qtype1) - -weight_conv1_hcl = hcl.asarray(weight_conv1_np) -weight_conv2_hcl = hcl.asarray(weight_conv2_np) -weight_fc1_hcl = hcl.asarray(weight_fc1_np) -weight_fc2_hcl = hcl.asarray(weight_fc2_np) - - -for i in range(10000 // batch_size): - label = mnist['test_label'][i*batch_size:(i+1)*batch_size] - input_image_np = mnist['test_data'][i*batch_size:(i+1)*batch_size] - input_image_hcl = hcl.asarray(input_image_np) - output_hcl = hcl.asarray(np.zeros((batch_size,10))) - f(input_image_hcl, weight_conv1_hcl, weight_conv2_hcl, - weight_fc1_hcl, weight_fc2_hcl, output_hcl) - print (output_hcl.asnumpy()) - prediction = np.argmax(output_hcl.asnumpy(), axis=1) - correct_sum += np.sum(np.equal(prediction, label)) - -print("Testing accuracy: {}".format(correct_sum / 10000.)) - diff --git a/samples/lenet/lenet_sdaccel.py 
b/samples/lenet/lenet_sdaccel.py deleted file mode 100644 index 917b2b625..000000000 --- a/samples/lenet/lenet_sdaccel.py +++ /dev/null @@ -1,23 +0,0 @@ -import heterocl as hcl -import numpy as np -from lenet_main import * - -batch_size = 50 - -# f = build_lenet_inf(batch_size, 'vhls_csim') -f = build_lenet_inf(batch_size, 'sdaccel_sw_emu') - -mnist = mx.test_utils.get_mnist() -correct_sum = 0 - -for i in range(50 // batch_size): - label = mnist['test_label'][i*batch_size:(i+1)*batch_size] - input_image_np = mnist['test_data'][i*batch_size:(i+1)*batch_size] - input_image_hcl = hcl.asarray(input_image_np) - output_hcl = hcl.asarray(np.zeros((batch_size,10))) - f(input_image_hcl, weight_conv1_hcl, weight_conv2_hcl, weight_fc1_hcl, weight_fc2_hcl, output_hcl) - prediction = np.argmax(output_hcl.asnumpy(), axis=1) - correct_sum += np.sum(np.equal(prediction, label)) - -print(str(qtype1) + ", " + str(qtype2) + ": Accuracy over 10000 test images is: {}".format(correct_sum / 10000.)) -assert correct_sum == 9882 diff --git a/samples/lenet/merlinc_code.cl b/samples/lenet/merlinc_code.cl deleted file mode 100644 index 1c5118707..000000000 --- a/samples/lenet/merlinc_code.cl +++ /dev/null @@ -1,155 +0,0 @@ -#include -#include -#include -#pragma ACCEL kernel -void default_function(int* input_image, int* weight_conv1, int* weight_conv2, int* weight_fc1, int* weight_fc2, int* lenet) { - int conv2d[11520000]; - for (int nn = 0; nn < 1000; ++nn) { - for (int ff = 0; ff < 20; ++ff) { - for (int yy = 0; yy < 24; ++yy) { - for (int xx = 0; xx < 24; ++xx) { - float reducer0; - reducer0 = 0.000000e+00f; - for (int ra1 = 0; ra1 < 5; ++ra1) { - for (int ra2 = 0; ra2 < 5; ++ra2) { - reducer0 = (((float)(((long)input_image[(((xx + ra2) + ((yy + ra1) * 28)) + (nn * 784))]) * ((long)weight_conv1[((ra2 + (ra1 * 5)) + (ff * 25))]))) + reducer0); - } - } - conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = ((int)reducer0); - } - } - } - } - int tanh1[11520000]; - for (int args = 0; args 
< 1000; ++args) { - for (int args0 = 0; args0 < 20; ++args0) { - for (int args1 = 0; args1 < 24; ++args1) { - for (int args2 = 0; args2 < 24; ++args2) { - tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((int)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); - } - } - } - } - int max_pool[2880000]; - for (int i = 0; i < 1000; ++i) { - for (int c = 0; c < 20; ++c) { - for (int h = 0; h < 12; ++h) { - for (int w = 0; w < 12; ++w) { - float reducer1; - reducer1 = -1.000000e+00f; - for (int ra3 = 0; ra3 < 2; ++ra3) { - for (int ra4 = 0; ra4 < 2; ++ra4) { - reducer1 = max(((float)tanh1[(((((w * 2) + ra4) + (((h * 2) + ra3) * 24)) + (c * 576)) + (i * 11520))]), reducer1); - } - } - max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = ((int)reducer1); - } - } - } - } - int conv2d1[3200000]; - for (int nn1 = 0; nn1 < 1000; ++nn1) { - for (int ff1 = 0; ff1 < 50; ++ff1) { - for (int yy1 = 0; yy1 < 8; ++yy1) { - for (int xx1 = 0; xx1 < 8; ++xx1) { - float reducer2; - reducer2 = 0.000000e+00f; - for (int ra5 = 0; ra5 < 20; ++ra5) { - for (int ra6 = 0; ra6 < 5; ++ra6) { - for (int ra7 = 0; ra7 < 5; ++ra7) { - reducer2 = (((float)(((long)max_pool[((((xx1 + ra7) + ((yy1 + ra6) * 12)) + (ra5 * 144)) + (nn1 * 2880))]) * ((long)weight_conv2[(((ra7 + (ra6 * 5)) + (ra5 * 25)) + (ff1 * 500))]))) + reducer2); - } - } - } - conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = ((int)reducer2); - } - } - } - } - int tanh2[3200000]; - for (int args3 = 0; args3 < 1000; ++args3) { - for (int args01 = 0; args01 < 50; ++args01) { - for (int args11 = 0; args11 < 8; ++args11) { - for (int args21 = 0; args21 < 8; ++args21) { - tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((int)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); - } - } - } - } - int max_pool1[800000]; - for (int i1 = 0; i1 < 1000; ++i1) { - for (int c1 = 0; c1 < 50; ++c1) { - for (int h1 = 0; h1 < 4; 
++h1) { - for (int w1 = 0; w1 < 4; ++w1) { - float reducer3; - reducer3 = -1.000000e+00f; - for (int ra8 = 0; ra8 < 2; ++ra8) { - for (int ra9 = 0; ra9 < 2; ++ra9) { - reducer3 = max(((float)tanh2[(((((w1 * 2) + ra9) + (((h1 * 2) + ra8) * 8)) + (c1 * 64)) + (i1 * 3200))]), reducer3); - } - } - max_pool1[(((w1 + (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = ((int)reducer3); - } - } - } - } - int compute0[800000]; - for (int i2 = 0; i2 < 1000; ++i2) { - for (int j = 0; j < 800; ++j) { - compute0[(j + (i2 * 800))] = max_pool1[((((((j / 4) % 4) * 4) + (j % 4)) + ((j / 16) * 16)) + (i2 * 800))]; - } - } - int dense[500000]; - for (int i3 = 0; i3 < 1000; ++i3) { - for (int j1 = 0; j1 < 500; ++j1) { - float reducer4; - reducer4 = 0.000000e+00f; - for (int ra10 = 0; ra10 < 800; ++ra10) { - reducer4 = (((float)(((long)compute0[(ra10 + (i3 * 800))]) * ((long)weight_fc1[(ra10 + (j1 * 800))]))) + reducer4); - } - dense[(j1 + (i3 * 500))] = ((int)reducer4); - } - } - int tanh3[500000]; - for (int args4 = 0; args4 < 1000; ++args4) { - for (int args02 = 0; args02 < 500; ++args02) { - tanh3[(args02 + (args4 * 500))] = ((int)tanh(((double)dense[(args02 + (args4 * 500))]))); - } - } - int dense1[10000]; - for (int i4 = 0; i4 < 1000; ++i4) { - for (int j2 = 0; j2 < 10; ++j2) { - float reducer5; - reducer5 = 0.000000e+00f; - for (int ra11 = 0; ra11 < 500; ++ra11) { - reducer5 = (((float)(((long)tanh3[(ra11 + (i4 * 500))]) * ((long)weight_fc2[(ra11 + (j2 * 500))]))) + reducer5); - } - dense1[(j2 + (i4 * 10))] = ((int)reducer5); - } - } - int compute1[1000]; - for (int i5 = 0; i5 < 1000; ++i5) { - int max; - max = 0; - for (int ra12 = 0; ra12 < 10; ++ra12) { - max = max(dense1[(ra12 + (i5 * 10))], max); - } - compute1[i5] = max; - } - int compute2[1000]; - for (int i6 = 0; i6 < 1000; ++i6) { - int sum; - sum = 0; - for (int ra13 = 0; ra13 < 10; ++ra13) { - sum = ((int)(exp(((double)((long)(dense1[(ra13 + (i6 * 10))] - compute1[i6])))) + ((double)sum))); - } - compute2[i6] = sum; - } - int 
update0; - for (int i7 = 0; i7 < 1000; ++i7) { - for (int j3 = 0; j3 < 10; ++j3) { - lenet[(j3 + (i7 * 10))] = ((int)(exp(((double)((long)(dense1[(j3 + (i7 * 10))] - compute1[i7])))) / ((double)compute2[i7]))); - } - } -} - diff --git a/samples/lenet/sdaccel.mk b/samples/lenet/sdaccel.mk deleted file mode 100644 index ce266d89e..000000000 --- a/samples/lenet/sdaccel.mk +++ /dev/null @@ -1,32 +0,0 @@ -ifndef XILINX_SDX -$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) -endif -SDA_FLOW = cpu_emu -HOST_SRCS = host.cpp -HOST_EXE_DIR=. -HOST_EXE = host -HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL -HOST_LFLAGS = -KERNEL_SRCS = default_function.cl -KERNEL_NAME = default_function -KERNEL_DEFS = -KERNEL_INCS = -XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 -XDEVICE_REPO_PATH= -KEEP_TEMP=1 -KERNEL_DEBUG= -XCLBIN_NAME=bin_krnl -HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" -BOARD_SETUP_FILE=setup.sh -ifeq (${SDA_FLOW},cpu_emu) - CLCC_OPT += -t sw_emu - XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin -else ifeq (${SDA_FLOW},hw_emu) - CLCC_OPT += -t hw_emu - XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin -else ifeq (${SDA_FLOW},hw) - XCLBIN = ${XCLBIN_NAME}_hw.xclbin -CLCC_OPT += -t hw -endifHOST_ARGS = ${XCLBIN} -COMMON_DIR = ./common -include ${COMMON_DIR}/common.mk diff --git a/samples/lenet/sdaccel_code.cl b/samples/lenet/sdaccel_code.cl deleted file mode 100644 index 114880df0..000000000 --- a/samples/lenet/sdaccel_code.cl +++ /dev/null @@ -1,151 +0,0 @@ -__kernel void default_function(__global int* input_image, __global int* weight_conv1, __global int* weight_conv2, __global int* weight_fc1, __global int* weight_fc2, __global int* lenet) { - __local int conv2d[11520000]; - for (int nn = 0; nn < 1000; ++nn) { - for (int ff = 0; ff < 20; ++ff) { - for (int yy = 0; yy < 24; ++yy) { - for (int xx = 0; xx < 24; ++xx) { - __local float reducer6; - reducer6 = 0.000000e+00f; - for (int ra15 = 0; ra15 < 5; ++ra15) { - for (int ra16 = 0; ra16 < 5; ++ra16) { - 
reducer6 = (((float)(((long)input_image[(((xx + ra16) + ((yy + ra15) * 28)) + (nn * 784))]) * ((long)weight_conv1[((ra16 + (ra15 * 5)) + (ff * 25))]))) + reducer6); - } - } - conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = ((int)reducer6); - } - } - } - } - __local int tanh1[11520000]; - for (int args = 0; args < 1000; ++args) { - for (int args0 = 0; args0 < 20; ++args0) { - for (int args1 = 0; args1 < 24; ++args1) { - for (int args2 = 0; args2 < 24; ++args2) { - tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((int)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); - } - } - } - } - __local int max_pool[2880000]; - for (int i = 0; i < 1000; ++i) { - for (int c = 0; c < 20; ++c) { - for (int h = 0; h < 12; ++h) { - for (int w = 0; w < 12; ++w) { - __local float reducer7; - reducer7 = -1.000000e+00f; - for (int ra17 = 0; ra17 < 2; ++ra17) { - for (int ra18 = 0; ra18 < 2; ++ra18) { - reducer7 = max(((float)tanh1[(((((w * 2) + ra18) + (((h * 2) + ra17) * 24)) + (c * 576)) + (i * 11520))]), reducer7); - } - } - max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = ((int)reducer7); - } - } - } - } - __local int conv2d1[3200000]; - for (int nn1 = 0; nn1 < 1000; ++nn1) { - for (int ff1 = 0; ff1 < 50; ++ff1) { - for (int yy1 = 0; yy1 < 8; ++yy1) { - for (int xx1 = 0; xx1 < 8; ++xx1) { - __local float reducer8; - reducer8 = 0.000000e+00f; - for (int ra19 = 0; ra19 < 20; ++ra19) { - for (int ra20 = 0; ra20 < 5; ++ra20) { - for (int ra21 = 0; ra21 < 5; ++ra21) { - reducer8 = (((float)(((long)max_pool[((((xx1 + ra21) + ((yy1 + ra20) * 12)) + (ra19 * 144)) + (nn1 * 2880))]) * ((long)weight_conv2[(((ra21 + (ra20 * 5)) + (ra19 * 25)) + (ff1 * 500))]))) + reducer8); - } - } - } - conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = ((int)reducer8); - } - } - } - } - __local int tanh2[3200000]; - for (int args3 = 0; args3 < 1000; ++args3) { - for (int args01 = 0; args01 < 50; ++args01) { - for (int args11 
= 0; args11 < 8; ++args11) { - for (int args21 = 0; args21 < 8; ++args21) { - tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((int)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); - } - } - } - } - __local int max_pool1[800000]; - for (int i1 = 0; i1 < 1000; ++i1) { - for (int c1 = 0; c1 < 50; ++c1) { - for (int h1 = 0; h1 < 4; ++h1) { - for (int w1 = 0; w1 < 4; ++w1) { - __local float reducer9; - reducer9 = -1.000000e+00f; - for (int ra22 = 0; ra22 < 2; ++ra22) { - for (int ra23 = 0; ra23 < 2; ++ra23) { - reducer9 = max(((float)tanh2[(((((w1 * 2) + ra23) + (((h1 * 2) + ra22) * 8)) + (c1 * 64)) + (i1 * 3200))]), reducer9); - } - } - max_pool1[(((w1 + (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = ((int)reducer9); - } - } - } - } - __local int compute3[800000]; - for (int i2 = 0; i2 < 1000; ++i2) { - for (int j = 0; j < 800; ++j) { - compute3[(j + (i2 * 800))] = max_pool1[((((((j / 4) % 4) * 4) + (j % 4)) + ((j / 16) * 16)) + (i2 * 800))]; - } - } - __local int dense[500000]; - for (int i3 = 0; i3 < 1000; ++i3) { - for (int j1 = 0; j1 < 500; ++j1) { - __local float reducer10; - reducer10 = 0.000000e+00f; - for (int ra24 = 0; ra24 < 800; ++ra24) { - reducer10 = (((float)(((long)compute3[(ra24 + (i3 * 800))]) * ((long)weight_fc1[(ra24 + (j1 * 800))]))) + reducer10); - } - dense[(j1 + (i3 * 500))] = ((int)reducer10); - } - } - __local int tanh3[500000]; - for (int args4 = 0; args4 < 1000; ++args4) { - for (int args02 = 0; args02 < 500; ++args02) { - tanh3[(args02 + (args4 * 500))] = ((int)tanh(((double)dense[(args02 + (args4 * 500))]))); - } - } - __local int dense1[10000]; - for (int i4 = 0; i4 < 1000; ++i4) { - for (int j2 = 0; j2 < 10; ++j2) { - __local float reducer11; - reducer11 = 0.000000e+00f; - for (int ra25 = 0; ra25 < 500; ++ra25) { - reducer11 = (((float)(((long)tanh3[(ra25 + (i4 * 500))]) * ((long)weight_fc2[(ra25 + (j2 * 500))]))) + reducer11); - } - dense1[(j2 + (i4 * 10))] = ((int)reducer11); - } - 
} - __local int compute4[1000]; - for (int i5 = 0; i5 < 1000; ++i5) { - __local int max; - max = 0; - for (int ra26 = 0; ra26 < 10; ++ra26) { - max = max(dense1[(ra26 + (i5 * 10))], max); - } - compute4[i5] = max; - } - __local int compute5[1000]; - for (int i6 = 0; i6 < 1000; ++i6) { - __local int sum; - sum = 0; - for (int ra27 = 0; ra27 < 10; ++ra27) { - sum = ((int)(exp(((double)((long)(dense1[(ra27 + (i6 * 10))] - compute4[i6])))) + ((double)sum))); - } - compute5[i6] = sum; - } - __local int update1; - for (int i7 = 0; i7 < 1000; ++i7) { - for (int j3 = 0; j3 < 10; ++j3) { - lenet[(j3 + (i7 * 10))] = ((int)(exp(((double)((long)(dense1[(j3 + (i7 * 10))] - compute4[i7])))) / ((double)compute5[i7]))); - } - } -} - diff --git a/samples/lenet/vhls_code.cl b/samples/lenet/vhls_code.cl deleted file mode 100644 index 3d85466b4..000000000 --- a/samples/lenet/vhls_code.cl +++ /dev/null @@ -1,155 +0,0 @@ -#include -#include -#include - -void default_function(ap_int<32> input_image[1000][1][28][28], ap_int<32> weight_conv1[20][1][5][5], ap_int<32> weight_conv2[50][20][5][5], ap_int<32> weight_fc1[500][800], ap_int<32> weight_fc2[10][500], ap_int<32> lenet[1000][10]) { - ap_int<32> conv2d[1000][20][24][24]; - for (ap_int<32> nn = 0; nn < 1000; ++nn) { - for (ap_int<32> ff = 0; ff < 20; ++ff) { - for (ap_int<32> yy = 0; yy < 24; ++yy) { - for (ap_int<32> xx = 0; xx < 24; ++xx) { - float reducer12; - reducer12 = 0.000000e+00f; - for (ap_int<32> ra29 = 0; ra29 < 5; ++ra29) { - for (ap_int<32> ra30 = 0; ra30 < 5; ++ra30) { - reducer12 = (((float)(((ap_int<64>)input_image[nn][0][(yy + ra29)][(xx + ra30)]) * ((ap_int<64>)weight_conv1[ff][0][ra29][ra30]))) + reducer12); - } - } - conv2d[nn][ff][yy][xx] = ((ap_int<32>)reducer12); - } - } - } - } - ap_int<32> tanh1[1000][20][24][24]; - for (ap_int<32> args = 0; args < 1000; ++args) { - for (ap_int<32> args0 = 0; args0 < 20; ++args0) { - for (ap_int<32> args1 = 0; args1 < 24; ++args1) { - for (ap_int<32> args2 = 0; args2 < 24; 
++args2) { - tanh1[args][args0][args1][args2] = ((ap_int<32>)tanh(((double)conv2d[args][args0][args1][args2]))); - } - } - } - } - ap_int<32> max_pool[1000][20][12][12]; - for (ap_int<32> i = 0; i < 1000; ++i) { - for (ap_int<32> c = 0; c < 20; ++c) { - for (ap_int<32> h = 0; h < 12; ++h) { - for (ap_int<32> w = 0; w < 12; ++w) { - float reducer13; - reducer13 = -1.000000e+00f; - for (ap_int<32> ra31 = 0; ra31 < 2; ++ra31) { - for (ap_int<32> ra32 = 0; ra32 < 2; ++ra32) { - reducer13 = std::max(((float)tanh1[i][c][((h * 2) + ra31)][((w * 2) + ra32)]), reducer13); - } - } - max_pool[i][c][h][w] = ((ap_int<32>)reducer13); - } - } - } - } - ap_int<32> conv2d1[1000][50][8][8]; - for (ap_int<32> nn1 = 0; nn1 < 1000; ++nn1) { - for (ap_int<32> ff1 = 0; ff1 < 50; ++ff1) { - for (ap_int<32> yy1 = 0; yy1 < 8; ++yy1) { - for (ap_int<32> xx1 = 0; xx1 < 8; ++xx1) { - float reducer14; - reducer14 = 0.000000e+00f; - for (ap_int<32> ra33 = 0; ra33 < 20; ++ra33) { - for (ap_int<32> ra34 = 0; ra34 < 5; ++ra34) { - for (ap_int<32> ra35 = 0; ra35 < 5; ++ra35) { - reducer14 = (((float)(((ap_int<64>)max_pool[nn1][ra33][(yy1 + ra34)][(xx1 + ra35)]) * ((ap_int<64>)weight_conv2[ff1][ra33][ra34][ra35]))) + reducer14); - } - } - } - conv2d1[nn1][ff1][yy1][xx1] = ((ap_int<32>)reducer14); - } - } - } - } - ap_int<32> tanh2[1000][50][8][8]; - for (ap_int<32> args3 = 0; args3 < 1000; ++args3) { - for (ap_int<32> args01 = 0; args01 < 50; ++args01) { - for (ap_int<32> args11 = 0; args11 < 8; ++args11) { - for (ap_int<32> args21 = 0; args21 < 8; ++args21) { - tanh2[args3][args01][args11][args21] = ((ap_int<32>)tanh(((double)conv2d1[args3][args01][args11][args21]))); - } - } - } - } - ap_int<32> max_pool1[1000][50][4][4]; - for (ap_int<32> i1 = 0; i1 < 1000; ++i1) { - for (ap_int<32> c1 = 0; c1 < 50; ++c1) { - for (ap_int<32> h1 = 0; h1 < 4; ++h1) { - for (ap_int<32> w1 = 0; w1 < 4; ++w1) { - float reducer15; - reducer15 = -1.000000e+00f; - for (ap_int<32> ra36 = 0; ra36 < 2; ++ra36) { - for 
(ap_int<32> ra37 = 0; ra37 < 2; ++ra37) { - reducer15 = std::max(((float)tanh2[i1][c1][((h1 * 2) + ra36)][((w1 * 2) + ra37)]), reducer15); - } - } - max_pool1[i1][c1][h1][w1] = ((ap_int<32>)reducer15); - } - } - } - } - ap_int<32> compute6[1000][800]; - for (ap_int<32> i2 = 0; i2 < 1000; ++i2) { - for (ap_int<32> j = 0; j < 800; ++j) { - compute6[i2][j] = max_pool1[i2][(j / 16)][((j / 4) % 4)][(j % 4)]; - } - } - ap_int<32> dense[1000][500]; - for (ap_int<32> i3 = 0; i3 < 1000; ++i3) { - for (ap_int<32> j1 = 0; j1 < 500; ++j1) { - float reducer16; - reducer16 = 0.000000e+00f; - for (ap_int<32> ra38 = 0; ra38 < 800; ++ra38) { - reducer16 = (((float)(((ap_int<64>)compute6[i3][ra38]) * ((ap_int<64>)weight_fc1[j1][ra38]))) + reducer16); - } - dense[i3][j1] = ((ap_int<32>)reducer16); - } - } - ap_int<32> tanh3[1000][500]; - for (ap_int<32> args4 = 0; args4 < 1000; ++args4) { - for (ap_int<32> args02 = 0; args02 < 500; ++args02) { - tanh3[args4][args02] = ((ap_int<32>)tanh(((double)dense[args4][args02]))); - } - } - ap_int<32> dense1[1000][10]; - for (ap_int<32> i4 = 0; i4 < 1000; ++i4) { - for (ap_int<32> j2 = 0; j2 < 10; ++j2) { - float reducer17; - reducer17 = 0.000000e+00f; - for (ap_int<32> ra39 = 0; ra39 < 500; ++ra39) { - reducer17 = (((float)(((ap_int<64>)tanh3[i4][ra39]) * ((ap_int<64>)weight_fc2[j2][ra39]))) + reducer17); - } - dense1[i4][j2] = ((ap_int<32>)reducer17); - } - } - ap_int<32> compute7[1000]; - for (ap_int<32> i5 = 0; i5 < 1000; ++i5) { - ap_int<32> max; - max = 0; - for (ap_int<32> ra40 = 0; ra40 < 10; ++ra40) { - max = std::max(dense1[i5][ra40], max); - } - compute7[i5] = max; - } - ap_int<32> compute8[1000]; - for (ap_int<32> i6 = 0; i6 < 1000; ++i6) { - ap_int<32> sum; - sum = 0; - for (ap_int<32> ra41 = 0; ra41 < 10; ++ra41) { - sum = ((ap_int<32>)(exp(((double)((ap_int<33>)(dense1[i6][ra41] - compute7[i6])))) + ((double)sum))); - } - compute8[i6] = sum; - } - ap_int<32> update2; - for (ap_int<32> i7 = 0; i7 < 1000; ++i7) { - for (ap_int<32> 
j3 = 0; j3 < 10; ++j3) { - lenet[i7][j3] = ((ap_int<32>)(exp(((double)((ap_int<33>)(dense1[i7][j3] - compute7[i7])))) / ((double)compute8[i7]))); - } - } -} - diff --git a/samples/smith_waterman/common/common.mk b/samples/smith_waterman/common/common.mk deleted file mode 100644 index 3409e4aa5..000000000 --- a/samples/smith_waterman/common/common.mk +++ /dev/null @@ -1,55 +0,0 @@ -SHELL = /bin/bash -VPATH = ./ -CC = xcpp -CLCC = xocc -ifeq ($(XDEVICE_REPO_PATH),) - DEVICE_REPO_OPT = -else -DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} -endif -HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 -HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread -CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} -ifeq (${KEEP_TEMP},1) - CLCC_OPT += -s -endif -ifeq (${KERNEL_DEBUG},1) - CLCC_OPT += -g -endif -CLCC_OPT += --kernel ${KERNEL_NAME} -OBJECTS := $(HOST_SRCS:.cpp=.o) -.PHONY: all -all: run -host: ${HOST_EXE_DIR}/${HOST_EXE} -xbin_cpu_em: - make SDA_FLOW=cpu_emu xbin -f sdaccel.mk -xbin_hw_em: - make SDA_FLOW=hw_emu xbin -f sdaccel.mk -xbin_hw : - make SDA_FLOW=hw xbin -f sdaccel.mk -xbin: ${XCLBIN} -run_cpu_em: - make SDA_FLOW=cpu_emu run_em -f sdaccel.mk -run_hw_em: - make SDA_FLOW=hw_emu run_em -f sdaccel.mk -run_hw : - make SDA_FLOW=hw run_hw_int -f sdaccel.mk -run_em: xconfig host xbin - XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} -run_hw_int : host xbin_hw - source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} -estimate : - ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS} -xconfig : emconfig.json -emconfig.json : - emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . 
-${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} - ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ -${XCLBIN}: - ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} -%.o: %.cpp - ${CC} ${HOST_CFLAGS} -c $< -o $@ -clean: - ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil -cleanall: clean - ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou diff --git a/samples/smith_waterman/lenet_aocl.cl b/samples/smith_waterman/lenet_aocl.cl deleted file mode 100644 index bf8608082..000000000 --- a/samples/smith_waterman/lenet_aocl.cl +++ /dev/null @@ -1,143 +0,0 @@ -#include "ihc_apint.h" -__kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_t* restrict seqBs, __global uint3_t* restrict outAs, __global uint3_t* restrict outBs) { - int B; - #pragma ii 1 - for (int t_outer = 0; t_outer < 32; ++t_outer) { - #pragma unroll - for (int t_inner = 0; t_inner < 32; ++t_inner) { - int maxtrix_max; - maxtrix_max = 0; - int i_max; - i_max = 0; - int j_max; - j_max = 0; - short matrix[16641]; - for (int x = 0; x < 129; ++x) { - for (int y = 0; y < 129; ++y) { - matrix[(y + (x * 129))] = (short)0; - } - } - short action[16641]; - for (int x1 = 0; x1 < 129; ++x1) { - for (int y1 = 0; y1 < 129; ++y1) { - action[(y1 + (x1 * 129))] = (short)3; - } - } - int mutate3; - for (int i = 0; i < 129; ++i) { - for (int j = 0; j < 129; ++j) { - int trace_back[4]; - for (int x2 = 0; x2 < 4; ++x2) { - trace_back[x2] = 0; - } - if ((i != 0) && (j != 0)) { - trace_back[0] = ((int)(((int33_t)matrix[((j + (i * 129)) + -130)]) + ((int33_t)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 
1 : -4)))); - trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); - trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); - trace_back[3] = 0; - int max; - max = trace_back[0]; - int act; - act = 0; - for (int i1 = 0; i1 < 4; ++i1) { - if (max < trace_back[i1]) { - max = trace_back[i1]; - act = i1; - } - } - matrix[(j + (i * 129))] = ((short)max); - action[(j + (i * 129))] = ((short)act); - if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { - maxtrix_max = ((int)matrix[(j + (i * 129))]); - i_max = i; - j_max = j; - } - } - } - } - int T; - int curr_i; - curr_i = i_max; - int curr_j; - curr_j = j_max; - int next_i; - next_i = 0; - int next_j; - next_j = 0; - int act1; - act1 = ((int)action[(curr_j + (curr_i * 129))]); - int next_i1; - next_i1 = 0; - int next_j1; - next_j1 = 0; - if (act1 == 0) { - next_i1 = (curr_i + -1); - next_j1 = (curr_j + -1); - } else { - if (act1 == 1) { - next_i1 = (curr_i + -1); - next_j1 = curr_j; - } else { - if (act1 == 2) { - next_i1 = curr_i; - next_j1 = (curr_j + -1); - } else { - next_i1 = curr_i; - next_j1 = curr_j; - } - } - } - next_i = next_i1; - next_j = next_j1; - int tick; - tick = 0; - while (((curr_i != next_i) || (curr_j != next_j))) { - int a; - a = 0; - int b; - b = 0; - if (next_i == curr_i) { - a = 0; - } else { - a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); - } - if (next_j == curr_j) { - b = 0; - } else { - b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); - } - outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)a); - outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)b); - curr_i = next_i; - curr_j = next_j; - int act2; - act2 = ((int)action[(curr_j + (curr_i * 129))]); - int next_i2; - next_i2 = 0; - int next_j2; - next_j2 = 0; - if (act2 == 0) { - next_i2 = (curr_i + -1); - next_j2 = (curr_j + -1); - } else { - if (act2 == 1) { - next_i2 = (curr_i + -1); - next_j2 = curr_j; - } else { - if (act2 == 2) { - next_i2 = curr_i; 
- next_j2 = (curr_j + -1); - } else { - next_i2 = curr_i; - next_j2 = curr_j; - } - } - } - next_i = next_i2; - next_j = next_j2; - tick = (tick + 1); - } - } - } -} - diff --git a/samples/smith_waterman/main.cpp b/samples/smith_waterman/main.cpp deleted file mode 100644 index 851a98bf7..000000000 --- a/samples/smith_waterman/main.cpp +++ /dev/null @@ -1,135 +0,0 @@ -#define CL_HPP_CL_1_2_DEFAULT_BUILD -#define CL_HPP_TARGET_OPENCL_VERSION 120 -#define CL_HPP_MINIMUM_OPENCL_VERSION 120 -#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#pragma once - - - - -int main(void) { -#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE) - #define STR_VALUE(arg) #arg - #define GET_STRING(name) STR_VALUE(name) - #define TARGET_DEVICE GET_STRING(SDX_PLATFORM) -#endif - char* xclbinFilename = argv[1]; - - std::vector source_0(1024 * 128); - std::vector source_1(1024 * 128); - std::vector source_2(1024 * 256); - std::vector source_3(1024 * 256); - - size_t vector_size_bytes_0 = sizeof(unsigned int) * 1024 * 128; - size_t vector_size_bytes_1 = sizeof(unsigned int) * 1024 * 128; - size_t vector_size_bytes_2 = sizeof(unsigned int) * 1024 * 256; - size_t vector_size_bytes_3 = sizeof(unsigned int) * 1024 * 256; - - unsigned int* arg_0 = (unsigned int*)shmat(1769476, nullptr, 0); - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 128; i1++) { - source_0[i1 + i0*128] = arg_0[i1 + i0*128]; - } - } - unsigned int* arg_1 = (unsigned int*)shmat(3538944, nullptr, 0); - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 128; i1++) { - source_1[i1 + i0*128] = arg_1[i1 + i0*128]; - } - } - unsigned int* arg_2 = (unsigned int*)shmat(3538945, nullptr, 0); - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 256; i1++) { - source_2[i1 + i0*256] = arg_2[i1 + i0*256]; - } - } - unsigned int* 
arg_3 = (unsigned int*)shmat(2162690, nullptr, 0); - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 256; i1++) { - source_3[i1 + i0*256] = arg_3[i1 + i0*256]; - } - } - std::vector platforms; - cl::Platform::get(&platforms); - cl::Platform platform = platforms[0]; - - std::vector devices; - platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices); - cl::Device device = devices[0]; - - cl::Context context(device); - cl::CommandQueue q(context, device); - - std::ifstream bin_file(xclbinFilename, std::ifstream::binary); - bin_file.seekg (0, bin_file.end); - unsigned nb = bin_file.tellg(); - bin_file.seekg (0, bin_file.beg); - char *buf = new char [nb]; - bin_file.read(buf, nb); - - cl::Program::Binaries bins; - bins.push_back({buf,nb}); - devices.resize(1); - cl::Program program(context, devices, bins); - - int err1; - cl::Kernel kernel(program, "default_function", &err1); - auto default_function = cl::KernelFunctor(kernel); - - cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0); - cl::Buffer buffer_1(context, CL_MEM_READ_WRITE, vector_size_bytes_1); - cl::Buffer buffer_2(context, CL_MEM_READ_WRITE, vector_size_bytes_2); - cl::Buffer buffer_3(context, CL_MEM_READ_WRITE, vector_size_bytes_3); - - q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); - q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); - q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - q.enqueueWriteBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); - - default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2, buffer_3); - q.finish(); - - q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); - q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); - q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - 
q.enqueueReadBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); - - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 128; i1++) { - arg_0[i1 + i0*128] = source_0[i1 + i0*128]; - } - } - shmdt(arg_0); - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 128; i1++) { - arg_1[i1 + i0*128] = source_1[i1 + i0*128]; - } - } - shmdt(arg_1); - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 256; i1++) { - arg_2[i1 + i0*256] = source_2[i1 + i0*256]; - } - } - shmdt(arg_2); - for (size_t i0 = 0; i0 < 1024; i0++) { - for (size_t i1 = 0; i1 < 256; i1++) { - arg_3[i1 + i0*256] = source_3[i1 + i0*256]; - } - } - shmdt(arg_3); -} diff --git a/samples/smith_waterman/merlinc_code.cl b/samples/smith_waterman/merlinc_code.cl deleted file mode 100644 index c3a347f35..000000000 --- a/samples/smith_waterman/merlinc_code.cl +++ /dev/null @@ -1,146 +0,0 @@ -#include -#include -#include -#pragma ACCEL kernel -void default_function(unsigned char* seqAs, unsigned char* seqBs, unsigned char* outAs, unsigned char* outBs) { - int B; -#pragma ACCEL pipeline - for (int t_outer = 0; t_outer < 32; ++t_outer) { -#pragma ACCEL parallel - for (int t_inner = 0; t_inner < 32; ++t_inner) { - int maxtrix_max; - maxtrix_max = 0; - int i_max; - i_max = 0; - int j_max; - j_max = 0; - short matrix[16641]; - for (int x = 0; x < 129; ++x) { - for (int y = 0; y < 129; ++y) { - matrix[(y + (x * 129))] = (short)0; - } - } - short action[16641]; - for (int x1 = 0; x1 < 129; ++x1) { - for (int y1 = 0; y1 < 129; ++y1) { - action[(y1 + (x1 * 129))] = (short)3; - } - } - int mutate3; - for (int i = 0; i < 129; ++i) { - for (int j = 0; j < 129; ++j) { - int trace_back[4]; - for (int x2 = 0; x2 < 4; ++x2) { - trace_back[x2] = 0; - } - if ((i != 0) && (j != 0)) { - trace_back[0] = ((int)(((long)matrix[((j + (i * 129)) + -130)]) + ((long)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 
1 : -4)))); - trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); - trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); - trace_back[3] = 0; - int max; - max = trace_back[0]; - int act; - act = 0; - for (int i1 = 0; i1 < 4; ++i1) { - if (max < trace_back[i1]) { - max = trace_back[i1]; - act = i1; - } - } - matrix[(j + (i * 129))] = ((short)max); - action[(j + (i * 129))] = ((short)act); - if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { - maxtrix_max = ((int)matrix[(j + (i * 129))]); - i_max = i; - j_max = j; - } - } - } - } - int T; - int curr_i; - curr_i = i_max; - int curr_j; - curr_j = j_max; - int next_i; - next_i = 0; - int next_j; - next_j = 0; - int act1; - act1 = ((int)action[(curr_j + (curr_i * 129))]); - int next_i1; - next_i1 = 0; - int next_j1; - next_j1 = 0; - if (act1 == 0) { - next_i1 = (curr_i + -1); - next_j1 = (curr_j + -1); - } else { - if (act1 == 1) { - next_i1 = (curr_i + -1); - next_j1 = curr_j; - } else { - if (act1 == 2) { - next_i1 = curr_i; - next_j1 = (curr_j + -1); - } else { - next_i1 = curr_i; - next_j1 = curr_j; - } - } - } - next_i = next_i1; - next_j = next_j1; - int tick; - tick = 0; - while (((curr_i != next_i) || (curr_j != next_j))) { - int a; - a = 0; - int b; - b = 0; - if (next_i == curr_i) { - a = 0; - } else { - a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); - } - if (next_j == curr_j) { - b = 0; - } else { - b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); - } - outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)a); - outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)b); - curr_i = next_i; - curr_j = next_j; - int act2; - act2 = ((int)action[(curr_j + (curr_i * 129))]); - int next_i2; - next_i2 = 0; - int next_j2; - next_j2 = 0; - if (act2 == 0) { - next_i2 = (curr_i + -1); - next_j2 = (curr_j + -1); - } else { - if (act2 == 1) { - next_i2 = (curr_i + -1); - next_j2 = curr_j; - } else { - if (act2 == 2) { - 
next_i2 = curr_i; - next_j2 = (curr_j + -1); - } else { - next_i2 = curr_i; - next_j2 = curr_j; - } - } - } - next_i = next_i2; - next_j = next_j2; - tick = (tick + 1); - } - } - } -} - diff --git a/samples/smith_waterman/sdaccel.mk b/samples/smith_waterman/sdaccel.mk deleted file mode 100644 index ce266d89e..000000000 --- a/samples/smith_waterman/sdaccel.mk +++ /dev/null @@ -1,32 +0,0 @@ -ifndef XILINX_SDX -$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) -endif -SDA_FLOW = cpu_emu -HOST_SRCS = host.cpp -HOST_EXE_DIR=. -HOST_EXE = host -HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL -HOST_LFLAGS = -KERNEL_SRCS = default_function.cl -KERNEL_NAME = default_function -KERNEL_DEFS = -KERNEL_INCS = -XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 -XDEVICE_REPO_PATH= -KEEP_TEMP=1 -KERNEL_DEBUG= -XCLBIN_NAME=bin_krnl -HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" -BOARD_SETUP_FILE=setup.sh -ifeq (${SDA_FLOW},cpu_emu) - CLCC_OPT += -t sw_emu - XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin -else ifeq (${SDA_FLOW},hw_emu) - CLCC_OPT += -t hw_emu - XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin -else ifeq (${SDA_FLOW},hw) - XCLBIN = ${XCLBIN_NAME}_hw.xclbin -CLCC_OPT += -t hw -endifHOST_ARGS = ${XCLBIN} -COMMON_DIR = ./common -include ${COMMON_DIR}/common.mk diff --git a/samples/smith_waterman/sdaccel_code.cl b/samples/smith_waterman/sdaccel_code.cl deleted file mode 100644 index a0f5fdb01..000000000 --- a/samples/smith_waterman/sdaccel_code.cl +++ /dev/null @@ -1,142 +0,0 @@ -__kernel void default_function(__global unsigned char* seqAs, __global unsigned char* seqBs, __global unsigned char* outAs, __global unsigned char* outBs) { - __local int B; - __attribute__((xcl_pipeline_loop(1))) - for (int t_outer = 0; t_outer < 2; ++t_outer) { - - for (int t_inner = 0; t_inner < 32; ++t_inner) { - __local int maxtrix_max; - maxtrix_max = 0; - __local int i_max; - i_max = 0; - __local int j_max; - j_max = 0; - __local short matrix[841]; - for (int x = 0; x < 29; ++x) 
{ - for (int y = 0; y < 29; ++y) { - matrix[(y + (x * 29))] = (short)0; - } - } - __local short action[841]; - for (int x1 = 0; x1 < 29; ++x1) { - for (int y1 = 0; y1 < 29; ++y1) { - action[(y1 + (x1 * 29))] = (short)3; - } - } - __local int mutate1; - for (int i = 0; i < 29; ++i) { - for (int j = 0; j < 29; ++j) { - __local int trace_back[4]; - for (int x2 = 0; x2 < 4; ++x2) { - trace_back[x2] = 0; - } - if ((i != 0) && (j != 0)) { - trace_back[0] = ((int)(((long)matrix[((j + (i * 29)) + -30)]) + ((long)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 28)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 28)) + -1)]) ? 1 : -4)))); - trace_back[1] = (((int)matrix[((j + (i * 29)) + -29)]) + -4); - trace_back[2] = (((int)matrix[((j + (i * 29)) + -1)]) + -4); - trace_back[3] = 0; - __local int max; - max = trace_back[0]; - __local int act; - act = 0; - for (int i1 = 0; i1 < 4; ++i1) { - if (max < trace_back[i1]) { - max = trace_back[i1]; - act = i1; - } - } - matrix[(j + (i * 29))] = ((short)max); - action[(j + (i * 29))] = ((short)act); - if (maxtrix_max < ((int)matrix[(j + (i * 29))])) { - maxtrix_max = ((int)matrix[(j + (i * 29))]); - i_max = i; - j_max = j; - } - } - } - } - __local int T; - __local int curr_i; - curr_i = i_max; - __local int curr_j; - curr_j = j_max; - __local int next_i; - next_i = 0; - __local int next_j; - next_j = 0; - __local int act1; - act1 = ((int)action[(curr_j + (curr_i * 29))]); - __local int next_i1; - next_i1 = 0; - __local int next_j1; - next_j1 = 0; - if (act1 == 0) { - next_i1 = (curr_i + -1); - next_j1 = (curr_j + -1); - } else { - if (act1 == 1) { - next_i1 = (curr_i + -1); - next_j1 = curr_j; - } else { - if (act1 == 2) { - next_i1 = curr_i; - next_j1 = (curr_j + -1); - } else { - next_i1 = curr_i; - next_j1 = curr_j; - } - } - } - next_i = next_i1; - next_j = next_j1; - __local int tick; - tick = 0; - while (((curr_i != next_i) || (curr_j != next_j))) { - __local int a; - a = 0; - __local int b; - b = 0; - if (next_i == 
curr_i) { - a = 0; - } else { - a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 28)) + -1)]); - } - if (next_j == curr_j) { - b = 0; - } else { - b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 28)) + -1)]); - } - outAs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((unsigned char)a); - outBs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((unsigned char)b); - curr_i = next_i; - curr_j = next_j; - __local int act2; - act2 = ((int)action[(curr_j + (curr_i * 29))]); - __local int next_i2; - next_i2 = 0; - __local int next_j2; - next_j2 = 0; - if (act2 == 0) { - next_i2 = (curr_i + -1); - next_j2 = (curr_j + -1); - } else { - if (act2 == 1) { - next_i2 = (curr_i + -1); - next_j2 = curr_j; - } else { - if (act2 == 2) { - next_i2 = curr_i; - next_j2 = (curr_j + -1); - } else { - next_i2 = curr_i; - next_j2 = curr_j; - } - } - } - next_i = next_i2; - next_j = next_j2; - tick = (tick + 1); - } - } - } -} - diff --git a/samples/smith_waterman/sdaccel_code_nounroll.cl b/samples/smith_waterman/sdaccel_code_nounroll.cl deleted file mode 100644 index d5e145c05..000000000 --- a/samples/smith_waterman/sdaccel_code_nounroll.cl +++ /dev/null @@ -1,142 +0,0 @@ -__kernel void default_function(__global unsigned char* seqAs, __global unsigned char* seqBs, __global unsigned char* outAs, __global unsigned char* outBs) { - __local int B; - __attribute__((xcl_pipeline_loop(1))) - for (int t_outer = 0; t_outer < 32; ++t_outer) { - __attribute__((opencl_unroll_hint(2))) - for (int t_inner = 0; t_inner < 32; ++t_inner) { - __local int maxtrix_max; - maxtrix_max = 0; - __local int i_max; - i_max = 0; - __local int j_max; - j_max = 0; - __local short matrix[16641]; - for (int x = 0; x < 129; ++x) { - for (int y = 0; y < 129; ++y) { - matrix[(y + (x * 129))] = (short)0; - } - } - __local short action[16641]; - for (int x1 = 0; x1 < 129; ++x1) { - for (int y1 = 0; y1 < 129; ++y1) { - action[(y1 + (x1 * 129))] = (short)3; - } - } - __local int mutate1; - for (int i = 0; i < 129; 
++i) { - for (int j = 0; j < 129; ++j) { - __local int trace_back[4]; - for (int x2 = 0; x2 < 4; ++x2) { - trace_back[x2] = 0; - } - if ((i != 0) && (j != 0)) { - trace_back[0] = ((int)(((long)matrix[((j + (i * 129)) + -130)]) + ((long)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 1 : -4)))); - trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); - trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); - trace_back[3] = 0; - __local int max; - max = trace_back[0]; - __local int act; - act = 0; - for (int i1 = 0; i1 < 4; ++i1) { - if (max < trace_back[i1]) { - max = trace_back[i1]; - act = i1; - } - } - matrix[(j + (i * 129))] = ((short)max); - action[(j + (i * 129))] = ((short)act); - if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { - maxtrix_max = ((int)matrix[(j + (i * 129))]); - i_max = i; - j_max = j; - } - } - } - } - __local int T; - __local int curr_i; - curr_i = i_max; - __local int curr_j; - curr_j = j_max; - __local int next_i; - next_i = 0; - __local int next_j; - next_j = 0; - __local int act1; - act1 = ((int)action[(curr_j + (curr_i * 129))]); - __local int next_i1; - next_i1 = 0; - __local int next_j1; - next_j1 = 0; - if (act1 == 0) { - next_i1 = (curr_i + -1); - next_j1 = (curr_j + -1); - } else { - if (act1 == 1) { - next_i1 = (curr_i + -1); - next_j1 = curr_j; - } else { - if (act1 == 2) { - next_i1 = curr_i; - next_j1 = (curr_j + -1); - } else { - next_i1 = curr_i; - next_j1 = curr_j; - } - } - } - next_i = next_i1; - next_j = next_j1; - __local int tick; - tick = 0; - while (((curr_i != next_i) || (curr_j != next_j))) { - __local int a; - a = 0; - __local int b; - b = 0; - if (next_i == curr_i) { - a = 0; - } else { - a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); - } - if (next_j == curr_j) { - b = 0; - } else { - b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); - } - outAs[(tick + ((t_inner + 
(t_outer * 32)) * 256))] = ((unsigned char)a); - outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)b); - curr_i = next_i; - curr_j = next_j; - __local int act2; - act2 = ((int)action[(curr_j + (curr_i * 129))]); - __local int next_i2; - next_i2 = 0; - __local int next_j2; - next_j2 = 0; - if (act2 == 0) { - next_i2 = (curr_i + -1); - next_j2 = (curr_j + -1); - } else { - if (act2 == 1) { - next_i2 = (curr_i + -1); - next_j2 = curr_j; - } else { - if (act2 == 2) { - next_i2 = curr_i; - next_j2 = (curr_j + -1); - } else { - next_i2 = curr_i; - next_j2 = curr_j; - } - } - } - next_i = next_i2; - next_j = next_j2; - tick = (tick + 1); - } - } - } -} - diff --git a/samples/smith_waterman/smith_aocl.cl b/samples/smith_waterman/smith_aocl.cl deleted file mode 100644 index 80a4ba601..000000000 --- a/samples/smith_waterman/smith_aocl.cl +++ /dev/null @@ -1,143 +0,0 @@ -#include "ihc_apint.h" -__kernel void default_function(__global uint* restrict seqAs, __global uint* restrict seqBs, __global uint* restrict outAs, __global uint* restrict outBs) { - int B; - #pragma ii 1 - for (int t_outer = 0; t_outer < 2; ++t_outer) { - #pragma unroll - for (int t_inner = 0; t_inner < 32; ++t_inner) { - int maxtrix_max; - maxtrix_max = 0; - int i_max; - i_max = 0; - int j_max; - j_max = 0; - short matrix[841]; - for (int x = 0; x < 29; ++x) { - for (int y = 0; y < 29; ++y) { - matrix[(y + (x * 29))] = (short)0; - } - } - short action[841]; - for (int x1 = 0; x1 < 29; ++x1) { - for (int y1 = 0; y1 < 29; ++y1) { - action[(y1 + (x1 * 29))] = (short)3; - } - } - int mutate3; - for (int i = 0; i < 29; ++i) { - for (int j = 0; j < 29; ++j) { - int trace_back[4]; - for (int x2 = 0; x2 < 4; ++x2) { - trace_back[x2] = 0; - } - if ((i != 0) && (j != 0)) { - trace_back[0] = ((int)(((int33_t)matrix[((j + (i * 29)) + -30)]) + ((int33_t)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 28)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 28)) + -1)]) ? 
1 : -4)))); - trace_back[1] = (((int)matrix[((j + (i * 29)) + -29)]) + -4); - trace_back[2] = (((int)matrix[((j + (i * 29)) + -1)]) + -4); - trace_back[3] = 0; - int max; - max = trace_back[0]; - int act; - act = 0; - for (int i1 = 0; i1 < 4; ++i1) { - if (max < trace_back[i1]) { - max = trace_back[i1]; - act = i1; - } - } - matrix[(j + (i * 29))] = ((short)max); - action[(j + (i * 29))] = ((short)act); - if (maxtrix_max < ((int)matrix[(j + (i * 29))])) { - maxtrix_max = ((int)matrix[(j + (i * 29))]); - i_max = i; - j_max = j; - } - } - } - } - int T; - int curr_i; - curr_i = i_max; - int curr_j; - curr_j = j_max; - int next_i; - next_i = 0; - int next_j; - next_j = 0; - int act1; - act1 = ((int)action[(curr_j + (curr_i * 29))]); - int next_i1; - next_i1 = 0; - int next_j1; - next_j1 = 0; - if (act1 == 0) { - next_i1 = (curr_i + -1); - next_j1 = (curr_j + -1); - } else { - if (act1 == 1) { - next_i1 = (curr_i + -1); - next_j1 = curr_j; - } else { - if (act1 == 2) { - next_i1 = curr_i; - next_j1 = (curr_j + -1); - } else { - next_i1 = curr_i; - next_j1 = curr_j; - } - } - } - next_i = next_i1; - next_j = next_j1; - int tick; - tick = 0; - while (((curr_i != next_i) || (curr_j != next_j))) { - int a; - a = 0; - int b; - b = 0; - if (next_i == curr_i) { - a = 0; - } else { - a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 28)) + -1)]); - } - if (next_j == curr_j) { - b = 0; - } else { - b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 28)) + -1)]); - } - outAs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((uint3_t)a); - outBs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((uint3_t)b); - curr_i = next_i; - curr_j = next_j; - int act2; - act2 = ((int)action[(curr_j + (curr_i * 29))]); - int next_i2; - next_i2 = 0; - int next_j2; - next_j2 = 0; - if (act2 == 0) { - next_i2 = (curr_i + -1); - next_j2 = (curr_j + -1); - } else { - if (act2 == 1) { - next_i2 = (curr_i + -1); - next_j2 = curr_j; - } else { - if (act2 == 2) { - next_i2 = curr_i; - next_j2 = 
(curr_j + -1); - } else { - next_i2 = curr_i; - next_j2 = curr_j; - } - } - } - next_i = next_i2; - next_j = next_j2; - tick = (tick + 1); - } - } - } -} - diff --git a/samples/smith_waterman/smith_vhls.cl b/samples/smith_waterman/smith_vhls.cl deleted file mode 100644 index 4fd36c8aa..000000000 --- a/samples/smith_waterman/smith_vhls.cl +++ /dev/null @@ -1,146 +0,0 @@ -#include -#include -#include - -void default_function(ap_uint<3> seqAs[64][28], ap_uint<3> seqBs[64][28], ap_uint<3> outAs[64][56], ap_uint<3> outBs[64][56]) { - ap_int<32> B; - for (ap_int<32> t_outer = 0; t_outer < 2; ++t_outer) { - #pragma HLS pipeline - for (ap_int<32> t_inner = 0; t_inner < 32; ++t_inner) { - #pragma HLS unroll - ap_int<32> maxtrix_max; - maxtrix_max = 0; - ap_int<32> i_max; - i_max = 0; - ap_int<32> j_max; - j_max = 0; - ap_int<16> matrix[29][29]; - for (ap_int<32> x = 0; x < 29; ++x) { - for (ap_int<32> y = 0; y < 29; ++y) { - matrix[x][y] = (ap_int<16>)0; - } - } - ap_int<16> action[29][29]; - for (ap_int<32> x1 = 0; x1 < 29; ++x1) { - for (ap_int<32> y1 = 0; y1 < 29; ++y1) { - action[x1][y1] = (ap_int<16>)3; - } - } - ap_int<32> mutate5; - for (ap_int<32> i = 0; i < 29; ++i) { - for (ap_int<32> j = 0; j < 29; ++j) { - ap_int<32> trace_back[4]; - for (ap_int<32> x2 = 0; x2 < 4; ++x2) { - trace_back[x2] = 0; - } - if ((i != 0) && (j != 0)) { - trace_back[0] = ((ap_int<32>)(((ap_int<33>)matrix[(i + -1)][(j + -1)]) + ((ap_int<33>)((seqAs[(t_inner + (t_outer * 32))][(i + -1)] == seqBs[(t_inner + (t_outer * 32))][(j + -1)]) ? 
1 : -4)))); - trace_back[1] = (((ap_int<32>)matrix[(i + -1)][j]) + -4); - trace_back[2] = (((ap_int<32>)matrix[i][(j + -1)]) + -4); - trace_back[3] = 0; - ap_int<32> max; - max = trace_back[0]; - ap_int<32> act; - act = 0; - for (ap_int<32> i1 = 0; i1 < 4; ++i1) { - if (max < trace_back[i1]) { - max = trace_back[i1]; - act = i1; - } - } - matrix[i][j] = ((ap_int<16>)max); - action[i][j] = ((ap_int<16>)act); - if (maxtrix_max < ((ap_int<32>)matrix[i][j])) { - maxtrix_max = ((ap_int<32>)matrix[i][j]); - i_max = i; - j_max = j; - } - } - } - } - ap_int<32> T; - ap_int<32> curr_i; - curr_i = i_max; - ap_int<32> curr_j; - curr_j = j_max; - ap_int<32> next_i; - next_i = 0; - ap_int<32> next_j; - next_j = 0; - ap_int<32> act1; - act1 = ((ap_int<32>)action[((curr_j / 29) + curr_i)][(curr_j % 29)]); - ap_int<32> next_i1; - next_i1 = 0; - ap_int<32> next_j1; - next_j1 = 0; - if (act1 == 0) { - next_i1 = (curr_i + -1); - next_j1 = (curr_j + -1); - } else { - if (act1 == 1) { - next_i1 = (curr_i + -1); - next_j1 = curr_j; - } else { - if (act1 == 2) { - next_i1 = curr_i; - next_j1 = (curr_j + -1); - } else { - next_i1 = curr_i; - next_j1 = curr_j; - } - } - } - next_i = next_i1; - next_j = next_j1; - ap_int<32> tick; - tick = 0; - while (((curr_i != next_i) || (curr_j != next_j))) { - ap_int<32> a; - a = 0; - ap_int<32> b; - b = 0; - if (next_i == curr_i) { - a = 0; - } else { - a = ((ap_int<32>)seqAs[((((curr_i - ((curr_i + -1) % 28)) + ((t_inner + (t_outer * 32)) * 28)) + -1) / 28)][((curr_i + -1) % 28)]); - } - if (next_j == curr_j) { - b = 0; - } else { - b = ((ap_int<32>)seqBs[((((curr_j - ((curr_j + -1) % 28)) + ((t_inner + (t_outer * 32)) * 28)) + -1) / 28)][((curr_j + -1) % 28)]); - } - outAs[((tick / 56) + (t_inner + (t_outer * 32)))][(tick % 56)] = ((ap_uint<3>)a); - outBs[((tick / 56) + (t_inner + (t_outer * 32)))][(tick % 56)] = ((ap_uint<3>)b); - curr_i = next_i; - curr_j = next_j; - ap_int<32> act2; - act2 = ((ap_int<32>)action[((curr_j / 29) + curr_i)][(curr_j % 
29)]); - ap_int<32> next_i2; - next_i2 = 0; - ap_int<32> next_j2; - next_j2 = 0; - if (act2 == 0) { - next_i2 = (curr_i + -1); - next_j2 = (curr_j + -1); - } else { - if (act2 == 1) { - next_i2 = (curr_i + -1); - next_j2 = curr_j; - } else { - if (act2 == 2) { - next_i2 = curr_i; - next_j2 = (curr_j + -1); - } else { - next_i2 = curr_i; - next_j2 = curr_j; - } - } - } - next_i = next_i2; - next_j = next_j2; - tick = (tick + 1); - } - } - } -} - diff --git a/samples/smith_waterman/smith_waterman_sdaccel.py b/samples/smith_waterman/smith_waterman_sdaccel.py deleted file mode 100644 index 354cac757..000000000 --- a/samples/smith_waterman/smith_waterman_sdaccel.py +++ /dev/null @@ -1,24 +0,0 @@ -import heterocl as hcl -import numpy as np -from smith_waterman_main import * - -# f = top("vhls_csim") -f = top("sdaccel_sw_emu") - -# add a very simple test -_seqA_np = np.ones((num, lenA)) -for i in range(0, 4): - _seqA_np[0][i] = 2 -_seqB_np = np.ones((num, lenB)) -_seqA = hcl.asarray(_seqA_np, dtype) -_seqB = hcl.asarray(_seqB_np, dtype) -_consensusA = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) -_consensusB = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) -f(_seqA, _seqB, _consensusA, _consensusB) -_consensusA_np = _consensusA.asnumpy() -_consensusB_np = _consensusB.asnumpy() -for i in range(0, 256): - if i < 124: - assert _consensusA_np[0][i] == 1 - else: - assert _consensusA_np[0][i] == 0 diff --git a/samples/smith_waterman/vhls_code.cl b/samples/smith_waterman/vhls_code.cl deleted file mode 100644 index 8066bc2c2..000000000 --- a/samples/smith_waterman/vhls_code.cl +++ /dev/null @@ -1,146 +0,0 @@ -#include -#include -#include - -void default_function(ap_uint<3> seqAs[1024][128], ap_uint<3> seqBs[1024][128], ap_uint<3> outAs[1024][256], ap_uint<3> outBs[1024][256]) { - ap_int<32> B; - for (ap_int<32> t_outer = 0; t_outer < 32; ++t_outer) { - #pragma HLS pipeline - for (ap_int<32> t_inner = 0; t_inner < 32; ++t_inner) { - #pragma HLS unroll - ap_int<32> 
maxtrix_max; - maxtrix_max = 0; - ap_int<32> i_max; - i_max = 0; - ap_int<32> j_max; - j_max = 0; - ap_int<16> matrix[129][129]; - for (ap_int<32> x = 0; x < 129; ++x) { - for (ap_int<32> y = 0; y < 129; ++y) { - matrix[x][y] = (ap_int<16>)0; - } - } - ap_int<16> action[129][129]; - for (ap_int<32> x1 = 0; x1 < 129; ++x1) { - for (ap_int<32> y1 = 0; y1 < 129; ++y1) { - action[x1][y1] = (ap_int<16>)3; - } - } - ap_int<32> mutate3; - for (ap_int<32> i = 0; i < 129; ++i) { - for (ap_int<32> j = 0; j < 129; ++j) { - ap_int<32> trace_back[4]; - for (ap_int<32> x2 = 0; x2 < 4; ++x2) { - trace_back[x2] = 0; - } - if ((i != 0) && (j != 0)) { - trace_back[0] = ((ap_int<32>)(((ap_int<33>)matrix[(i + -1)][(j + -1)]) + ((ap_int<33>)((seqAs[(t_inner + (t_outer * 32))][(i + -1)] == seqBs[(t_inner + (t_outer * 32))][(j + -1)]) ? 1 : -4)))); - trace_back[1] = (((ap_int<32>)matrix[(i + -1)][j]) + -4); - trace_back[2] = (((ap_int<32>)matrix[i][(j + -1)]) + -4); - trace_back[3] = 0; - ap_int<32> max; - max = trace_back[0]; - ap_int<32> act; - act = 0; - for (ap_int<32> i1 = 0; i1 < 4; ++i1) { - if (max < trace_back[i1]) { - max = trace_back[i1]; - act = i1; - } - } - matrix[i][j] = ((ap_int<16>)max); - action[i][j] = ((ap_int<16>)act); - if (maxtrix_max < ((ap_int<32>)matrix[i][j])) { - maxtrix_max = ((ap_int<32>)matrix[i][j]); - i_max = i; - j_max = j; - } - } - } - } - ap_int<32> T; - ap_int<32> curr_i; - curr_i = i_max; - ap_int<32> curr_j; - curr_j = j_max; - ap_int<32> next_i; - next_i = 0; - ap_int<32> next_j; - next_j = 0; - ap_int<32> act1; - act1 = ((ap_int<32>)action[((curr_j / 129) + curr_i)][(curr_j % 129)]); - ap_int<32> next_i1; - next_i1 = 0; - ap_int<32> next_j1; - next_j1 = 0; - if (act1 == 0) { - next_i1 = (curr_i + -1); - next_j1 = (curr_j + -1); - } else { - if (act1 == 1) { - next_i1 = (curr_i + -1); - next_j1 = curr_j; - } else { - if (act1 == 2) { - next_i1 = curr_i; - next_j1 = (curr_j + -1); - } else { - next_i1 = curr_i; - next_j1 = curr_j; - } - } - } - 
next_i = next_i1; - next_j = next_j1; - ap_int<32> tick; - tick = 0; - while (((curr_i != next_i) || (curr_j != next_j))) { - ap_int<32> a; - a = 0; - ap_int<32> b; - b = 0; - if (next_i == curr_i) { - a = 0; - } else { - a = ((ap_int<32>)seqAs[((((curr_i - ((curr_i + -1) % 128)) + ((t_inner + (t_outer * 32)) * 128)) + -1) / 128)][((curr_i + -1) % 128)]); - } - if (next_j == curr_j) { - b = 0; - } else { - b = ((ap_int<32>)seqBs[((((curr_j - ((curr_j + -1) % 128)) + ((t_inner + (t_outer * 32)) * 128)) + -1) / 128)][((curr_j + -1) % 128)]); - } - outAs[((tick / 256) + (t_inner + (t_outer * 32)))][(tick % 256)] = ((ap_uint<3>)a); - outBs[((tick / 256) + (t_inner + (t_outer * 32)))][(tick % 256)] = ((ap_uint<3>)b); - curr_i = next_i; - curr_j = next_j; - ap_int<32> act2; - act2 = ((ap_int<32>)action[((curr_j / 129) + curr_i)][(curr_j % 129)]); - ap_int<32> next_i2; - next_i2 = 0; - ap_int<32> next_j2; - next_j2 = 0; - if (act2 == 0) { - next_i2 = (curr_i + -1); - next_j2 = (curr_j + -1); - } else { - if (act2 == 1) { - next_i2 = (curr_i + -1); - next_j2 = curr_j; - } else { - if (act2 == 2) { - next_i2 = curr_i; - next_j2 = (curr_j + -1); - } else { - next_i2 = curr_i; - next_j2 = curr_j; - } - } - } - next_i = next_i2; - next_j = next_j2; - tick = (tick + 1); - } - } - } -} - diff --git a/samples/sobel/sobel.py b/samples/sobel/sobel.py deleted file mode 100644 index a4299d8ae..000000000 --- a/samples/sobel/sobel.py +++ /dev/null @@ -1,91 +0,0 @@ -import heterocl as hcl -import hlib -import numpy as np -from PIL import Image -from urllib.request import urlopen - -batch_size = 1 -hcl.init(hcl.UInt(32)) -dtype = hcl.UInt(32) -image_size = () -kernel_size = 3 - -# setup target using vivado -tool = hcl.tool.vivado("csim") -target = hcl.platform.zc706 - -def sobel(): - image = hcl.placeholder((batch_size, 1, 256, 256), "input_image") - k1 = hcl.placeholder((1, 1, 3, 3), "kernel_1") - k2 = hcl.placeholder((1, 1, 3, 3), "kernel_2") - - def kernel(input_image, kernel_1, 
kernel_2): - - def absolute(image, *args): - with hcl.if_(image[args] > 0): - hcl.return_(image[args]) - with hcl.else_(): - hcl.return_(-1 * image[args]) - - def dev(gx, gy, org): - assert gx.shape == gy.shape, "mismatch" - rx = hcl.reduce_axis(0, 255, "rx") - ry = hcl.reduce_axis(0, 255, "ry") - mat_sum = hcl.compute(gx.shape, lambda nn, ff, xx, yy: - gx[nn, ff, xx, yy] + gy[nn, ff, xx, yy], name="add") - return hcl.compute(mat_sum.shape, lambda nn, ff, xx, yy: - mat_sum[nn, ff, xx, yy] * 255.0 / hcl.max(mat_sum[nn, ff, rx, ry], axis=[rx, ry]), - name = "derv") - - # make the conv op a kernel on fpga. - # return tensor required (cannot do def_()) - output_shape = (1,1,254,254) - - # make compute wrapped in hcl def - module1 = hcl.def_([input_image.shape, kernel_1.shape, output_shape], name="conv1")(hlib.nn.conv2d_nchw_imp) - module2 = hcl.def_([input_image.shape, kernel_1.shape, output_shape], name="conv2")(hlib.nn.conv2d_nchw_imp) - conv1 = hcl.compute(output_shape, lambda *args: 0) - conv2 = hcl.compute(output_shape, lambda *args: 0) - module1(input_image, kernel_1, conv1) - module2(input_image, kernel_2, conv2) - - abs1 = hcl.compute(conv1.shape, - lambda *args: absolute(conv1, *args)) - abs2 = hcl.compute(conv2.shape, - lambda *args: absolute(conv2, *args)) - - # derivative module for normalization - return dev(abs1, abs2, input_image) - - s = hcl.create_schedule([image, k1, k2], kernel) - - # data moved to local - i0, k10 = s.to([image, k1], target.fpga) - s.to([i0, k10], s[kernel.conv1]) - s.to(kernel.derv, target.cpu) - - # create stream channel between modules - print(type(target.fpga), hcl.lower(s)) - return hcl.build(s, target) - -# Load sample data -img = Image.open(urlopen('http://i.stack.imgur.com/8zINU.gif')) -kernel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) -kernel_y = np.flip(kernel_x.T.T, axis=0) -img = np.array(img) - -img = img[np.newaxis, ...] -img = img[np.newaxis, ...] -kernel_x = kernel_x[np.newaxis, ...] 
-kernel_x = kernel_x[np.newaxis, ...] -kernel_y = kernel_y[np.newaxis, ...] -kernel_y = kernel_y[np.newaxis, ...] - -hcl_input = hcl.asarray(img, dtype) -kernel_x = hcl.asarray(kernel_x, dtype) -kernel_y = hcl.asarray(kernel_y, dtype) -hcl_output = hcl.asarray(np.zeros((1,1,254,254)), dtype) - -f = sobel() -f(hcl_input, kernel_x, kernel_y, hcl_output) - diff --git a/samples/stream/example.cl b/samples/stream/example.cl deleted file mode 100644 index fa3cfbd81..000000000 --- a/samples/stream/example.cl +++ /dev/null @@ -1,34 +0,0 @@ -#include "ihc_apint.h" -#pragma OPENCL EXTENSION cl_intel_channels : enable -channel int ret_add_c; -channel int ret_mul_c; -__kernel void ret_add(__global int* restrict ret_add_a, __global int* restrict ret_add_b) { - for (int i = 0; i < 10; ++i) { - for (int i1 = 0; i1 < 20; ++i1) { - write_channel_intel(ret_add_c, ((int)(((int33_t)ret_add_a[(i1 + (i * 20))]) + ((int33_t)ret_add_b[(i1 + (i * 20))])))); - } - } -} - -__kernel void ret_mul(__global int* restrict ret_mul_d, __global int* restrict ret_mul_e) { - for (int i = 0; i < 10; ++i) { - for (int i1 = 0; i1 < 20; ++i1) { - ret_mul_e[(i1 + (i * 20))] = ((int)(((long)read_channel_intel(ret_mul_c)) * ((long)ret_mul_d[(i1 + (i * 20))]))); - } - } -} - -__kernel void default_function(__global int* restrict a, __global int* restrict b, __global int* restrict c, __global int* restrict d, __global int* restrict e) { - int ret_add; - int ret_mul; - for (int x = 0; x < 10; ++x) { - for (int y = 0; y < 20; ++y) { - c[(y + (x * 20))] = 0; - } - } - int ret_add0; - ret_add(a, b); - int ret_mul0; - ret_mul(d, e); -} - diff --git a/samples/stream/mod.py b/samples/stream/mod.py deleted file mode 100644 index 8c12ad722..000000000 --- a/samples/stream/mod.py +++ /dev/null @@ -1,32 +0,0 @@ -import heterocl as hcl - -hcl.init() -initiation_interval = 4 -a = hcl.placeholder((10, 20)) -b = hcl.placeholder((10, 20)) - -@hcl.def_([a.shape, b.shape, (), ()]) -def ret_add(a, b, x, y): - hcl.return_(a[x, y] 
+ b[x, y]) - -@hcl.def_([a.shape, b.shape, (), ()]) -def ret_mul(a, b, x, y): - hcl.return_(a[x, y] * b[x, y]) - -c = hcl.compute(a.shape, lambda i, j: ret_add(a, b, i, j)) -d = hcl.compute(b.shape, lambda i, j: ret_mul(a, b, i, j)) -s = hcl.create_schedule([a, b, c, d]) - -# compute customization -s[c].pipeline(c.axis[0], initiation_interval) -s.partition(b, dim=2, factor=2) - -# stream into modules / device -# s[c].stream_to(ret_mul) -# s[d].stream_to(hcl.FPGA) - -print(hcl.lower(s)) -code = hcl.build(s, target="vhls") -print(code) - - diff --git a/samples/stream/stream.py b/samples/stream/stream.py deleted file mode 100644 index 5c2396a57..000000000 --- a/samples/stream/stream.py +++ /dev/null @@ -1,58 +0,0 @@ -import heterocl as hcl - -hcl.init() -target = hcl.platform.zc706 -initiation_interval = 4 - -a = hcl.placeholder((10, 20), name="a") -b = hcl.placeholder((10, 20), name="b") -c = hcl.placeholder((10, 20), name="c") -d = hcl.placeholder((10, 20), name="d") -e = hcl.placeholder((10, 20), name="e") - -def add_mul(a, b, c, d, e): - @hcl.def_([a.shape, b.shape, c.shape]) - def ret_add(a, b, c): - with hcl.for_(0, a.shape[0]) as i: - with hcl.for_(0, a.shape[1]) as j: - c[i, j] = a[i, j] + b[i, j] - - @hcl.def_([c.shape, d.shape, e.shape]) - def ret_mul(c, d, e): - # hcl.update(c, lambda x, y: a[x, y] * b[x, y], 'c_mul') - with hcl.for_(0, c.shape[0]) as i: - with hcl.for_(0, c.shape[1]) as j: - e[i, j] = c[i, j] * d[i, j] - - ret_add(a, b, c) - ret_mul(c, d, e) - -# compute customization -s = hcl.create_schedule([a, b, c, d, e], add_mul) -# op1 = add_mul.ret_add.c -# op2 = add_mul.ret_mul.c -# s[op1].pipeline(op1.axis[0], initiation_interval) - -# stream into modules / device -a0, b0 = s.to([a, b], target.xcel) -d0 = s.to(d, target.xcel) -#s.partition(b0, dim=2, factor=2) -s.to([a0, b0], s[add_mul.ret_add]) -s.to(d0, s[add_mul.ret_mul]) - -# within device move producer to consumer -s.to(c, s[add_mul.ret_mul], - s[add_mul.ret_add], depth=10) - -# return tensor 
for inter-device move -# e0 = s.stream_to(e, hcl.CPU('riscv')) - -# print(add_mul.ret_mul._buf, c._buf) -print(hcl.lower(s)) -code = hcl.build(s, target) -print(code) -# -# with open("example.cl", "w") as f: -# f.write(code) -# f.close() - diff --git a/tests/test_codegen_aocl.py b/tests/test_codegen_aocl.py deleted file mode 100644 index a72d364f2..000000000 --- a/tests/test_codegen_aocl.py +++ /dev/null @@ -1,99 +0,0 @@ -import heterocl as hcl - -def test_ap_int(): - hcl.init(); - A = hcl.placeholder((1, 32), dtype=hcl.Int(3)) - B = hcl.placeholder((1, 32), dtype=hcl.UInt(3)) - C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Int(8)) - s = hcl.create_schedule([A, B, C]) - code = hcl.build(s, target='aocl') - print (code) - assert "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" in code - assert "ap_int<3> intd_t" in code - assert "ap_uint<3> uintd_t" in code - assert "ap_int<8> intd_t" in code - -def test_pragma(): - hcl.init() - A = hcl.placeholder((10, 32), "A") - B = hcl.placeholder((10, 32)) - C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j]) - - # unroll - s1 = hcl.create_schedule([A, B, C]) - s1[C].unroll(C.axis[1], factor=4) - code1 = hcl.build(s1, target='aocl') - print (code1) - assert "#pragma unroll 4" in code1 - - # pipeline - s2 = hcl.create_schedule([A, B, C]) - s2[C].pipeline(C.axis[0], initiation_interval=2) - code2 = hcl.build(s2, target='aocl') - print (code2) - assert "#pragma ii 2" in code2 - -def test_reorder(): - hcl.init() - A = hcl.placeholder((10, 100), "A") - - def two_stage(A): - B = hcl.compute(A.shape, lambda x, y : A[x, y] + 1, "B") - C = hcl.compute(A.shape, lambda x, y : B[x, y] + 1, "C") - return C - - s = hcl.create_schedule([A], two_stage) - s_B = two_stage.B - code = hcl.build(s, target='aocl') - print (code) - s[s_B].reorder(s_B.axis[1], s_B.axis[0]) - code2 = hcl.build(s, target='aocl') - print (code2) - -def test_split_fuse(): - hcl.init() - A = hcl.placeholder((10, 100), "A") 
- - def two_stage(A): - B = hcl.compute(A.shape, lambda x, y : A[x, y] + 1, "B") - C = hcl.compute(A.shape, lambda x, y : B[x, y] + 1, 'C') - return C - - s = hcl.create_schedule([A], two_stage) - s_B = two_stage.B - x_out, x_in = s[s_B].split(s_B.axis[0], 5) - code = hcl.build(s, target='aocl') - print (code) - s2 = hcl.create_schedule([A], two_stage) - s2_B = two_stage.B - x_y = s[s_B].fuse(s2_B.axis[0], s2_B.axis[1]) - code2 = hcl.build(s2, target='aocl') - print (code2) - -def test_binary_conv(): - hcl.init() - A = hcl.placeholder((1, 32, 14, 14), dtype=hcl.UInt(1), name="A") - B = hcl.placeholder((64, 32, 3, 3), dtype=hcl.UInt(1), name="B") - rc = hcl.reduce_axis(0, 32) - ry = hcl.reduce_axis(0, 3) - rx = hcl.reduce_axis(0, 3) - C = hcl.compute((1, 64, 12, 12), - lambda nn, ff, yy, xx: hcl.sum( - A[nn, rc, yy + ry, xx + rx] * B[ff, rc, ry, rx], axis=[rc, ry, rx]), - dtype=hcl.UInt(8), name="C") - s = hcl.create_schedule([A, B, C]) - s[C].split(C.axis[1], factor=5) - code = hcl.build(s, target='aocl') - print (code) - assert "for (ap_int<32> intd_t ff_outer = 0; ff_outer < 13; ++ff_outer)" in code - assert "for (ap_int<32> intd_t ff_inner = 0; ff_inner < 5; ++ff_inner)" in code - assert "if (ff_inner < (64 - (ff_outer * 5)))" in code - -if __name__ == '__main__': - test_ap_int() - test_pragma() - test_reorder() - test_split_fuse() - test_binary_conv() - - diff --git a/tests/test_codegen_ihls.py b/tests/test_codegen_ihls.py index 1b53f18ca..fc5a7e53b 100644 --- a/tests/test_codegen_ihls.py +++ b/tests/test_codegen_ihls.py @@ -65,4 +65,3 @@ def kernel(A): s = hcl.create_schedule([A], kernel) code = hcl.build(s, target="ihls") assert "A[0].slc<4>(1)" in code - diff --git a/tests/test_codegen_sdaccel.py b/tests/test_codegen_sdaccel.py deleted file mode 100644 index 43d94f238..000000000 --- a/tests/test_codegen_sdaccel.py +++ /dev/null @@ -1,36 +0,0 @@ -import heterocl as hcl - - - - - -def test_pragma(): - hcl.init(hcl.Float()) - A = hcl.placeholder((10, 32), "A") 
- B = hcl.placeholder((10, 32)) - C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j]) - - # unroll - s1 = hcl.create_schedule([A, B, C]) - s1[C].unroll(C.axis[1], factor=6) - code1 = hcl.build(s1, target='sdaccel') - print (code1) - assert "__attribute__((opencl_unroll_hint(6)))" in code1 - - # pipeline - s2 = hcl.create_schedule([A, B, C]) - s2[C].pipeline(C.axis[0], initiation_interval=2) - code2 = hcl.build(s2, target='sdaccel') - print (code2) - assert "__attribute__((xcl_pipeline_loop(2)))" in code2 - - # partition - s3 = hcl.create_schedule([A, B, C]) - s3.partition(A, hcl.Partition.Block, dim=2, factor=2) - code3 = hcl.build(s3, target='sdaccel') - print (code3) - assert "__attribute__((xcl_array_partition(block,2,2)))" in code3 - - -if __name__ == "__main__": - test_pragma() \ No newline at end of file diff --git a/tests/test_codegen_soda.py b/tests/test_codegen_soda.py index 492ee6146..56fb8df77 100644 --- a/tests/test_codegen_soda.py +++ b/tests/test_codegen_soda.py @@ -52,7 +52,6 @@ def test_blur(self): img_t(0, 0) = uint16((int32((uint18((uint17(img_i(-1, 0)) + uint17(img_i(0, 0)))) + uint18(img_i(1, 0)))) / 3)) output uint16: img_o(0, 0) = uint16((int32((uint18((uint17(img_t(0, -1)) + uint17(img_t(0, 0)))) + uint18(img_t(0, 1)))) / 3)) - ''') def test_gaussian(self): @@ -77,7 +76,6 @@ def test_gaussian(self): reduce_ssa3 = float32(((float64(img_i(-1, 0)) * 3699.65) + float64(reduce_ssa2))) reduce_ssa4 = float32(((float64(img_i(0, 0)) * 4620.30) + float64(reduce_ssa3))) img_o(0, 0) = reduce_ssa4 - ''' ) diff --git a/tests/test_codegen_vhls.py b/tests/test_codegen_vhls.py index a6385975b..dadae5068 100644 --- a/tests/test_codegen_vhls.py +++ b/tests/test_codegen_vhls.py @@ -85,7 +85,7 @@ def test_index_split(): s = hcl.create_schedule([A, B]) s[B].split(B.axis[0], 5) code = hcl.build(s, target="vhls") - assert "B[(x + ((y_inner + (y_outer * 5)) * 10))]" in code + assert "B[(y_inner + (y_outer * 5))][x]" in code def test_index_split_reshape(): 
hcl.init() @@ -95,7 +95,7 @@ def test_index_split_reshape(): s[B].split(B.axis[0], 5) s.reshape(B, (2, 5, 10)) code = hcl.build(s, target="vhls") - assert "B[(x + ((y_inner + (y_outer * 5)) * 10))]" in code + assert "B[y_outer][y_inner][x]" in code def test_index_fuse(): hcl.init() @@ -104,7 +104,7 @@ def test_index_fuse(): s = hcl.create_schedule([A, B]) s[B].fuse(B.axis[0], B.axis[1]) code = hcl.build(s, target="vhls") - assert "B[y_x_fused]" in code + assert "B[(y_x_fused / 10)][(y_x_fused % 10)]" in code def test_binary_conv(): hcl.init() diff --git a/tvm/HalideIR/src/ir/Expr.h b/tvm/HalideIR/src/ir/Expr.h index 4b70d51fc..b78a466ed 100644 --- a/tvm/HalideIR/src/ir/Expr.h +++ b/tvm/HalideIR/src/ir/Expr.h @@ -91,9 +91,6 @@ enum class IRNodeType : int { /** for memory customization **/ Reuse, Partition, - /** for data stream **/ - StreamExpr, - StreamStmt, /** for stencil analysis **/ Stencil }; @@ -305,20 +302,6 @@ enum class PartitionType : int { Cyclic = 2 }; -/** An enum describing the stream type */ -enum class StreamType : int { - Channel = 0, - Pipe = 1, - FIFO = 2 -}; - -/** An enum class for device type */ -enum class DeviceType : int { - CPU = 0, - FPGA = 1, - GPU = 2 -}; - /** A reference-counted handle to a statement node. 
*/ struct Stmt : public IRHandle { Stmt() : IRHandle() {} diff --git a/tvm/HalideIR/src/ir/IR.cpp b/tvm/HalideIR/src/ir/IR.cpp index a604b6fd2..a9718b40e 100644 --- a/tvm/HalideIR/src/ir/IR.cpp +++ b/tvm/HalideIR/src/ir/IR.cpp @@ -692,27 +692,17 @@ Expr Quantize::make(Expr body, Expr bitwidth) { return Expr(node); } -Stmt KernelDef::make(Array args, Array> api_args, - Array api_types, Stmt body, Expr ret_void, - Type ret_type, std::string name, Array channels) { - internal_assert(api_args.size() == api_types.size()) << "KernelDef of unmatched args\n"; +Stmt KernelDef::make(Array args, Stmt body, Expr ret_void, Type ret_type, std::string name) { for (size_t i = 0; i < args.size(); i++) { internal_assert(args[i].defined()) << "KernelDef of undefined arg\n"; - internal_assert(api_types[i].defined()) << "KernelDef of undefined type\n"; - for (size_t j = 0; j < api_args[i].size(); j++) { - internal_assert(api_args[i][j].defined()) << "KernelDef of undefined shape\n"; - } } internal_assert(body.defined()) << "KernelDef of undefined body\n"; internal_assert(ret_void.defined()) << "KernelDef of undefined return type\n"; std::shared_ptr node = std::make_shared(); node->args = std::move(args); - node->api_args = std::move(api_args); - node->api_types = std::move(api_types); node->body = std::move(body); node->ret_void = std::move(ret_void); node->ret_type = ret_type; - node->channels = std::move(channels); node->name = name; return Stmt(node); } @@ -782,62 +772,6 @@ Stmt Partition::make(VarExpr buffer_var, int dim, int factor, PartitionType part return Stmt(node); } -Expr StreamExpr::make(Type type, VarExpr buffer_var, StreamType stream_type, int depth) { - internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; - - std::shared_ptr node = std::make_shared(); - node->type = type; - node->buffer_var = std::move(buffer_var); - node->depth = depth; - node->stream_type = stream_type; - return Expr(node); -} - -Expr StreamExpr::make(Type type, VarExpr 
buffer_var, StreamType stream_type, int depth, - Array annotate_keys, Array annotate_values) { - internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; - internal_assert(annotate_keys.size() == annotate_values.size()) << - "Length of annotate keys and annotate values not equal"; - - std::shared_ptr node = std::make_shared(); - node->type = type; - node->buffer_var = std::move(buffer_var); - node->depth = depth; - node->stream_type = stream_type; - node->annotate_keys = std::move(annotate_keys); - node->annotate_values = std::move(annotate_values); - return Expr(node); -} - -Stmt StreamStmt::make(VarExpr buffer_var, Expr value, StreamType stream_type, int depth) { - internal_assert(value.defined()) << "The stream-in value not defined\n"; - internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; - - std::shared_ptr node = std::make_shared(); - node->buffer_var = std::move(buffer_var); - node->value = std::move(value); - node->depth = depth; - node->stream_type = stream_type; - return Stmt(node); -} - -Stmt StreamStmt::make(VarExpr buffer_var, Expr value, StreamType stream_type, int depth, - Array annotate_keys, Array annotate_values) { - internal_assert(value.defined()) << "The stream-in value not defined\n"; - internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; - internal_assert(annotate_keys.size() == annotate_values.size()) << - "Length of annotate keys and annotate values not equal"; - - std::shared_ptr node = std::make_shared(); - node->buffer_var = std::move(buffer_var); - node->value = std::move(value); - node->depth = depth; - node->stream_type = stream_type; - node->annotate_keys = std::move(annotate_keys); - node->annotate_values = std::move(annotate_values); - return Stmt(node); -} - Stmt Stencil::make(Array inputs, Array outputs, Stmt body, int burst_width, int unroll_factor, int num_iteration) { internal_assert(body.defined()) << "Stencil of undefined body\n"; @@ -950,8 
+884,6 @@ template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v-> template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const Reuse *)this, s); } template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const Partition *)this, s); } template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const Stencil *)this, s); } -template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const StreamStmt *)this, s); } -template<> void ExprNode::accept(IRVisitor *v, const Expr &e) const { v->visit((const StreamExpr *)this, e); } Call::ConstString Call::debug_to_file = "debug_to_file"; Call::ConstString Call::reinterpret = "reinterpret"; diff --git a/tvm/HalideIR/src/ir/IR.h b/tvm/HalideIR/src/ir/IR.h index e8a8835bf..fae48da29 100644 --- a/tvm/HalideIR/src/ir/IR.h +++ b/tvm/HalideIR/src/ir/IR.h @@ -1049,29 +1049,19 @@ struct Quantize : public ExprNode { /** The imperative function definition */ struct KernelDef : public StmtNode { Array args; - Array> api_args; - Array api_types; Stmt body; Expr ret_void; Type ret_type; std::string name; - // args to stream data - Array channels; - EXPORT static Stmt make(Array args, Array> api_args, - Array api_types, Stmt body, Expr ret_void, - Type ret_type, std::string name, - Array channels); + EXPORT static Stmt make(Array args, Stmt body, Expr ret_void, Type ret_type, std::string name); void VisitAttrs(IR::AttrVisitor* v) final { v -> Visit("args", &args); - v -> Visit("api_args", &api_args); - v -> Visit("api_types", &api_types); v -> Visit("body", &body); v -> Visit("ret_void", &ret_void); v -> Visit("ret_type", &ret_type); v -> Visit("name", &name); - v -> Visit("channels", &channels); } static const IRNodeType _type_info = IRNodeType::KernelDef; static constexpr const char* _type_key = "KernelDef"; @@ -1180,70 +1170,6 @@ struct Partition : public StmtNode { static constexpr const char* _type_key = "Partition"; }; -struct 
StreamStmt : public StmtNode { - VarExpr buffer_var; - Expr value; - int depth; - StreamType stream_type; - Array annotate_keys; - Array annotate_values; - - EXPORT static Stmt make(VarExpr buffer_var, - Expr value, - StreamType stream_type, - int depth); - - EXPORT static Stmt make(VarExpr buffer_var, - Expr value, - StreamType stream_type, - int depth, - Array annotate_keys, - Array annotate_values); - - void VisitAttrs(IR::AttrVisitor* v) final { - v -> Visit("buffer_var", &buffer_var); - v -> Visit("value", &value); - v -> Visit("depth", &depth); - v -> Visit("stream_type", &stream_type); - v -> Visit("annotate_keys", &annotate_keys); - v -> Visit("annotate_values", &annotate_values); - } - - static const IRNodeType _type_info = IRNodeType::StreamStmt; - static constexpr const char* _type_key = "StreamStmt"; -}; - -struct StreamExpr : public ExprNode { - VarExpr buffer_var; // var loaded - int depth; - StreamType stream_type; - Array annotate_keys; - Array annotate_values; - - EXPORT static Expr make(Type type, - VarExpr buffer_var, - StreamType stream_type, - int depth); - - EXPORT static Expr make(Type type, - VarExpr buffer_var, - StreamType stream_type, - int depth, - Array annotate_keys, - Array annotate_values); - - void VisitAttrs(IR::AttrVisitor* v) final { - v -> Visit("dtype", &type); - v -> Visit("buffer_var", &buffer_var); - v -> Visit("depth", &depth); - v -> Visit("stream_type", &stream_type); - v -> Visit("annotate_keys", &annotate_keys); - v -> Visit("annotate_values", &annotate_values); - } - static const IRNodeType _type_info = IRNodeType::StreamExpr; - static constexpr const char* _type_key = "StreamExpr"; -}; - struct Stencil : public StmtNode { Array inputs; Array outputs; diff --git a/tvm/HalideIR/src/ir/IREquality.cpp b/tvm/HalideIR/src/ir/IREquality.cpp index 46590056e..9e5798fbb 100644 --- a/tvm/HalideIR/src/ir/IREquality.cpp +++ b/tvm/HalideIR/src/ir/IREquality.cpp @@ -80,7 +80,6 @@ class IRComparer : public IRVisitor { void 
visit(const Call *, const Expr &); void visit(const Let *, const Expr &); void visit(const Shuffle *, const Expr &); - void visit(const StreamExpr *, const Expr &); void visit(const LetStmt *, const Stmt &); void visit(const AttrStmt *, const Stmt &); void visit(const AssertStmt *, const Stmt &); @@ -489,11 +488,6 @@ void IRComparer::visit(const Shuffle *op, const Expr &expr) { compare_expr_vector(e->indices, op->indices); } -void IRComparer::visit(const StreamExpr *op, const Expr &expr) { - const StreamExpr *node = expr_.as(); - compare_node_refs(op->buffer_var, node->buffer_var); -} - } // namespace diff --git a/tvm/HalideIR/src/ir/IRMutator.cpp b/tvm/HalideIR/src/ir/IRMutator.cpp index fbd3e82b5..13b346e93 100644 --- a/tvm/HalideIR/src/ir/IRMutator.cpp +++ b/tvm/HalideIR/src/ir/IRMutator.cpp @@ -480,8 +480,7 @@ void IRMutator::visit(const KernelDef *op, const Stmt &s) { stmt = s; } else { - stmt = KernelDef::make(op->args, op->api_args, op->api_types, - body, ret_void, op->ret_type, op->name, op->channels); + stmt = KernelDef::make(op->args, body, ret_void, op->ret_type, op->name); } } @@ -525,20 +524,6 @@ void IRMutator::visit(const KernelStmt *op, const Stmt &s) { } } -void IRMutator::visit(const StreamStmt *op, const Stmt &s) { - Expr value = mutate(op->value); - if (value.same_as(op->value)) { - stmt = s; - } else { - stmt = StreamStmt::make(op->buffer_var, value, - op->stream_type, op->depth); - } -} - -void IRMutator::visit(const StreamExpr *op, const Expr &e) { - expr = e; -} - void IRMutator::visit(const Return *op, const Stmt &s) { Expr value = mutate(op->value); if (value.same_as(op->value)) { diff --git a/tvm/HalideIR/src/ir/IRMutator.h b/tvm/HalideIR/src/ir/IRMutator.h index 4088ae5ea..1fea5fec6 100644 --- a/tvm/HalideIR/src/ir/IRMutator.h +++ b/tvm/HalideIR/src/ir/IRMutator.h @@ -99,8 +99,6 @@ class IRMutator : public IRVisitor { EXPORT virtual void visit(const Reuse *, const Stmt &); EXPORT virtual void visit(const Partition *, const Stmt &); 
EXPORT virtual void visit(const Stencil *, const Stmt &); - EXPORT virtual void visit(const StreamExpr *, const Expr &); - EXPORT virtual void visit(const StreamStmt *, const Stmt &); }; diff --git a/tvm/HalideIR/src/ir/IRPrinter.cpp b/tvm/HalideIR/src/ir/IRPrinter.cpp index b6f3e6082..6a3a5d651 100644 --- a/tvm/HalideIR/src/ir/IRPrinter.cpp +++ b/tvm/HalideIR/src/ir/IRPrinter.cpp @@ -336,19 +336,6 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) } }); -TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) -.set_dispatch([](const StreamStmt *op, IRPrinter* p) { - p->do_indent(); - p->stream << op->buffer_var << ".write("; - p->print(op->value); - p->stream << ")\n"; -}); - -TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) -.set_dispatch([](const StreamExpr *op, IRPrinter* p) { - p->stream << op->buffer_var << ".read()"; -}); - TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) .set_dispatch([](const Ramp *op, IRPrinter* p) { p->stream << "ramp("; @@ -736,16 +723,7 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) p->do_indent(); p->stream << "def " << op->name << "("; for (size_t i = 0; i < op->args.size(); i++) { - p->stream << op->args[i].type() << "("; // handle type p->print(op->args[i]); - if (op->api_args[i].size() > 1) { - p->stream << "["; - for (size_t j = 0; j < op->api_args[i].size(); j++) { - p->print(op->api_args[i][j]); - if (j < op->api_args[i].size() - 1) p->stream << "*"; - } - p->stream << "])"; - } if (i < op->args.size() - 1) { p->stream << ", "; } diff --git a/tvm/HalideIR/src/ir/IRVisitor.cpp b/tvm/HalideIR/src/ir/IRVisitor.cpp index 30e1fe86b..02880fdb4 100644 --- a/tvm/HalideIR/src/ir/IRVisitor.cpp +++ b/tvm/HalideIR/src/ir/IRVisitor.cpp @@ -137,9 +137,6 @@ void IRVisitor::visit(const Let *op, const Expr &) { op->body.accept(this); } -void IRVisitor::visit(const StreamExpr *op, const Expr &) { -} - void IRVisitor::visit(const LetStmt *op, const Stmt &) { op->value.accept(this); op->body.accept(this); @@ -172,10 +169,6 @@ void IRVisitor::visit(const Store *op, const Stmt &) { 
op->predicate.accept(this); } -void IRVisitor::visit(const StreamStmt *op, const Stmt &) { - op->value.accept(this); -} - void IRVisitor::visit(const Provide *op, const Stmt &) { op->value.accept(this); for (size_t i = 0; i < op->args.size(); i++) { @@ -273,10 +266,6 @@ void IRVisitor::visit(const Quantize *op, const Expr &) { void IRVisitor::visit(const KernelDef *op, const Stmt &) { for (size_t i = 0; i < op->args.size(); i++) { op->args[i].accept(this); - op->api_types[i].accept(this); - for (size_t j = 0; j < op->api_args[i].size(); j++) { - op->api_args[i][j].accept(this); - } } op->ret_void.accept(this); } @@ -585,10 +574,6 @@ void IRGraphVisitor::visit(const Quantize *op, const Expr &) { void IRGraphVisitor::visit(const KernelDef *op, const Stmt &) { for (size_t i = 0; i < op->args.size(); i++) { include(op->args[i]); - include(op->api_types[i]); - for (size_t j = 0; j < op->api_args[i].size(); j++) { - include(op->api_args[i][j]); - } } include(op->ret_void); } @@ -622,12 +607,6 @@ void IRGraphVisitor::visit(const Reuse *op, const Stmt &) { void IRGraphVisitor::visit(const Partition *op, const Stmt &) {} -void IRGraphVisitor::visit(const StreamExpr *op, const Expr &) {} - -void IRGraphVisitor::visit(const StreamStmt *op, const Stmt &) { - include(op->value); -} - void IRGraphVisitor::visit(const Stencil *op, const Stmt &) { include(op->body); } diff --git a/tvm/HalideIR/src/ir/IRVisitor.h b/tvm/HalideIR/src/ir/IRVisitor.h index a4faa4aba..931f1c5c9 100644 --- a/tvm/HalideIR/src/ir/IRVisitor.h +++ b/tvm/HalideIR/src/ir/IRVisitor.h @@ -79,8 +79,6 @@ class IRVisitor { EXPORT virtual void visit(const Reuse *, const Stmt &); EXPORT virtual void visit(const Partition *, const Stmt &); EXPORT virtual void visit(const Stencil *, const Stmt &); - EXPORT virtual void visit(const StreamStmt *, const Stmt &); - EXPORT virtual void visit(const StreamExpr *, const Expr &); }; /** A base class for algorithms that walk recursively over the IR @@ -161,8 +159,6 @@ class 
IRGraphVisitor : public IRVisitor { EXPORT virtual void visit(const Reuse *, const Stmt &); EXPORT virtual void visit(const Partition *, const Stmt &); EXPORT virtual void visit(const Stencil *, const Stmt &); - EXPORT virtual void visit(const StreamExpr *, const Expr &); - EXPORT virtual void visit(const StreamStmt *, const Stmt &); // @} }; diff --git a/tvm/Makefile b/tvm/Makefile index 1b2030645..1a78cbe7c 100644 --- a/tvm/Makefile +++ b/tvm/Makefile @@ -126,13 +126,6 @@ else CFLAGS += -DTVM_OPENCL_RUNTIME=0 endif -ifeq ($(USE_SDACCEL_HLS), 1) - CFLAGS += -DHCL_SDACCEL_RUNTIME=1 -else - CFLAGS += -DHCL_SDACCEL_RUNTIME=0 -endif - - ifeq ($(USE_VIVADO_HLS), 1) CFLAGS += -DHCL_VHLS_RUNTIME=1 else diff --git a/tvm/include/tvm/codegen.h b/tvm/include/tvm/codegen.h index 4d6be0230..3877db941 100644 --- a/tvm/include/tvm/codegen.h +++ b/tvm/include/tvm/codegen.h @@ -42,7 +42,6 @@ runtime::Module Build(const Array& funcs, * \return cstr The C string representation of the file. */ std::string PackImportsToC(const runtime::Module& m, bool system_lib); - } // namespace codegen } // namespace TVM diff --git a/tvm/include/tvm/ir.h b/tvm/include/tvm/ir.h index 8a26e551c..e66db3fb4 100644 --- a/tvm/include/tvm/ir.h +++ b/tvm/include/tvm/ir.h @@ -21,8 +21,6 @@ using Halide::Internal::StmtNode; using Halide::Internal::IRNodeType; using Halide::Internal::ForType; using Halide::Internal::PartitionType; -using Halide::Internal::StreamType; -using Halide::Internal::DeviceType; using Halide::DeviceAPI; // Node container for CommReducer @@ -234,8 +232,6 @@ constexpr const char* pipeline_exec_scope = "pipeline_exec_scope"; constexpr const char* opengl_stage_scope = "opengl_stage_scope"; constexpr const char* attach_scope = "attach_scope"; - -constexpr const char* device_scope = "device_scope"; } // namespace attr /*! 
\brief namespace of TVM Intrinsic functions */ @@ -505,8 +501,6 @@ using Halide::Internal::Quantize; using Halide::Internal::KernelDef; using Halide::Internal::KernelExpr; using Halide::Internal::KernelStmt; -using Halide::Internal::StreamExpr; -using Halide::Internal::StreamStmt; using Halide::Internal::Return; using Halide::Internal::Break; using Halide::Internal::While; diff --git a/tvm/include/tvm/ir_functor_ext.h b/tvm/include/tvm/ir_functor_ext.h index 39ce6d2b8..c4f18ba7e 100644 --- a/tvm/include/tvm/ir_functor_ext.h +++ b/tvm/include/tvm/ir_functor_ext.h @@ -148,7 +148,6 @@ class ExprFunctor { virtual R VisitExpr_(const SetSlice* op, Args... args) EXPR_FUNCTOR_DEFAULT; virtual R VisitExpr_(const Quantize* op, Args... args) EXPR_FUNCTOR_DEFAULT; virtual R VisitExpr_(const KernelExpr* op, Args... args) EXPR_FUNCTOR_DEFAULT; - virtual R VisitExpr_(const StreamExpr* op, Args... args) EXPR_FUNCTOR_DEFAULT; virtual R VisitExprDefault_(const Node* op, Args ...) { LOG(FATAL) << "Do not have a default for " << op->type_key(); return R(); @@ -194,7 +193,6 @@ class ExprFunctor { IR_EXPR_FUNCTOR_DISPATCH(SetSlice); IR_EXPR_FUNCTOR_DISPATCH(Quantize); IR_EXPR_FUNCTOR_DISPATCH(KernelExpr); - IR_EXPR_FUNCTOR_DISPATCH(StreamExpr); return vtable; } }; @@ -246,7 +244,6 @@ class StmtFunctor { virtual R VisitStmt_(const Evaluate* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const KernelDef* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const KernelStmt* op, Args... args) STMT_FUNCTOR_DEFAULT; - virtual R VisitStmt_(const StreamStmt* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const Return* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const Break* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const While* op, Args... 
args) STMT_FUNCTOR_DEFAULT; @@ -278,7 +275,6 @@ class StmtFunctor { IR_STMT_FUNCTOR_DISPATCH(Evaluate); IR_STMT_FUNCTOR_DISPATCH(KernelDef); IR_STMT_FUNCTOR_DISPATCH(KernelStmt); - IR_STMT_FUNCTOR_DISPATCH(StreamStmt); IR_STMT_FUNCTOR_DISPATCH(Return); IR_STMT_FUNCTOR_DISPATCH(Break); IR_STMT_FUNCTOR_DISPATCH(While); diff --git a/tvm/include/tvm/ir_mutator.h b/tvm/include/tvm/ir_mutator.h index 200534644..964684ec1 100644 --- a/tvm/include/tvm/ir_mutator.h +++ b/tvm/include/tvm/ir_mutator.h @@ -77,7 +77,6 @@ class TVM_DLL IRMutator { virtual Stmt Mutate_(const Reuse* op, const Stmt& s); virtual Stmt Mutate_(const Partition* op, const Stmt& s); virtual Stmt Mutate_(const Stencil* op, const Stmt& s); - virtual Stmt Mutate_(const StreamStmt* op, const Stmt& s); virtual Expr Mutate_(const Variable* op, const Expr& e); virtual Expr Mutate_(const Load* op, const Expr& e); @@ -115,7 +114,6 @@ class TVM_DLL IRMutator { virtual Expr Mutate_(const SetSlice* op, const Expr& e); virtual Expr Mutate_(const Quantize* op, const Expr& e); virtual Expr Mutate_(const KernelExpr* op, const Expr& e); - virtual Expr Mutate_(const StreamExpr* op, const Expr& e); }; /*! diff --git a/tvm/include/tvm/ir_pass.h b/tvm/include/tvm/ir_pass.h index dfba91d32..88c29f32c 100644 --- a/tvm/include/tvm/ir_pass.h +++ b/tvm/include/tvm/ir_pass.h @@ -214,14 +214,6 @@ Stmt StorageFlatten(Stmt stmt, */ Stmt RemoveNoOp(Stmt stmt); -/*! - * \brief Infer device scope. - * \param stmt The stmt to be trasnformed - * \param bus_bandwidth The bandwisth of the stream bus - * \return Transformed stmt. - */ -Stmt InferStream(Stmt stmt, int bus_bandwidth); - /*! * \brief Split statement into pipeine stages. 
* \param stmt The stmt to be splitted diff --git a/tvm/include/tvm/ir_visitor.h b/tvm/include/tvm/ir_visitor.h index 21ef77c32..6fe616aab 100644 --- a/tvm/include/tvm/ir_visitor.h +++ b/tvm/include/tvm/ir_visitor.h @@ -131,8 +131,6 @@ class TVM_DLL IRVisitor { virtual void Visit_(const KernelDef* op); virtual void Visit_(const KernelExpr* op); virtual void Visit_(const KernelStmt* op); - virtual void Visit_(const StreamExpr* op); - virtual void Visit_(const StreamStmt* op); virtual void Visit_(const Return* op); virtual void Visit_(const Break* op); virtual void Visit_(const While* op); diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index faacc7d96..9dc1956c8 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -351,31 +351,11 @@ class Schedule : public NodeRef { const IterVar& axis, int factor_axis = 0); - EXPORT Tensor reuse_at(const Tensor& target, - Stage parent, + EXPORT Tensor reuse_at(const Tensor& target, + Stage parent, IterVar axis, std::string name); - EXPORT void to_stage(const Tensor& target, - Stage dest, - int arg_pos, - ir::StreamType stream_type, - int channel_depth, - std::string name); - - EXPORT Tensor move_to(const Tensor& target, - ir::DeviceType device_type, - ir::StreamType stream_type, - int channel_depth, - std::string new_name); - - EXPORT void stream_to(const Tensor& target, - Stage dest, - Stage source, - ir::StreamType stream_type, - int channel_depth, - std::string new_name); - EXPORT Tensor partition(const Tensor& target, int dim, int factor, ir::PartitionType partition_type); @@ -401,8 +381,6 @@ class Schedule : public NodeRef { inline ScheduleNode* operator->(); // declare container type using ContainerType = ScheduleNode; - // insertion point for host & xcel separation - static int split_bound; }; /*! 
diff --git a/tvm/src/api/api_ir.cc b/tvm/src/api/api_ir.cc index 8edb1a0e8..825f7580d 100644 --- a/tvm/src/api/api_ir.cc +++ b/tvm/src/api/api_ir.cc @@ -176,20 +176,6 @@ TVM_REGISTER_API("make.Select") *ret = Node::make(args[0], args[1], args[2], args[3], args[4], args[5]); \ }) \ -#define REGISTER_MAKE7(Node) \ - TVM_REGISTER_API("make."#Node) \ - .set_body([](TVMArgs args, TVMRetValue *ret) { \ - *ret = Node::make(args[0], args[1], args[2], args[3], \ - args[4], args[5], args[6]); \ - }) \ - -#define REGISTER_MAKE8(Node) \ - TVM_REGISTER_API("make."#Node) \ - .set_body([](TVMArgs args, TVMRetValue *ret) { \ - *ret = Node::make(args[0], args[1], args[2], args[3], \ - args[4], args[5], args[6], args[7]); \ - }) \ - #define REGISTER_MAKE_BINARY_OP(Node) \ TVM_REGISTER_API("make."#Node) \ .set_body([](TVMArgs args, TVMRetValue *ret) { \ @@ -236,7 +222,7 @@ REGISTER_MAKE3(GetSlice); REGISTER_MAKE3(SetBit); REGISTER_MAKE4(SetSlice); REGISTER_MAKE2(Quantize); -REGISTER_MAKE8(KernelDef); +REGISTER_MAKE5(KernelDef); REGISTER_MAKE3(KernelExpr); REGISTER_MAKE2(KernelStmt); REGISTER_MAKE1(Return); diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index 543e816aa..f07d590a5 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -461,31 +461,6 @@ TVM_REGISTER_API("_SchedulePartition") static_cast(args[4].operator int())); }); -TVM_REGISTER_API("_ScheduleMoveToStage") - .set_body([](TVMArgs args, TVMRetValue *ret) { - args[0].operator Schedule() - .to_stage(args[1], args[2], args[3], - static_cast(args[4].operator int()), - args[5], args[6]); - }); - -TVM_REGISTER_API("_ScheduleMove") - .set_body([](TVMArgs args, TVMRetValue *ret) { - *ret = args[0].operator Schedule() - .move_to(args[1], - static_cast(args[2].operator int()), - static_cast(args[3].operator int()), - args[4], args[5]); - }); - -TVM_REGISTER_API("_ScheduleStream") - .set_body([](TVMArgs args, TVMRetValue *ret) { - args[0].operator Schedule() - .stream_to(args[1], args[2], args[3], - 
static_cast(args[4].operator int()), - args[5], args[6]); - }); - TVM_REGISTER_API("_ScheduleReshape") .set_body([](TVMArgs args, TVMRetValue *ret) { args[0].operator Schedule().reshape(args[1], args[2]); diff --git a/tvm/src/api/api_pass.cc b/tvm/src/api/api_pass.cc index 1728b0c23..348b8816e 100644 --- a/tvm/src/api/api_pass.cc +++ b/tvm/src/api/api_pass.cc @@ -122,7 +122,6 @@ REGISTER_PASS1(InjectPrefetch); REGISTER_PASS2(InjectDoubleBuffer); REGISTER_PASS2(LoopPartition); REGISTER_PASS1(RemoveNoOp); -REGISTER_PASS2(InferStream); REGISTER_PASS2(SplitPipeline); REGISTER_PASS2(LiftAttrScope); REGISTER_PASS1(NarrowChannelAccess); diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc deleted file mode 100644 index 8bdbf7e98..000000000 --- a/tvm/src/codegen/build_common.cc +++ /dev/null @@ -1,220 +0,0 @@ -/*! - * Copyright (c) 2019 by Contributors - * \file build_common.cc - * \brief Build unified simulation module - */ -#include -#include -#include -#include -#include -#include -#include "./build_common.h" -#include "./build_util.h" - -#include -#include -#include -#include -#include - -#include "merlinc/codeanalys_merlinc.h" -#include "hlsc/codegen_vhls.h" -#include "opencl/codegen_aocl.h" -#include "ppac/codegen_rv64_ppac.h" - -namespace TVM { -namespace runtime { - -class SimModuleNode final : public ModuleNode { - public: - SimModuleNode(LoweredFunc func, - std::string host_code, - argInfo arg_info, - std::string dev_code, std::string platform, - std::unordered_map options) - : func_(func), - host_(host_code), - arg_info_(arg_info), - dev_(dev_code), - platform_(platform), - options_(options) { - } - - const char* type_key() const { - return "unified_sim"; - } - - // unified simulation function - PackedFunc GetFunction( - const std::string& name, - const std::shared_ptr& sptr_to_self) final { - return PackedFunc([this](TVMArgs args, TVMRetValue* rv){ - - if (args.size() != (int)func_->args.size()) - LOG(FATAL) << "The function should 
take in " << func_->args.size() - << " inputs but get " << args.size(); - std::vector shmids; - std::vector arg_sizes; - std::vector arg_types; - - CollectArgInfo(args, func_, arg_sizes, arg_types); - GenSharedMem(args, shmids, arg_sizes); - - LOG(CLEAN) << "Generating harness files ..."; - system("rm -rf __tmp__; mkdir __tmp__"); - std::string path; - if (const auto* f = Registry::Get("get_util_path")) - path = (*f)(platform_).operator std::string(); - system(("cp -r " + path + "/* __tmp__/").c_str()); - LOG(CLEAN) << "Running SW simulation on " + platform_; - - if (platform_ == "sdaccel") { - GenWrapperCode(args, shmids, arg_types, arg_info_, func_); - GenHostCode(args, shmids, arg_types, func_, - platform_, host_, arg_info_); - GenKernelCode(dev_); - - LOG(CLEAN) << "Running SW simulation ..."; - system("cd __tmp__; source ./run_sw.sh"); - - } else if (platform_ == "rocket") { - // generate host and run proxy kernel test - GenHostCode(args, shmids, arg_types, func_, - platform_, host_, arg_info_); - std::string compile = "cd __tmp__;"; - compile += std::string("autoconf; mkdir build; cd build;") + - std::string("../configure --with-riscvtools=") + - options_["RISCV"] + std::string(";make -j8"); - system(compile.c_str()); - - } else if (platform_ == "vivado_hls") { - GenHostCode(args, shmids, arg_types, func_, - platform_, host_, arg_info_); - GenKernelCode(dev_); - system("cd __tmp__; make csim"); - } else { - LOG(FATAL) << "unrecognized platform " << platform_; - } - - // clean & extract resource information - FreeSharedMem(args, shmids, arg_sizes); - if (const auto* f = Registry::Get("tvm_callback_syn_postproc")) { - std::string code; - code = (*f)("test").operator std::string(); - LOG(CLEAN) << "extract res info"; - } - }); - } - - private: - LoweredFunc func_; - std::string host_; - argInfo arg_info_; - std::string dev_; - std::string platform_; - std::unordered_map options_; -}; - -using var2nameType = std::unordered_map>>; - -Module CreateSimModule( - 
LoweredFunc func, - std::string host_code, - std::string dev_code, - argInfo arg_types, - std::string platform, - std::unordered_map options) { - std::shared_ptr n = - std::make_shared(func, host_code, - arg_types, dev_code, - platform, options); - return Module(n); -} -} // namespace runtime - -namespace codegen { -using var2nameType = std::unordered_map>>; - -using argInfo = - std::vector>>; - -// unified simulation function for diff platforms -template -runtime::Module BuildSimModule(Array funcs, - Array attrs, - Array values) { - CodeAnalysMerlinC ca; - CGHost cg_host; - CGXcel cg_dev; - - for (LoweredFunc f : funcs) { - ca.AddFunction(f); - str2tupleMap map_arg_type; - map_arg_type = ca.Finish(); - cg_host.AddFunction(f, map_arg_type); - cg_dev.AddFunction(f, map_arg_type); - } - // vector {vars} - auto& arg_vars = cg_dev.arg_vars; - // map {var : is_streamed(bool) } - auto& stream_table = cg_dev.stream_table; - // map {var : (vid, Type, shape)} - auto& arg_top_vars = cg_dev.arg_top_vars; - - argInfo arg_info; - for (size_t i = 0 ; i < arg_vars.size(); i++) { - auto v = arg_vars[i]; - auto nameType = arg_top_vars[v]; - bool is_stream; - if (stream_table[v]) - is_stream = true; - else is_stream = false; - auto item = std::make_tuple( - /*var name*/std::get<0>(nameType), - /*whether is streamed*/is_stream, - /*data type*/std::get<1>(nameType), - /*shape*/std::get<2>(nameType)); - arg_info.push_back(item); - } - // tool option mapping and platform - std::string platform = values[0].as()->value; - std::unordered_map options; - for (size_t k = 1; k < attrs.size(); k++) { - auto key = attrs[k].as()->value; - auto val = values[k].as()->value; - options[key] = val; - } - return runtime::CreateSimModule(funcs[0], - cg_host.GetHost(), - cg_dev.GetDevice(), - arg_info, platform, options); -} - -TVM_REGISTER_API("codegen.build_sim") -.set_body([](TVMArgs args, TVMRetValue* rv) { - // dispatch to corr codegen - auto& sptr = args[2].node_sptr(); - CHECK(sptr->is_type()); - 
auto* n = static_cast(sptr.get()); - auto data = n->data[static_cast(0)]; - - // create module node for simulation - std::string type = Expr(data).as()->value; - if (type == "rocket") { - *rv = BuildSimModule - (args[0], args[1], args[2]); - } else if (type == "sdaccel") { - *rv = BuildSimModule - (args[0], args[1], args[2]); - } else if (type == "vivado_hls") { - *rv = BuildSimModule - (args[0], args[1], args[2]); - } else { - } - }); - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/build_common.h b/tvm/src/codegen/build_common.h index f9f42d219..ee8cbc509 100644 --- a/tvm/src/codegen/build_common.h +++ b/tvm/src/codegen/build_common.h @@ -29,7 +29,6 @@ ExtractFuncInfo(const Array& funcs) { } return fmap; } - } // namespace codegen } // namespace TVM #endif // TVM_CODEGEN_BUILD_COMMON_H_ diff --git a/tvm/src/codegen/build_opencl.cc b/tvm/src/codegen/build_opencl.cc new file mode 100644 index 000000000..5054085cd --- /dev/null +++ b/tvm/src/codegen/build_opencl.cc @@ -0,0 +1,44 @@ +/*! + * Copyright (c) 2017 by Contributors + * Build opencl modules from source. 
+ * \file build_opencl.cc + */ +#include +#include +#include "./codegen_opencl.h" +#include "./build_common.h" + +#if TVM_OPENCL_RUNTIME +#include "../runtime/opencl/opencl_module.h" +#endif // TVM_OPENCL_RUNTIME + +namespace TVM { +namespace codegen { + +runtime::Module BuildOpenCL(Array funcs) { + using TVM::runtime::Registry; + bool output_ssa = false; + CodeGenOpenCL cg; + cg.Init(output_ssa); + for (LoweredFunc f : funcs) { + cg.AddFunction(f); + } + std::string code = cg.Finish(); + + if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { + code = (*f)(code).operator std::string(); + } +#if TVM_OPENCL_RUNTIME + return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs)); +#else + LOG(WARNING) << "OpenCL runtime not enabled, return a source module..."; + return DeviceSourceModuleCreate(code, "cl", ExtractFuncInfo(funcs), "opencl"); +#endif // TVM_OPENCL_RUNTIME +} + +TVM_REGISTER_API("codegen.build_opencl") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = BuildOpenCL(args[0]); + }); +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/build_util.cc b/tvm/src/codegen/build_util.cc deleted file mode 100644 index e0a5f8b2d..000000000 --- a/tvm/src/codegen/build_util.cc +++ /dev/null @@ -1,812 +0,0 @@ -/*! 
- * Copyright (c) 2019 by Contributors - * \file build_common.cc - * \brief Build unified simulation module - */ -#include -#include -#include -#include -#include -#include -#include "./build_common.h" -#include "./build_util.h" - -#include -#include -#include -#include -#include - -#include "merlinc/codeanalys_merlinc.h" -#include "hlsc/codegen_vhls.h" -#include "opencl/codegen_aocl.h" -#include "ppac/codegen_rv64_ppac.h" - -namespace TVM { -namespace runtime { - -std::string getpath(void) { - char buff[256]; - getcwd(buff, 256); - std::string cwd(buff); - return cwd; -} - -void PrintIndent(std::ofstream& stream, int indent) { - for (int i = 0; i < indent; i++) - stream << ' '; -} - -inline size_t GetTypeSize(TVMType t) { - size_t byte = (t.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - return byte; -} - -inline size_t GetDataSize(TVMArray* arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= arr->shape[i]; - } - size_t byte = (arr->dtype.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - size *= (byte * 8 * arr->dtype.lanes + 7) / 8; - return size; -} - -inline TVMType Type2TVMType(Type t) { - TVMType tt; - if (t.is_int()) tt.code = kDLInt; - else if (t.is_uint()) tt.code = kDLUInt; - else if (t.is_float()) tt.code = kDLFloat; - else LOG(FATAL) << "Unacceptable type: " << t; - tt.bits = static_cast(t.bits()); - tt.fracs = static_cast(t.fracs()); - return tt; -} - -inline std::string PrintHalideType(Type t) { - std::string str = ""; - if (t.is_uint() || t.is_int() || t.is_fixed() || t.is_ufixed()) { - if (t.is_uint()) str += "ap_uint<" + std::to_string(t.bits()) + ">"; - else if (t.is_int()) str += "ap_int<" + std::to_string(t.bits()) + ">"; - else if (t.is_ufixed()) str += "ap_ufixed<" + std::to_string(t.bits()) + ", " + std::to_string(t.bits() - t.fracs()) + ">"; - else str += "ap_fixed<" + 
std::to_string(t.bits()) + ", " + std::to_string(t.bits() - t.fracs()) + ">"; - } else { - LOG(FATAL) << "Cannot convert type " << t << " to C type"; - } - return str; -} - -inline std::string Type2Str(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - if (t.fracs > 0) str += "ap_fixed<"; - else str += "ap_int<"; - str += std::to_string(static_cast(t.bits)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) str += "ap_ufixed<"; - else str += "ap_uint<"; - str += std::to_string(static_cast(t.bits)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - else str += ">"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2ExtStr(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - if (t.fracs > 0) str += "ap_fixed<"; - else str += "ap_int<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) str += "ap_ufixed<"; - else str += "ap_uint<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2WrapStr(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - if (t.fracs > 0) { - str += "ap_fixed<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - } else { - str += "ap_int<"; - if (t.bits <= 8) str += std::to_string(static_cast(t.bits)); - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - } - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - 
else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) { - str += "ap_ufixed<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - } else { - str += "ap_uint<"; - if (t.bits <= 8) str += std::to_string(static_cast(t.bits)); - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - } - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2Byte(TVMType t) { - std::string str = ""; - if (t.code == kDLFloat) { - str += "float"; - } else if (t.code == kDLInt || t.code == kDLUInt) { - if (t.code == kDLUInt) str += "u"; - str += "int"; - if (t.bits <= 8) str += "8"; - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - str += "_t"; - } - return str; -} - -void CollectArgInfo(TVMArgs& args, - LoweredFunc func, - std::vector& arg_sizes, - std::vector& arg_types) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - arg_sizes.push_back(GetDataSize(arr)); - arg_types.push_back(arr->dtype); - } else { - const Variable* var = func->api_args[i].as(); - TVMType t = Type2TVMType(var->type); - arg_sizes.push_back(GetTypeSize(t)); - arg_types.push_back(t); - } - } -} - -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - // generate shared memory key and id - // TODO: maybe get the current path?? 
- key_t key = ftok("/", i+1); - int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); - shmids.push_back(shmid); - // copy mem from TVM args to the shared memory - void* mem = shmat(shmid, nullptr, 0); - memcpy(mem, arr->data, arg_sizes[i]); - } else { - shmids.push_back(0); - } - } -} - -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes) { - for (size_t i = 0; i < shmids.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - int shmid = shmids[i]; - void* mem = shmat(shmid, nullptr, 0); - memcpy(arr->data, mem, arg_sizes[i]); - shmdt(mem); - shmctl(shmid, IPC_RMID, nullptr); - } - } -} - -// copy values from the shared mem to local mem -void PrintCopy(TVMArray* arr, - argInfo& arg_info, - std::ofstream& stream, - int indent, size_t nth_arr) { - for (int i = 0; i < arr->ndim; i++) { - PrintIndent(stream, indent); - stream << "for (size_t i" << i << " = 0; "; - stream << "i" << i << " < " << arr->shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - if (i == arr->ndim - 1) { - PrintIndent(stream, indent); - stream << std::get<0>(arg_info[nth_arr]); - stream << "[i" << arr->ndim-1; - int mul2 = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul2 *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul2; - } - stream << "]"; - - stream << " = ("; - // stream << Type2ExtStr(arr->dtype); - stream << Type2Byte(arr->dtype); - - stream << ")(arg_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul; - } - stream << "])"; - if (arr->dtype.fracs > 0) - stream << " >> " << static_cast(arr->dtype.fracs); - stream << ";\n"; - } - } - for (int i = 0; i < arr->ndim; i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } -} - -// copy values from local mem back to shared mem -void PrintCopyBack(TVMArray* arr, - argInfo& arg_info, - std::ofstream& stream, - 
int indent, size_t nth_arr) { - for (int i = 0; i < arr->ndim; i++) { - PrintIndent(stream, indent); - stream << "for (size_t i" << i << " = 0; "; - stream << "i" << i << " < " << arr->shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - if (i == arr->ndim-1) { - PrintIndent(stream, indent); - stream << "arg_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul; - } - stream << "] = ("; - stream << Type2Byte(arr->dtype); - stream << ")(" << std::get<0>(arg_info[nth_arr]); - stream << "[i" << arr->ndim - 1; - int mul2 = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul2 *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul2; - } - - stream << "])"; - if (arr->dtype.fracs > 0) - stream << " << " << static_cast(arr->dtype.fracs); - stream << ";\n"; - } - } - for (int i = 0; i < arr->ndim; i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } -} - -void GenKernelCode(std::string test_file) { - std::ofstream stream; - stream.open("__tmp__/kernel.cpp"); - stream << test_file; - stream.close(); -} - -// interface pragma to specify mem and ctrl interface in sdx -void GenWrapperCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - argInfo& arg_stream_types, - LoweredFunc func) { - std::ofstream stream; - int indent = 0; - std::string path(getenv("PWD")); - stream.open("__tmp__/interface.cpp"); - stream << "#include \n"; - stream << "#include \"" + path + "/__tmp__/kernel.cpp\"\n"; - stream << "\n\n"; - stream << "extern \"C\" \n"; - stream << "{\n"; - indent += 2; - PrintIndent(stream, indent); - - // wrapper func interface - stream << "void App( "; - size_t ex_arg_count = 0; - ex_arg_count = arg_stream_types.size() - arg_types.size(); - for (size_t i = 0; i < arg_types.size(); i++) { - if (i != 0) stream << ", "; - stream << Type2WrapStr(arg_types[i]); - stream << "*"; - stream << " 
source_wrapper_" << i; - } - for (size_t k = 0; k < ex_arg_count; k++) { - if (k != ex_arg_count) stream << ", "; - stream << PrintHalideType(std::get<2>(arg_stream_types[k + arg_types.size()])); - stream << "*"; - stream << " source_wrapper_" << k + arg_types.size(); - } - stream << " ) {\n"; - - // memeory and control pragma - for (size_t i = 0; i < arg_stream_types.size(); i++) { - std::string interface; - if (std::get<1>(arg_stream_types[i])) interface = " m_axi "; - else interface = " m_axi "; - PrintIndent(stream, indent); - stream << "#pragma HLS INTERFACE" + interface + "port="; - stream << "source_wrapper_" << i; - stream << " offset=slave bundle=gmem\n"; - } - for (size_t i = 0; i < arg_stream_types.size(); i++) { - std::string interface; - if (std::get<1>(arg_stream_types[i])) interface = " s_axilite "; - else interface = " s_axilite "; - PrintIndent(stream, indent); - stream << "#pragma HLS INTERFACE" + interface + "port="; - stream << "source_wrapper_" << i; - stream << " bundle=control\n"; - } - PrintIndent(stream, indent); - stream << "#pragma HLS INTERFACE s_axilite port=return bundle=control\n"; - stream << "\n"; - - // intermediate vars init alloc - for (size_t i = 0; i < arg_stream_types.size(); i++) { - PrintIndent(stream, indent); - stream << PrintHalideType(std::get<2>(arg_stream_types[i])); - stream << " source_wrapper_temp_" << i; - auto shape = std::get<3>(arg_stream_types[i]); - for (size_t j = 0; j < shape.size(); j++) - stream << "[" << shape[j] << "]"; - if (shape.size() == 0) stream << "[1]"; - stream << ";\n"; - } - - // vars init for values - for (size_t i = 0; i < arg_stream_types.size(); i++) { - auto shape = std::get<3>(arg_stream_types[i]); - for (size_t j = 0; j < shape.size(); j++) { - PrintIndent(stream, indent); - stream << "for (int i" << j << " = 0; "; - stream << "i" << j << " < " << shape[j] << "; "; - stream << "i" << j << "++) {\n"; - indent += 2; - if (j == shape.size() - 1) { - PrintIndent(stream, indent); - stream << 
"source_wrapper_temp_" << i; - for (size_t k = 0; k < shape.size(); k++) { - stream << "[i" << k << "]"; - } - stream << " = "; - stream << "source_wrapper_" << i; - stream << "[i" << shape.size() - 1; - int mul = 1; - for (size_t k = shape.size() - 1; k > 0; k--) { - mul *= shape[k]; - stream << "+ i" << k - 1 << "*" << mul; - } - stream << "];\n"; - } - } - for (size_t j = 0; j < shape.size(); j++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } - if (shape.size() == 0) { - PrintIndent(stream, indent); - stream << "source_wrapper_temp_" << i; - stream << "[0] = source_wrapper_" << i << "[0];\n"; - } - } - - // print top func - stream << "\n"; - PrintIndent(stream, indent); - stream << "top( "; - for (size_t i = 0;i < arg_stream_types.size(); i++) { - if (i != arg_stream_types.size() - 1){ - stream << "source_wrapper_temp_" << i; - stream << ", "; - } else { - stream << "source_wrapper_temp_" << i; - stream << ");\n"; - } - - } - stream << "\n"; - - // read back return val - for (int k = arg_stream_types.size() - 1; - k > args.size() - 2; k--) { - auto shape = std::get<3>(arg_stream_types[k]); - for (size_t i = 0; i < shape.size(); i++) { - PrintIndent(stream, indent); - stream << "for (int i" << i << " = 0; "; - stream << "i" << i << " < " << shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - - if (i == shape.size() - 1) { - PrintIndent(stream, indent); - stream << "source_wrapper_" << k; - stream << "[i" << shape.size() - 1; - int mul = 1; - for (size_t j = shape.size() - 1; j > 0; j--) { - mul *= shape[j]; - stream << " + i" << j - 1 << "*" << mul; - } - stream << " ] = "; - - stream << "source_wrapper_temp_" << k; - for (size_t j = 0; j < shape.size(); j++) { - stream << "[i" << j << "]"; - } - stream <<";\n"; - } - } - for (size_t i = 0;i < shape.size(); i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } - } - stream << "}\n"; - indent -= 2; - stream << "}\n"; - stream.close(); -} - -// 
generate opencl wrapper for sdaccel sim -void GenHostHeaders(std::ofstream& stream, - std::string platform) { - stream << "#include \n"; - stream << "#include \n\n"; - stream << "// standard C/C++ headers\n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n\n"; - - if (platform == "sdaccel") { - stream << "// opencl harness headers\n"; - stream << "#include \"CLWorld.h\"\n"; - stream << "#include \"CLKernel.h\"\n"; - stream << "#include \"CLMemObj.h\"\n"; - stream << "#include \"utils.h\"\n"; - stream << "// harness namespace\n"; - stream << "using namespace rosetta;\n"; - } else if (platform == "vivado_hls") { - stream << "// vivado hls headers\n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \"kernel.cpp\"\n\n"; - } -} - -// initialization before executing kernel -void KernelInit(std::ofstream& stream, - std::string platform, - TVMArgs& args, - const std::vector& arg_types, - argInfo& arg_stream_types) { - int indent = 2; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// parse command line arguments for opencl version\n"; - PrintIndent(stream, indent); - stream << "std::string kernelFile(\"\");\n"; - PrintIndent(stream, indent); - stream << "parse_sdaccel_command_line_args(argc, argv, kernelFile);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// create OpenCL world\n"; - PrintIndent(stream, indent); - stream << "CLWorld world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// add the bitstream file\n"; - PrintIndent(stream, indent); - stream << "dworld.addProgram(kernelFile);\n"; - stream << "\n\n"; - PrintIndent(stream, indent); - stream << "// create kernels\n"; - PrintIndent(stream, indent); - stream << "CLKernel App(world.getContext(), world.getProgram(), \"App\", world.getDevice());\n"; 
- stream << "\n\n"; - - PrintIndent(stream, indent); - stream << "// create mem objects\n"; - for (int i = 0;i < args.size(); i++) { - PrintIndent(stream, indent); - stream << "CLMemObj source_" << i; - stream << "((void*)arg_top_" << i; - stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; - - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { - if (j==0) { - stream << arr->shape[j] << " "; - } else { - stream << "* " << arr->shape[j]; - } - } - } else { - stream << "1"; - } - stream << ", "; - stream << "CL_MEM_READ_WRITE);\n"; - } - // additional streamed data - for (size_t k = args.size(); k < arg_stream_types.size(); k++) { - auto type = std::get<2>(arg_stream_types[k]); - auto shape = std::get<3>(arg_stream_types[k]); - PrintIndent(stream, indent); - stream << "CLMemObj source_" << k; - stream << "((void*)knn_mat"; - stream << ", sizeof(" << Type2Byte(Type2TVMType(type)) << "), "; - if (shape.size() > 0) { - for (size_t j = 0; j < shape.size(); j++) { - if (j == 0) { - stream << shape[j] << " "; - } else { - stream << "* " << shape[j]; - } - } - } else { - stream << "1"; - } - stream << ", "; - stream << "CL_MEM_READ_WRITE);\n"; - } - - stream << "\n"; - PrintIndent(stream, indent); - stream << "// add them to the world\n"; - for (size_t i = 0;i < arg_stream_types.size();i++) { - PrintIndent(stream, indent); - stream << "world.addMemObj(source_" << i; - stream << ");\n"; - } - - stream << "\n\n"; - PrintIndent(stream, indent); - stream << " // set work size\n"; - PrintIndent(stream, indent); - int size = arg_stream_types.size(); - std::string arr = "[" + std::to_string(size) + "] = {"; - for (int i = 0; i < size; i++) { - if (i != size -1) arr += "1, "; - else arr += "1};\n"; - } - stream << "int global_size" + arr; - PrintIndent(stream, indent); - stream << "int local_size" + arr; - PrintIndent(stream, indent); - stream << "App.set_global(global_size);\n"; - PrintIndent(stream, indent); - stream 
<< "App.set_local(local_size);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// add them to the world\n"; - PrintIndent(stream, indent); - stream << "world.addKernel(App);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// set kernel arguments\n"; - for (size_t i = 0; i < arg_stream_types.size(); i++) { - PrintIndent(stream, indent); - stream << "world.setMemKernelArg(0, "<< i << ", " << i; - stream << ");\n"; - } - - stream << "\n"; - PrintIndent(stream, indent); - stream << "// run\n"; - PrintIndent(stream, indent); - stream << "world.runKernels();\n\n"; - PrintIndent(stream, indent); - stream << "// read the data back\n"; - for (size_t i = args.size() - 1; i < arg_stream_types.size(); i++) { - PrintIndent(stream, indent); - stream << "world.readMemObj(" << i << ");\n"; - } -} - -// generate host code according to platform type -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc lowered_func, - std::string platform, - std::string host_code, - argInfo& arg_info) { - int indent = 0; - std::ofstream stream; - stream.open("__tmp__/host.cpp"); - GenHostHeaders(stream, platform); - - stream << "int main(int argc, char ** argv) {\n"; - indent += 2; - - int cnt = 0; // label the constant value - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << "* "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << "*)"; - stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - PrintIndent(stream, indent); - - stream << Type2Byte(arg_types[i]) << " "; - stream << std::get<0>(arg_info[i]); - TVMArray* arr = args[i]; - - stream << "["; - for (int j = 0; j < arr->ndim; j++) { - if (j == arr->ndim - 1) { - stream << arr->shape[j]; - } else { - stream << arr->shape[j]; - stream << " * "; - } - } - stream << "];\n"; - PrintCopy(arr, 
arg_info, stream, indent, i); - - } else { - // directly assign the value to the variable - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << ")"; - if (args[i].type_code() == kDLInt || - args[i].type_code() == kDLUInt) { - stream << int64_t(args[i]); - } - stream << ";\n"; - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "arg_top_" << i; - stream << "[1] = { "; - - stream << "arg_" << i << " }"; - if (arg_types[i].fracs > 0) - stream << " >> " << static_cast(arg_types[i].fracs); - stream << ";\n"; - cnt += 1; - } - stream << "\n"; - } - - // allocate mem for stream vars - for (size_t k = args.size(); k < arg_info.size(); k++) { - auto type = std::get<2>(arg_info[k]); - auto shape = std::get<3>(arg_info[k]); - PrintIndent(stream, indent); - stream << Type2Byte(Type2TVMType(type)) << " " << "name["; - if (shape.size() > 0) { - for (size_t i = 0; i < shape.size(); i++) { - if (i != shape.size() - 1) - stream << shape[i] << " * "; - else stream << shape[i]; - } - } else { - stream << "1"; - } - stream << "];\n"; - } - - // generate host side (before kernel) - PrintIndent(stream, indent); - stream << "printf(\"Finished setting up shared memory\\n\");\n"; - PrintIndent(stream, indent); - stream << "// compute bofore kernel function\n"; - size_t pos = host_code.find("top("); - std::string pre_kernel = host_code.substr(0, pos -1); - std::string post_kernel = host_code.substr(host_code.find('\n', pos) + 1); - pre_kernel = pre_kernel.substr(pre_kernel.find_first_not_of("\n")); - pre_kernel = pre_kernel.substr(pre_kernel.find_first_not_of(" ")); - PrintIndent(stream, indent); - - if (platform == "sdaccel") { - // create variable wrapper - stream << pre_kernel << "\n"; - KernelInit(stream, platform, args, - arg_types, arg_info); - } else if (platform == "vivado_hls") { - // init hls stream channels - for (size_t k = 0; k < 
arg_info.size(); k++) { - auto info = arg_info[k]; - if (std::get<1>(info)) { - PrintIndent(stream, indent); - stream << "hls::stream<" - << PrintHalideType(std::get<2>(info)) - << "> " << "fd_" << std::get<0>(info) << ";\n"; - } - } - PrintIndent(stream, indent); - stream << pre_kernel << "\n"; - PrintIndent(stream, indent); - // create kernel call from host - stream << "top("; - for (size_t i = 0; i < arg_info.size(); i++) { - auto info = arg_info[i]; - auto name = std::get<0>(info); - if (i != 0) stream << ", "; - stream << "fd_" << name; - } - stream << ");\n"; - } - - // generate host (post-kernel) - PrintIndent(stream, indent); - stream << "// compute after kernel function\n"; - stream << post_kernel; - - // copy to shared mem - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - PrintCopyBack(arr, arg_info, stream, indent, i); - PrintIndent(stream, indent); - stream << "shmdt("; - stream << "arg_" << i << ");\n"; - } - } - - stream << "\n\n"; - PrintIndent(stream, indent); - stream << "}\n"; - stream.close(); - -} -} // namespace runtime -} // namespace TVM diff --git a/tvm/src/codegen/build_util.h b/tvm/src/codegen/build_util.h deleted file mode 100644 index ca95364c1..000000000 --- a/tvm/src/codegen/build_util.h +++ /dev/null @@ -1,70 +0,0 @@ -/*! 
- * Copyright (c) 2019 by Contributors - * Common build utilities - * \file build_util.h - */ -#ifndef TVM_CODEGEN_BUILD_HELPER_H_ -#define TVM_CODEGEN_BUILD_HELPER_H_ - -#include -#include -#include -#include "../runtime/meta_data.h" - -namespace TVM { -namespace runtime { - -using argInfo = - std::vector>>; - -// get current work directory -std::string getpath(void); -void PrintIndent(std::ofstream& stream, int indent); -inline size_t GetTypeSize(TVMType t); -inline size_t GetDataSize(TVMArray* arr); -inline TVMType Type2TVMType(Type t); -inline std::string PrintHalideType(Type t); -inline std::string Type2Str(TVMType t); -inline std::string Type2ExtStr(TVMType t); -inline std::string Type2WrapStr(TVMType t); -inline std::string Type2Byte(TVMType t); - -void CollectArgInfo(TVMArgs& args, - LoweredFunc func, - std::vector& arg_sizes, - std::vector& arg_types); - -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes); - -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes); - -void PrintCopy(TVMArray* arr, - std::ofstream& stream, - int indent, size_t nth_arr); - -void PrintCopyBack(TVMArray* arr, - std::ofstream& stream, - int indent, size_t nth_arr); - -void GenKernelCode(std::string test_file); - -void GenWrapperCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - argInfo& arg_info, - LoweredFunc func); - -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc func, - std::string platform, - std::string host_code, - argInfo& arg_info); -} // namespace runtime -} // namespace TVM -#endif // TVM_CODEGEN_BUILD_HELPER_H_ diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 006edf933..7373711f4 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -2,12 +2,9 @@ * Copyright (c) 2017 by Contributors * \file codegen_c.cc */ -#include -#include #include #include #include 
"./codegen_c.h" -#include "./merlinc/codeanalys_merlinc.h" #include "../arithmetic/compute_expr.h" namespace TVM { @@ -15,123 +12,6 @@ namespace codegen { using namespace ir; -Type String2Type(std::string& s) { - if (s.front() == '\"' && s.back() == '\"') { - s.erase(0, 1); - s.pop_back(); - } - std::istringstream is(s); - halideir_type_code_t code = Type::Int; - if (s.substr(0, 3) == "int") { - code = Type::Int; s = s.substr(3); - } else if (s.substr(0, 4) == "uint") { - code = Type::UInt; s = s.substr(4); - } else if (s.substr(0, 5) == "float") { - code = Type::Float; s = s.substr(5); - } else if (s.substr(0, 5) == "float") { - code = Type::Float; s = s.substr(5); - } else if (s == "handle") { - return Handle(); - } else { - LOG(FATAL) << "unknown type " << s; - } - int bits = 32, lanes = 1; - if (sscanf(s.c_str(), "%dx%d", &bits, &lanes) == 0) { - LOG(FATAL) << "unknown type " << s; - } - return Type(code, bits, lanes); -} - -// generate row major index -std::string getIndex(std::vector shape) { - std::string str; - int mul = 1; - for (size_t i = shape.size(); i > 0; i--) { - mul = mul * shape[i-1]; - str += "i" + std::to_string(i-1) + - "*" + std::to_string(mul); - if (i != 1) str += "+ "; - } - return str; -} - -// collect type info for vars -void TypeCollector::Visit_(const Allocate *op) { - auto v = op->buffer_var.get(); - if (top_args_.count(v)) { - std::vector shape; - for (size_t i = 0; i < op->extents.size(); i++) - shape.push_back(op->extents[i].as()->value); - top_args_[v] = std::make_tuple(std::get<0>(top_args_[v]), op->type, shape); - } - IRVisitor::Visit_(op); -} - -void StreamCollector::Visit_(const Allocate *op) { - this->HandleDef(op->buffer_var.get()); - IRVisitor::Visit_(op); -} - -void StreamCollector::Visit_(const Load *op) { - this->HandleUse(op->buffer_var); - IRVisitor::Visit_(op); -} - -// update placeholder status -void StreamCollector::Visit_(const Store* op) { - if (auto val = op->value.as()) { - this->HandleDef(op->buffer_var.get()); 
- } - this->HandleUse(op->buffer_var); - IRVisitor::Visit_(op); -} - -void StreamCollector::Visit_(const StreamStmt* op) { - this->HandleDef(op->buffer_var.get()); - IRVisitor::Visit_(op); -} - -void StreamCollector::Visit_(const AttrStmt* op) { - if (op->attr_key == attr::device_scope) { - if (op->value.as()->value != scope_) - switch_on = true; - else switch_on = false; - } - IRVisitor::Visit_(op); -} - -// additional data saved into stream table -void StreamCollector::HandleDef(const Variable* v) { - if (!switch_on) { // def on host scope - CHECK(!host_def_count_.count(v)) - << "variable " << v->name_hint - << " has already been defined, the Stmt is not SSA"; - CHECK(!host_use_count_.count(v)) - << "variable " << v->name_hint - << " has been used before definition!"; - host_use_count_[v] = 0; - host_def_count_[v] = 1; - } -} - -void StreamCollector::HandleUse(const Expr& v) { - CHECK(v.as()); - Var var(v.node_); - auto it = host_use_count_.find(var.get()); - if (!switch_on) { // def on host scope - if (it != host_use_count_.end()) { - if (it->second >= 0) { - ++it->second; - } - } else { - if (!stream_table_.count(var.get())) { - host_undefined_.push_back(var); - host_use_count_[var.get()] = -1; - } - } - } -} - void CodeGenC::Init(bool output_ssa) { print_ssa_form_ = output_ssa; } @@ -139,50 +19,44 @@ void CodeGenC::Init(bool output_ssa) { void CodeGenC::InitFuncState(LoweredFunc f) { alloc_storage_scope_.clear(); handle_data_type_.clear(); - var_shape_map_.clear(); - range_.clear(); CodeGenSourceBase::ClearFuncState(); } - -void CodeGenC::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { +void CodeGenC::AddFunction(LoweredFunc f) { // clear previous generated state. this->InitFuncState(f); - map_arg_type_ = map_arg_type; + // skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); // add to alloc buffer type. 
for (const auto & kv : f->handle_data_type) { RegisterHandleType(kv.first.get(), kv.second.type()); } - // generate function signature this->stream << "void " << f->name << "("; for (size_t i = 0; i < f->args.size(); ++i) { Var v = f->args[i]; std::string vid = AllocVarID(v.get()); if (i != 0) stream << ", "; - // check type in the arg map - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } else { - auto arg = map_arg_type[vid]; - PrintType(std::get<1>(arg), this->stream); - this->stream << "* " << std::get<0>(arg); - const BufferNode* buf = f->api_args[i].as(); - if (v.type().is_handle() && buf) { - std::vector shape; - for (size_t i = 0; i < buf->shape.size(); i++) - shape.push_back(buf->shape[i].as()->value); - arg_shapes.push_back(shape); - var_shape_map_[buf->data.get()] = buf->shape; - auto it = alloc_storage_scope_.find(v.get()); - if (it != alloc_storage_scope_.end()) - PrintStorageScope(it->second, stream); + if (v.type().is_handle()) { + auto it = alloc_storage_scope_.find(v.get()); + if (it != alloc_storage_scope_.end()) + PrintStorageScope(it->second, stream); + stream << ' '; + + if (handle_data_type_.count(v.get())) { + PrintType(handle_data_type_.at(v.get()), stream); + } else { + stream << "void"; + } + stream << "*"; + + if (f->is_restricted && restrict_keyword_.length() != 0) { + stream << ' ' << restrict_keyword_; } + } else { + PrintType(v.type(), stream); } + stream << ' ' << vid; } - stream << ") {\n"; int func_scope = this->BeginScope(); this->PrintStmt(f->body); @@ -191,49 +65,8 @@ void CodeGenC::AddFunction(LoweredFunc f, this->stream << "}\n\n"; } -std::string CodeGenC::GetHost() { - if (!fpga_scope_) - host_stream << stream.str(); - std::string postproc = host_stream.str(); - postproc.erase(postproc.rfind("}") - 1, - postproc.length() - 1); - postproc.erase(0, postproc.find("{") + 1); - return postproc + "\n\n"; -} - 
-std::string CodeGenC::GetDevice() { - std::ostringstream device; - device << "void top(" << arg_stream.str() << "){\n"; - - // process device code - PreProcess(device); - // remove the kernel name alloc - auto text = device_stream.str(); - for (auto const& m : stream_arg_pos) { - std::string alloc = m.first + ";"; - size_t nFPos = text.find(alloc); - size_t secondNL = text.find('\n', nFPos); - size_t firstNL = text.rfind('\n', nFPos); - text.erase(firstNL, secondNL - firstNL); - } - device << text; - PostProcess(device); - - if (fpga_scope_) device << stream.str(); - return decl_stream.str() + module_stream.str() + - device.str() + "}\n\n"; -} - std::string CodeGenC::Finish() { - std::ostringstream device; - device << "void top(" << arg_stream.str() - << "){\n" << device_stream.str(); - if (fpga_scope_) device << stream.str(); - else host_stream << stream.str(); - device << "}\n"; - return decl_stream.str() + "\n{device}\n" + - module_stream.str() + device.str() + "\n{device}\n" + - "\n{host}\n" + host_stream.str() + "\n{host}\n"; + return decl_stream.str() + stream.str(); } void CodeGenC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) @@ -453,7 +286,7 @@ void CodeGenC::PrintStorageScope(const std::string& scope, std::ostream& os) { / void CodeGenC::PrintType(Type t, std::ostream& os) { // NOLINT(*) CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; + << "do not yet support vector types"; if (t.is_handle()) { os << "void*"; return; } @@ -481,6 +314,7 @@ void CodeGenC::PrintType(Type t, std::ostream& os) { // NOLINT(*) LOG(FATAL) << "Cannot convert type " << t << " to C type"; } + inline void PrintConst(const IntImm* op, std::ostream& os, CodeGenC* p) { // NOLINT(*) if (op->type == Int(32)) { std::ostringstream temp; @@ -785,7 +619,7 @@ void CodeGenC::VisitStmt_(const Store* op) { Type t = op->value.type(); if (t.lanes() == 1) { std::string value = this->PrintExpr(op->value); - std::string ref = this->GetBufferRef(t, op->buffer_var.get(), 
op->index); + std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); this->PrintIndent(); stream << ref << " = " << value << ";\n"; } else { @@ -880,92 +714,49 @@ void CodeGenC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) } void CodeGenC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "SetBit is not implemented yet in C"; + LOG(FATAL) << "SetBit is not implemented yet"; } void CodeGenC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "SetSlice is not implemented yet in C"; + LOG(FATAL) << "SetSlice is not implemented yet"; } void CodeGenC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "Quantize is not yet support in C"; -} - -void CodeGenC::VisitExpr_(const StreamExpr *op, std::ostream& os) { // NOLINT(*) - auto v = op->buffer_var.get(); - auto it = var_idmap_.find(v); - CHECK(it != var_idmap_.end()) - << "variable " << v->name_hint << " not decalred"; + LOG(FATAL) << "Quantize is not yet support"; } void CodeGenC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - os << op->name << "("; - for (size_t i = 0; i < op->args.size(); ++i) { - PrintExpr(op->args[i], os); - if (i != op->args.size() - 1) os << ", "; - } - os << ")"; -} - -void CodeGenC::VisitStmt_(const StreamStmt *op) { // NOLINT(*) - CHECK(!var_idmap_.count(op->buffer_var.get())); - std::string vid = AllocVarID(op->buffer_var.get()); - vid = GetVarID(op->value.as()->buffer_var.get()); - PrintIndent(); - auto load_op = op->value.as(); - auto v = load_op->buffer_var.as(); - // placeholder args using recv name - if (stream_table.count(v)) { - auto tuple = arg_top_vars[v]; - arg_top_vars[v] = std::make_tuple(vid, std::get<1>(tuple), - std::get<2>(tuple)); - stream_table[v] = true; - } // else: streamed externop defined in analysis - // PrintExpr(op->value, stream); - // stream << vid << ".write()\n"; + LOG(FATAL) << "KernelExpr is not yet support"; } void 
CodeGenC::VisitStmt_(const LetStmt* op) { std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); if (print_ssa_form_) { CHECK(!var_idmap_.count(op->var.get())); var_idmap_[op->var.get()] = value; } else { PrintIndent(); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); + if (op->var.type() == Handle() && + handle_data_type_.count(op->var.get())) { + PrintType(handle_data_type_.at(op->var.get()), stream); + stream << "* " + << AllocVarID(op->var.get()) + << " = ("; + PrintType(handle_data_type_.at(op->var.get()), stream); + stream << "*)" << value << ";\n"; + } else { PrintType(op->var.type(), this->stream); this->stream << ' ' - << vid + << AllocVarID(op->var.get()) << " = " << value << ";\n"; - // modify var idmap for passed in args - } else if (value.find("data") != std::string::npos || - value.substr(0, 3) == "arg") { - auto v = op->var.get(); - arg_vars.push_back(v); - stream_table[v] = false; - std::string api_name = "arg" + std::to_string(arg_count); - auto arg = map_arg_type_[api_name]; - // PrintType(std::get<1>(arg), arg_stream); - CHECK(arg_count < arg_shapes.size()); - auto shape = arg_shapes[arg_count]; - arg_top_vars[v] = std::make_tuple(vid, std::get<1>(arg), shape); - arg_count += 1; } - PrintStmt(op->body); } + PrintStmt(op->body); } void CodeGenC::VisitStmt_(const Allocate* op) { CHECK(!is_zero(op->condition)); - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); + std::string vid = AllocVarID(op->buffer_var.get()); if (op->new_expr.defined()) { // Prefer global static allocation for the program CHECK_EQ(op->free_function, "nop"); @@ -1008,64 +799,6 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { const Variable* v = op->node.as(); CHECK(v); volatile_buf_.insert(v); - } else if 
(op->attr_key == ir::attr::device_scope) { - // print top( ... in host and enter fpga scope - if (op->value.as()->value == "fpga" && !fpga_scope_) { - fpga_scope_ = true; - PrintIndent(); - - // track the stream usage - StreamCollector collector(stream_table, "cpu"); - collector.Visit(op->body); - - // update data type and name - for (auto k : collector.host_undefined_) { - auto v = k.get(); - arg_vars.push_back(v); - stream_table[v] = true; - auto tuple = arg_top_vars[v]; - arg_top_vars[v] = std::make_tuple(v->name_hint, - std::get<1>(tuple), - std::get<2>(tuple)); - } - TypeCollector visitor(arg_top_vars); - visitor.Visit(op->body); - - // generte function calls - stream << "top("; - int index = 0; - for (size_t i = 0; i < arg_vars.size(); i++) { - auto v = arg_vars[i]; - std::string arg_name; - if (stream_table[v]) - arg_name = std::get<0>(arg_top_vars[v]); - else arg_name = GetVarID(v); - if (index !=0) stream << ", "; - stream << arg_name; - // print kernel func signature - if (index != 0) arg_stream << ", "; - PrintType(std::get<1>(arg_top_vars[v]), arg_stream); - auto shape = std::get<2>(arg_top_vars[v]); - arg_stream << " " << arg_name; - for (size_t k = 0; k < shape.size(); k++) - arg_stream << "[" << shape[k] << "]"; - index++; - } - stream << ");\n"; - - // switch context to device scope - host_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // swtich from device to host - } else if (op->value.as()->value == "cpu" && - fpga_scope_) { - fpga_scope_ = false; - device_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - } } this->PrintStmt(op->body); } @@ -1156,75 +889,17 @@ void CodeGenC::VisitStmt_(const ProducerConsumer *op) { PrintStmt(op->body); } -void CodeGenC::VisitStmt_(const KernelDef* op) { - LoweredFunc f; - // save func states - SaveFuncState(f); - InitFuncState(f); - std::ostringstream save; - save << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // skip the 
first underscore - GetUniqueName("_"); - // add to alloc buffer : type. - for (const auto & k : op->args) { - RegisterHandleType(k.get(), k.get()->type); - } - // print function signature - PrintType(op->ret_type, stream); - stream << " " << op->name << "("; - for (size_t k = 0; k < op->channels.size(); k+=2) { - int pos = op->channels[k].as()->value; - stream_arg_pos[op->name].insert(pos); - } - for (size_t i = 0; i < op->args.size(); ++i) { - VarExpr v = op->args[i]; - var_shape_map_[v.get()] = op->api_args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) stream << ", "; - std::string str = PrintExpr(op->api_types[i]); - Type type = String2Type(str); - PrintType(type, stream); - this->stream << " " << vid << "["; - if (v.type().is_handle()) { - for (size_t j = 0; j < op->api_args[i].size(); j++) { - if (j != 0) stream << "* "; - auto dim = op->api_args[i][j].as()->value; - this->stream << dim; - } - this->stream << ']'; - } - } - stream << ") {\n"; - int func_scope = BeginScope(); - range_ = CollectIterRange(op->body); - PrintStmt(op->body); - EndScope(func_scope); - stream << "}\n\n"; - - // restore default stream - module_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - this->stream << save.str(); - RestoreFuncState(f); +void CodeGenC::VisitStmt_(const KernelDef *op) { + LOG(FATAL) << "KernelDef is not yet support"; } void CodeGenC::VisitStmt_(const KernelStmt *op) { - PrintIndent(); - stream << op->name << "("; - for (size_t i = 0; i < op->args.size(); i++) { - PrintExpr(op->args[i], stream); - if (i < op->args.size() -1) stream << ", "; - } - stream << ");\n"; + LOG(FATAL) << "KernelStmt is not yet support"; } void CodeGenC::VisitStmt_(const Return *op) { this->stream << "return "; - PrintExpr(op->value, stream); + PrintExpr(op->value); this->stream << ";\n"; } @@ -1247,28 +922,5 @@ void CodeGenC::VisitStmt_(const While *op) { void CodeGenC::VisitStmt_(const Partition* op) { } -void CodeGenC::SaveFuncState(LoweredFunc 
f) { - // clear save info copy - alloc_storage_scope_save.clear(); - handle_data_type_save.clear(); - var_shape_map_save.clear(); - range_save.clear(); - // backup func info and clear - alloc_storage_scope_save = alloc_storage_scope_; - handle_data_type_save = handle_data_type_; - var_shape_map_save = var_shape_map_; - range_save = range_; - CodeGenSourceBase::SaveFuncState(); -} - -void CodeGenC::RestoreFuncState(LoweredFunc f) { - this->InitFuncState(f); - alloc_storage_scope_ = alloc_storage_scope_save; - handle_data_type_ = handle_data_type_save; - var_shape_map_ = var_shape_map_save; - range_ = range_save; - CodeGenSourceBase::RestoreFuncState(); -} - } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index d7292b38f..f579ca579 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -16,64 +15,11 @@ #include #include #include "./codegen_source_base.h" -#include "./merlinc/codeanalys_merlinc.h" -#include "../runtime/thread_storage_scope.h" namespace TVM { namespace codegen { using namespace ir; -template -using str2tupleMap = std::unordered_map>; -using var2nameType = std::unordered_map>>; - -Type String2Type(std::string& s); -std::string getIndex(std::vector shape); - -/*! - * \brief A data type collector - * - * CodeGenC TypeCollector gathers information - * of different types of each variable - * - */ -class TypeCollector final : public IRVisitor { - public: - var2nameType& top_args_; - TypeCollector(var2nameType& top_args) : top_args_(top_args) {}; - void Visit_(const Allocate *op); -}; - -/*! 
- * \brief An undefined variable collector - * - * CodeGenC stream data collector detects undefined - * variable and create channels for them - * - * */ -class StreamCollector final : public IRVisitor { - public: - Array host_undefined_; - std::unordered_map host_use_count_; - std::unordered_map host_def_count_; - StreamCollector(std::unordered_map& stream_table, - std::string initial_scope) - : stream_table_(stream_table), - scope_(initial_scope) {}; - void Visit_(const Allocate *op); - void Visit_(const Load *op); - void Visit_(const Store *op); - void Visit_(const StreamStmt *op); - void Visit_(const AttrStmt *op); - void HandleDef(const Variable* v); - void HandleUse(const Expr& v); - private: - std::unordered_map& stream_table_; - std::string scope_; - bool switch_on{true}; -}; - /*! * \brief A base class to generate C code. * @@ -98,22 +44,12 @@ class CodeGenC : * \brief Add the function to the generated module. * \param f The function to be compiled. */ - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + void AddFunction(LoweredFunc f); /*! * \brief Finalize the compilation and return the code. * \return The code. */ std::string Finish(); - /*! - * \brief Finalize the compilation and return the code. - * \return The host code. - */ - std::string GetHost(); - /*! - * \brief Finalize the compilation and return the code. - * \return The device code. - */ - std::string GetDevice(); /*! * \brief Print the Stmt n to CodeGenC->stream * \param n The statement to be printed. 
@@ -177,7 +113,6 @@ class CodeGenC : void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const StreamExpr* op, std::ostream& os) override; // NOLINT(*) // statment void VisitStmt_(const LetStmt* op) override; void VisitStmt_(const Store* op) override; @@ -191,7 +126,6 @@ class CodeGenC : void VisitStmt_(const ProducerConsumer* op) override; void VisitStmt_(const KernelDef* op) override; void VisitStmt_(const KernelStmt* op) override; - void VisitStmt_(const StreamStmt* op) override; void VisitStmt_(const Return* op) override; void VisitStmt_(const Break* op) override; void VisitStmt_(const While* op) override; @@ -225,38 +159,10 @@ class CodeGenC : // print store of single element. virtual void PrintVecElemStore( const std::string& vec, Type t, int i, const std::string& value); - // get a cast type from to + // Get a cast type from to virtual std::string CastFromTo(std::string value, Type from, Type target); - // map from var to shape, range and type - std::map > var_shape_map_; - std::unordered_map range_; - str2tupleMap map_arg_type_; - - // save for kernel - std::map > var_shape_map_save; - std::unordered_map range_save; - - // index into ap_arg_type - size_t arg_count{0}; - // map {var : (vid, Type, shape)} - var2nameType arg_top_vars; - // vector {vars} in top function - std::vector arg_vars; - // vector of top function arg dimension - std::vector> arg_shapes; - // whether the function arg is streamed - std::unordered_map stream_table; - // map from kernel name to set of streamed arg position index - std::unordered_map> stream_arg_pos; - // pre and post processing device code - virtual void PreProcess(std::ostringstream& os) {}; - virtual void PostProcess(std::ostringstream& os) {}; - protected: - void SaveFuncState(LoweredFunc f); - void 
RestoreFuncState(LoweredFunc f); - // Print reference to struct location std::string GetStructRef( Type t, const Expr& buffer, const Expr& index, int kind); @@ -280,22 +186,12 @@ class CodeGenC : const std::string& target, const std::string& src, Type t) final; /*! \brief restrict keyword */ std::string restrict_keyword_{""}; - /*! \brief the func arg decl stream */ - std::ostringstream arg_stream; /*! \brief the storage scope of allocation */ std::unordered_map alloc_storage_scope_; /*! \brief the data type of allocated buffers */ std::unordered_map handle_data_type_; std::unordered_map buf_length_map_; - // save for kernel gen - std::unordered_map alloc_storage_scope_save; - std::unordered_map handle_data_type_save; - std::unordered_map var_idmap_save; - std::unordered_map name_alloc_map_save; - std::unordered_map ssa_assign_map_save; - std::vector scope_mark_save; - private: /*! \brief whether to print in SSA form */ bool print_ssa_form_{false}; diff --git a/tvm/src/codegen/codegen_cuda.cc b/tvm/src/codegen/codegen_cuda.cc index 3c675ad06..badbf2849 100644 --- a/tvm/src/codegen/codegen_cuda.cc +++ b/tvm/src/codegen/codegen_cuda.cc @@ -25,10 +25,9 @@ void CodeGenCUDA::Init(bool output_ssa) { CHECK_EQ(vid_global_barrier_state_, runtime::symbol::tvm_global_barrier_state); } -void CodeGenCUDA::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { +void CodeGenCUDA::AddFunction(LoweredFunc f) { this->stream << "extern \"C\" __global__ "; - CodeGenC::AddFunction(f, map_arg_type); + CodeGenC::AddFunction(f); } void CodeGenCUDA::VisitStmt_(const ir::For* op) { diff --git a/tvm/src/codegen/codegen_cuda.h b/tvm/src/codegen/codegen_cuda.h index e0c4f1a41..e49a47ae3 100644 --- a/tvm/src/codegen/codegen_cuda.h +++ b/tvm/src/codegen/codegen_cuda.h @@ -10,7 +10,6 @@ #include #include #include "./codegen_c.h" -#include "./merlinc/codeanalys_merlinc.h" namespace TVM { namespace codegen { @@ -19,8 +18,7 @@ class CodeGenCUDA final : public CodeGenC { public: CodeGenCUDA(); 
void Init(bool output_ssa); - void AddFunction(LoweredFunc f, - str2tupleMap map_arg_type); + void AddFunction(LoweredFunc f); // override behavior void VisitStmt_(const ir::For* op) final; void PrintStorageSync(const Call* op) final; diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/codegen_opencl.cc old mode 100755 new mode 100644 similarity index 53% rename from tvm/src/codegen/opencl/codegen_opencl.cc rename to tvm/src/codegen/codegen_opencl.cc index 979a19e0f..d0297a1d9 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/codegen_opencl.cc @@ -1,239 +1,206 @@ -# include -# include -# include -# include -# include -# include -# include "./codegen_opencl.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM{ -namespace codegen{ - -CodeGenOpenCL::CodeGenOpenCL(){ - restrict_keyword_ = "restrict"; -} - -std::string CodeGenOpenCL::Finish() { - // inject extension enable pragma for fp16 and fp64 - if (enable_fp16_) { - decl_stream - << "#ifdef cl_khr_fp16\n" - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "#elif defined(cl_amd_fp16)\n" - "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" - "#else\n" - "#error \"Half precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - if (enable_fp64_) { - decl_stream - << "#ifdef cl_khr_fp64\n" - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "#elif defined(cl_amd_fp64)\n" - "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" - "#else\n" - "#error \"Double precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - return CodeGenC::Finish(); -} - -void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { - CHECK(!var_idmap_.count(iv->var.get())); - runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); - std::ostringstream os; - if (ts.rank == 1) { - os << "get_local_id(" << ts.dim_index << ")"; - } else { - os << "get_group_id(" << 
ts.dim_index << ")"; - } - var_idmap_[iv->var.get()] = - CastFromTo(os.str(), UInt(64), iv->var.type()); -} - - -void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os) { // NOLINT(*) - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - auto it = alloc_storage_scope_.find(buffer); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << GetVarID(buffer) << " + "; - PrintExpr(base, os); -} -std::string CodeGenOpenCL::GetVecLoad( - Type t, const Variable* buffer, Expr base) { - std::ostringstream os; - os << "vload" << t.lanes() << "(0, "; - PrintVecAddr(buffer, t, base, os); - os << ")"; - return os.str(); -} - -void CodeGenOpenCL::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - this->PrintIndent(); - stream << "vstore" << t.lanes() << "(" << value << ", 0, "; - PrintVecAddr(buffer, t, base, stream); - stream << ");\n"; -} - -void CodeGenOpenCL::PrintStorageSync(const Call* op) { - const std::string& sync = op->args[0].as()->value; - if (sync == "warp") { - LOG(FATAL) << "warp sync not supported in opencl"; - } else if (sync == "shared") { - this->PrintIndent(); - this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; - } else if (sync == "global") { - LOG(FATAL) << "not supported"; - } -} - -void CodeGenOpenCL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global") { - // os << "global "; - } else if (scope == "shared") { - // os << "local "; - } -} - -std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - if (target.lanes() == 1) { - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - } else { // convert vector type - os << "("; - os << "convert_"; - this->PrintType(target, os); - os << "(" << value << "))"; - } - 
return os.str(); -} - -void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - std::string v = PrintExpr(op->value); - os << "(("; - PrintType(op->type, os); - os << ")("; - for (int i = 0; i < op->lanes; ++i) { - if (i != 0) os << ", "; - os << v; - } - os << "))"; -} - -void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) - if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintType(op->args[2].type(), os); - os << ")"; - } - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - - -void CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) - if (std::isinf(op->value)) { - if ( op->value < 0) { - os << "-"; - } - os << "INFINITY"; - } else if (std::isnan(op->value)) { - os << "NAN"; - } else { - CodeGenC::VisitExpr_(op, os); - } -} - -void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) - os << "("; - PrintType(op->true_value.type(), os); - os << ")"; - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenOpenCL::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - // Skip the buffer data checking - if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) - return ; - PrintIndent(); - if (cond[0] == '(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - if (op->else_case.defined()) { - 
PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeGenOpenCL::GenForStmt(const For* op, std::string pragma, bool before) { - std::string extent = PrintExpr(op->extent); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - if (before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - PrintIndent(); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - if (!before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - -} // namespace codegen -} // namespace TVM +/*! + * Copyright (c) 2017 by Contributors + * \file codegen_opencl.cc + */ +#include +#include +#include +#include +#include "./codegen_opencl.h" +#include "../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +CodeGenOpenCL::CodeGenOpenCL() { + restrict_keyword_ = "restrict"; +} + +void CodeGenOpenCL::InitFuncState(LoweredFunc f) { + CodeGenC::InitFuncState(f); + for (Var arg : f->args) { + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } +} + +void CodeGenOpenCL::AddFunction(LoweredFunc f) { + this->stream << "__kernel "; + CodeGenC::AddFunction(f); +} + +std::string CodeGenOpenCL::Finish() { + // inject extension enable pragma for fp16 and fp64 + if (enable_fp16_) { + decl_stream + << "#ifdef cl_khr_fp16\n" + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "#elif defined(cl_amd_fp16)\n" + "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" + "#else\n" + "#error \"Half precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + if (enable_fp64_) { + decl_stream + << "#ifdef 
cl_khr_fp64\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#elif defined(cl_amd_fp64)\n" + "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" + "#else\n" + "#error \"Double precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + return CodeGenC::Finish(); +} + +void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { + CHECK(!var_idmap_.count(iv->var.get())); + runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); + std::ostringstream os; + if (ts.rank == 1) { + os << "get_local_id(" << ts.dim_index << ")"; + } else { + os << "get_group_id(" << ts.dim_index << ")"; + } + var_idmap_[iv->var.get()] = + CastFromTo(os.str(), UInt(64), iv->var.type()); +} + +void CodeGenOpenCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + int lanes = t.lanes(); + if (t.is_handle()) { + CHECK_EQ(lanes, 1) + << "do not yet support vector types"; + os << "void*"; return; + } + bool fail = false; + if (t.is_float()) { + switch (t.bits()) { + case 16: + os << "half"; + enable_fp16_ = true; + break; + case 32: os << "float"; break; + case 64: + os << "double"; + enable_fp64_ = true; + break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } else if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << 'u'; + } + if (t.bits() == 8 && t.lanes() == 4) { + // directly 4 8 bit int in integer. 
+ os << "int"; return; + } + switch (t.bits()) { + case 8: os << "char"; break; + case 16: os << "short"; break; + case 32: os << "int"; break; + case 64: os << "long"; break; + case 1: os << "int"; break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; +} + +void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os) { // NOLINT(*) + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + auto it = alloc_storage_scope_.find(buffer); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << GetVarID(buffer) << " + "; + PrintExpr(base, os); +} +std::string CodeGenOpenCL::GetVecLoad( + Type t, const Variable* buffer, Expr base) { + std::ostringstream os; + os << "vload" << t.lanes() << "(0, "; + PrintVecAddr(buffer, t, base, os); + os << ")"; + return os.str(); +} + +void CodeGenOpenCL::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + this->PrintIndent(); + stream << "vstore" << t.lanes() << "(" << value << ", 0, "; + PrintVecAddr(buffer, t, base, stream); + stream << ");\n"; +} + +void CodeGenOpenCL::PrintStorageSync(const Call* op) { + const std::string& sync = op->args[0].as()->value; + if (sync == "warp") { + LOG(FATAL) << "warp sync not supported in opencl"; + } else if (sync == "shared") { + this->PrintIndent(); + this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; + } else if (sync == "global") { + LOG(FATAL) << "not supported"; + } +} + +void CodeGenOpenCL::PrintStorageScope( + const std::string& scope, std::ostream& os) { // NOLINT(*) + if (scope == "global") { + os << "__global"; + } else if (scope == "shared") { + os << "__local"; + } +} + +std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type 
target) { + if (from == target) return value; + std::ostringstream os; + if (target.lanes() == 1) { + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + } else { // convert vector type + os << "("; + os << "convert_"; + this->PrintType(target, os); + os << "(" << value << "))"; + } + return os.str(); +} + +void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + std::string v = PrintExpr(op->value); + os << "(("; + PrintType(op->type, os); + os << ")("; + for (int i = 0; i < op->lanes; ++i) { + if (i != 0) os << ", "; + os << v; + } + os << "))"; +} +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/codegen_opencl.h b/tvm/src/codegen/codegen_opencl.h new file mode 100644 index 000000000..088ab089a --- /dev/null +++ b/tvm/src/codegen/codegen_opencl.h @@ -0,0 +1,51 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file codegen_opencl.h + * \brief Generate OpenCL device code. + */ +#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ +#define TVM_CODEGEN_CODEGEN_OPENCL_H_ + +#include +#include +#include +#include "./codegen_c.h" + +namespace TVM { +namespace codegen { + +class CodeGenOpenCL final : public CodeGenC { + public: + CodeGenOpenCL(); + void AddFunction(LoweredFunc f); + std::string Finish(); + + // override print thread tag. 
+ void InitFuncState(LoweredFunc f) final; + void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) final; // NOLINT(*) + void PrintStorageSync(const Call* op) final; // NOLINT(*) + void PrintType(Type t, std::ostream& os) final; // NOLINT(*) + std::string GetVecLoad(Type t, const Variable* buffer, + Expr base) final; + void PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) final; // NOLINT(*) + // the address of load/store + void PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os); // NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target); // NOLINT(*) + + // overload visitor + void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*) + + private: + // whether enable fp16 and fp64 extension + bool enable_fp16_{false}; + bool enable_fp64_{false}; +}; + +} // namespace codegen +} // namespace TVM + +#endif // TVM_CODEGEN_CODEGEN_OPENCL_H_ diff --git a/tvm/src/codegen/codegen_source_base.cc b/tvm/src/codegen/codegen_source_base.cc index 9fc6fc706..0df1ad276 100644 --- a/tvm/src/codegen/codegen_source_base.cc +++ b/tvm/src/codegen/codegen_source_base.cc @@ -8,79 +8,34 @@ namespace TVM { namespace codegen { void CodeGenSourceBase::ClearFuncState() { - host_name_alloc_map_.clear(); - device_name_alloc_map_.clear(); + name_alloc_map_.clear(); ssa_assign_map_.clear(); var_idmap_.clear(); scope_mark_.clear(); } -void CodeGenSourceBase::SaveFuncState() { - host_name_alloc_map_save.clear(); - device_name_alloc_map_save.clear(); - ssa_assign_map_save.clear(); - var_idmap_save.clear(); - scope_mark_save.clear(); - // save state into private member - host_name_alloc_map_save = host_name_alloc_map_; - device_name_alloc_map_save = device_name_alloc_map_; - ssa_assign_map_save = ssa_assign_map_; - var_idmap_save = var_idmap_; - scope_mark_save = scope_mark_; -} - -void CodeGenSourceBase::RestoreFuncState() { 
- this->ClearFuncState(); - host_name_alloc_map_ = host_name_alloc_map_save; - device_name_alloc_map_ = device_name_alloc_map_save; - ssa_assign_map_ = ssa_assign_map_save; - var_idmap_ = var_idmap_save; - scope_mark_ = scope_mark_save; -} - std::string CodeGenSourceBase::GetUniqueName(std::string prefix) { for (size_t i = 0; i < prefix.size(); ++i) { if (prefix[i] == '.') prefix[i] = '_'; } - if (fpga_scope_) { - auto it = device_name_alloc_map_.find(prefix); - if (it != device_name_alloc_map_.end()) { - while (true) { - std::ostringstream os; - os << prefix << (++it->second); - std::string name = os.str(); - if (device_name_alloc_map_.count(name) == 0) { - prefix = name; - break; - } + auto it = name_alloc_map_.find(prefix); + if (it != name_alloc_map_.end()) { + while (true) { + std::ostringstream os; + os << prefix << (++it->second); + std::string name = os.str(); + if (name_alloc_map_.count(name) == 0) { + prefix = name; + break; } } - device_name_alloc_map_[prefix] = 0; - return prefix; - } else { - auto it = host_name_alloc_map_.find(prefix); - if (it != host_name_alloc_map_.end()) { - while (true) { - std::ostringstream os; - os << prefix << (++it->second); - std::string name = os.str(); - if (host_name_alloc_map_.count(name) == 0) { - prefix = name; - break; - } - } - } - host_name_alloc_map_[prefix] = 0; - return prefix; } + name_alloc_map_[prefix] = 0; + return prefix; } std::string CodeGenSourceBase::SSAGetID(std::string src, Type t) { - if (fpga_scope_) { - if (device_name_alloc_map_.count(src)) return src; - } else { - if (host_name_alloc_map_.count(src)) return src; - } + if (name_alloc_map_.count(src)) return src; auto it = ssa_assign_map_.find(src); if (it != ssa_assign_map_.end()) { if (scope_mark_.at(it->second.scope_id)) { diff --git a/tvm/src/codegen/codegen_source_base.h b/tvm/src/codegen/codegen_source_base.h index befc3f8ec..e140662c1 100644 --- a/tvm/src/codegen/codegen_source_base.h +++ b/tvm/src/codegen/codegen_source_base.h @@ -39,10 
+39,6 @@ class CodeGenSourceBase { }; /*! \brief Clear the states that might relates to function generation */ void ClearFuncState(); - /*! \brief Save the states that might relates to function generation */ - void SaveFuncState(); - /*! \brief Restore the states that might relates to function generation */ - void RestoreFuncState(); /*! \brief print the current indented value */ void PrintIndent(); /*! @@ -93,36 +89,18 @@ class CodeGenSourceBase { std::ostringstream decl_stream; /*! \brief the stream to be printed */ std::ostringstream stream; - /*! \brief the stream for mocule */ - std::ostringstream module_stream; - /*! \brief the stream host */ - std::ostringstream host_stream; - /*! \brief the stream device */ - std::ostringstream device_stream; /*! \brief name of each variable */ std::unordered_map var_idmap_; - /*! \brief save states as copy */ - std::unordered_map var_idmap_save; - /*! \brief whether generate code for fpga */ - bool fpga_scope_{false}; - /*! \brief name allocation map for host */ - std::unordered_map host_name_alloc_map_; - /*! \brief name allocation map for device */ - std::unordered_map device_name_alloc_map_; private: /*! \brief assignment map of ssa */ std::unordered_map ssa_assign_map_; + /*! \brief name allocation map */ + std::unordered_map name_alloc_map_; /*! \brief array to check whether we are inside certain scope */ std::vector scope_mark_; /*! \brief The current indentation value */ int indent_{0}; - /*! \brief Save states as copy */ - std::unordered_map ssa_assign_map_save; - std::unordered_map host_name_alloc_map_save; - std::unordered_map device_name_alloc_map_save; - std::vector scope_mark_save; - }; /*! 
diff --git a/tvm/src/codegen/hlsc/build_hlsc.cc b/tvm/src/codegen/hlsc/build_hlsc.cc index 2494ee66f..42fb68089 100644 --- a/tvm/src/codegen/hlsc/build_hlsc.cc +++ b/tvm/src/codegen/hlsc/build_hlsc.cc @@ -24,6 +24,7 @@ runtime::Module BuildVivadoHLSCSim(Array funcs) { cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); + return runtime::CreateVivadoHLSModule(funcs[0], code); } @@ -46,6 +47,7 @@ std::string BuildHLSC(Array funcs) { cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); + LOG(WARNING) << "HLS C doesn't have runtime, return kernel code"; return code; } diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index d7fc610d7..3e8696fba 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -15,50 +15,49 @@ namespace codegen { void CodeGenHLSC::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { - CodeGenC::AddFunction(f, map_arg_type); - // // Write header files - // // TODO: Insert header files here - // // Clear previous generated state - // this->InitFuncState(f); - // // Register alloc buffer type - // for (const auto & kv : f->handle_data_type) { - // RegisterHandleType(kv.first.get(), kv.second.type()); - // } - // // Write entry function name - // this->stream << "void " << f->name << "("; - // // Write arguments - // for (size_t i = 0; i < f->args.size(); ++i) { - // Var v = f->args[i]; - // std::string vid = AllocVarID(v.get()); - // if (i != 0) this->stream << ", "; - // if (map_arg_type.find(vid) == map_arg_type.end()) { - // LOG(WARNING) << vid << " type not found\n"; - // PrintType(v.type(), this->stream); - // this->stream << ' ' << vid; - // } - // else { - // auto arg = map_arg_type[vid]; - // PrintType(std::get<1>(arg), this->stream); - // this->stream << ' ' << std::get<0>(arg); - // const BufferNode* buf = f->api_args[i].as(); - // if (v.type().is_handle() && buf) { - // var_shape_map_[buf->data.get()] = buf->shape; - // for 
(size_t i = 0; i < buf->shape.size(); i++) { - // this->stream << '['; - // this->PrintExpr(buf->shape[i], this->stream); - // this->stream << ']'; - // } - // } - // // this->stream << "*"; TODO: create an option for this - // } - // } - // stream << ") {\n"; - // int func_scope = this->BeginScope(); - // range_ = CollectIterRange(f->body); - // this->PrintStmt(f->body); - // this->EndScope(func_scope); - // this->PrintIndent(); - // this->stream << "}\n\n"; + // Write header files + // TODO: Insert header files here + // Clear previous generated state + this->InitFuncState(f); + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + // Write entry function name + this->stream << "void " << f->name << "("; + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + PrintType(std::get<1>(arg), this->stream); + this->stream << ' ' << std::get<0>(arg); + const BufferNode* buf = f->api_args[i].as(); + if (v.type().is_handle() && buf) { + var_shape_map_[buf->data.get()] = buf->shape; + for (size_t i = 0; i < buf->shape.size(); i++) { + this->stream << '['; + this->PrintExpr(buf->shape[i], this->stream); + this->stream << ']'; + } + } + // this->stream << "*"; TODO: create an option for this + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + range_ = CollectIterRange(f->body); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + this->stream << "}\n\n"; } std::string CodeGenHLSC::GetBufferRef(Type t, const Variable* buffer, Expr index) { @@ -69,16 +68,14 @@ std::string CodeGenHLSC::GetBufferRef(Type t, const Variable* 
buffer, Expr index buf_length_map_[buffer] == 1); if (is_scalar) { os << vid; - } else { - os << vid << "["; - PrintExpr(index, os); - os << "]"; - // std::vector indices = ExtractIndices(index, var_shape_map_[buffer], range_); - // for (size_t i = 0; i < indices.size(); i++) { - // os << '['; - // PrintExpr(indices[i], os); - // os << ']'; - // } + } else { + os << vid; + std::vector indices = ExtractIndices(index, var_shape_map_[buffer], range_); + for (size_t i = 0; i < indices.size(); i++) { + os << '['; + PrintExpr(indices[i], os); + os << ']'; + } } } return os.str(); @@ -91,7 +88,6 @@ void CodeGenHLSC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) PrintExpr(op->b, os); os << ")"; } - void CodeGenHLSC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) os << "std::max("; PrintExpr(op->a, os); @@ -101,20 +97,19 @@ void CodeGenHLSC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) } void CodeGenHLSC::VisitStmt_(const LetStmt* op) { - CodeGenC::VisitStmt_(op); - // std::string value = PrintExpr(op->value); - // // Skip the argument retrieving assign statement - // std::string vid = AllocVarID(op->var.get()); - // if (op->var.type() != Handle() && - // value.find("TVMArray") == std::string::npos && - // value.find("arg") != 0) { - // PrintIndent(); - // PrintType(op->var.type(), this->stream); - // this->stream << ' ' - // << vid - // << " = " << value << ";\n"; - // } - // PrintStmt(op->body); + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); } void CodeGenHLSC::GenForStmt(const For* op, std::string pragma, bool before) { @@ -169,10 +164,7 @@ void CodeGenHLSC::VisitStmt_(const 
IfThenElse* op) { void CodeGenHLSC::VisitStmt_(const Allocate* op) { CHECK(!is_zero(op->condition)); - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); + std::string vid = AllocVarID(op->buffer_var.get()); this->PrintIndent(); int32_t constant_size = op->constant_allocation_size(); CHECK_GT(constant_size, 0) @@ -181,22 +173,16 @@ void CodeGenHLSC::VisitStmt_(const Allocate* op) { var_shape_map_[buffer] = op->extents; std::string scope = alloc_storage_scope_.at(buffer); PrintStorageScope(scope, stream); - - if (vid.find("stream_") != std::string::npos) { - void(0); // alloc stream channel in pre-processing - } else { - PrintType(op->type, stream); - stream << ' '<< vid; - if (constant_size > 1) {// Transfer length one array to scalar - stream << "["; - for (size_t i = 0; i < op->extents.size(); i++) { - PrintExpr(op->extents[i], stream); - if (i != op->extents.size()-1) stream << "*"; - } + PrintType(op->type, stream); + stream << ' '<< vid; + if (constant_size > 1) {// Transfer length one array to scalar + for (size_t i = 0; i < op->extents.size(); i++) { + stream << '['; + PrintExpr(op->extents[i], stream); stream << "]"; } - stream << ";\n"; } + stream << ";\n"; buf_length_map_[buffer] = constant_size; RegisterHandleType(op->buffer_var.get(), op->type); for (size_t i = 0; i < op->attrs.size(); i++) { diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.h b/tvm/src/codegen/hlsc/codegen_hlsc.h index fdd1747fa..c85cbc699 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.h +++ b/tvm/src/codegen/hlsc/codegen_hlsc.h @@ -27,7 +27,9 @@ class CodeGenHLSC : public CodeGenC { void VisitStmt_(const Allocate* op) override; void GenForStmt(const For* op, std::string pragma, bool before); - + + std::map > var_shape_map_; + std::unordered_map range_; protected: std::string GetBufferRef(Type t, const Variable* buffer, Expr index); }; diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc 
b/tvm/src/codegen/hlsc/codegen_vhls.cc index f944bef83..6a0977e40 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -21,83 +21,12 @@ namespace TVM { namespace codegen { -void CodeGenVivadoHLS::PreProcess(std::ostringstream& os) { - os << "\n"; - int indent = 2; - for (size_t i = 0; i < arg_vars.size(); i++) { - auto v = arg_vars[i]; - std::string arg_name; - if (stream_table[v]) - arg_name = std::get<0>(arg_top_vars[v]); - else arg_name = GetVarID(v); - - // create local buffer saving result - auto shape = std::get<2>(arg_top_vars[v]); - auto dtype = std::get<1>(arg_top_vars[v]); - if (!stream_table[v]) { // unstreamed args - // allocate local buffer - for (int k = 0; k < indent; k++) os << ' '; - PrintType(dtype, os); - os << " " << arg_name << "["; - for (size_t n = 0; n < shape.size(); n++) { - os << shape[n]; - if (n != shape.size() - 1) os << "* "; - } - os << "];\n"; - - for (size_t j = 0; j < shape.size(); j++) { - for (int k = 0; k < indent; k++) os << ' '; - os << "for (int i" << j << " = 0; i" - << j << "< " << shape[j] << "; i" - << j << "++) {\n"; - // pass stream reference - if (j == shape.size() - 1) { - for (int k = 0; k < indent; k++) os << ' '; - os << " " << arg_name << "[" - << getIndex(shape) << "] = " - << "fd_" << arg_name << ".read();\n"; - } - indent += 2; - } - for (size_t m = 0; m < shape.size(); m++) { - indent -= 2; - for (int k = 0; k < indent; k++) os << ' '; - os << "}\n"; - } - } else if (i == arg_vars.size() - 1 || true) { - // allocate for return variable - for (int k = 0; k < indent; k++) os << ' '; - PrintType(dtype, os); - os << " " << arg_name << "["; - for (size_t n = 0; n < shape.size(); n++) { - os << shape[n]; - if (n != shape.size() - 1) os << "* "; - } - os << "];\n"; - } - } -} - -void CodeGenVivadoHLS::PostProcess(std::ostringstream& os) { -// os << "\n"; -// int indent = 2; -// for (size_t i = 0; i < arg_vars.size(); i++) { -// auto v = arg_vars[i]; -// std::string 
arg_name; -// if (stream_table[v]) -// arg_name = std::get<0>(arg_top_vars[v]); -// else arg_name = GetVarID(v); -// os << arg_name << " = " << "fd_" -// << arg_name << ".write();\n"; -} - void CodeGenVivadoHLS::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Write header files - this->decl_stream << "#include \n"; - this->decl_stream << "#include \n"; - this->decl_stream << "#include \n"; - this->decl_stream << "#include \n\n"; + this->stream << "#include \n"; + this->stream << "#include \n"; + this->stream << "#include \n\n"; CodeGenHLSC::AddFunction(f, map_arg_type); if (soda_header_.is_open()) soda_header_.close(); @@ -148,13 +77,6 @@ void CodeGenVivadoHLS::VisitStmt_(const Store* op) { this->stream << ref << "[" << PrintExpr(sb->index) << "] = " << PrintExpr(sb->value) << ";\n"; - } else if (const StreamExpr* se = op->value.as()) { - std::string vid = GetVarID(se->buffer_var.get()); - vid = vid.substr(0, vid.find("_stream_send")); - PrintIndent(); - this->stream << vid << "[" - << op->index << "] = " - << "fd_" << vid << ".read();\n"; } else { CodeGenC::VisitStmt_(op); } @@ -221,30 +143,6 @@ void CodeGenVivadoHLS::VisitStmt_(const Partition* op) { stream << "\n"; } -void CodeGenVivadoHLS::VisitExpr_(const StreamExpr* op, std::ostream& os) { - CodeGenC::VisitExpr_(op, os); - std::string vid = GetVarID(op->buffer_var.get()); - vid = vid.substr(0, vid.find("_stream_send")); - os << vid << ".read()"; -} - -void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { - CodeGenC::VisitStmt_(op); - std::string vid = GetVarID(op->buffer_var.get()); - switch (op->stream_type) { - case StreamType::Channel: - break; - case StreamType::FIFO: - break; - case StreamType::Pipe: - break; - } - vid = vid.substr(0, vid.find("_stream_send")); - auto load = op->value.as(); - stream << "fd_" << vid << ".write(" - << vid << "["<< load->index << "]);\n"; -} - class AllocateCollector final : public IRVisitor { public: AllocateCollector(std::vector& alloc_list, @@ -262,144 
+160,6 @@ class AllocateCollector final : public IRVisitor { VarExprUnorderedSet& outputs_; }; -void CodeGenVivadoHLS::VisitStmt_(const AttrStmt* op) { - if (op->attr_key == ir::attr::device_scope) { - // print top( ... in host and enter fpga scope - if (op->value.as()->value == "fpga" && !fpga_scope_) { - fpga_scope_ = true; - PrintIndent(); - - // track the stream usage - StreamCollector collector(stream_table, "cpu"); - collector.Visit(op->body); - - // update data type and name - for (auto k : collector.host_undefined_) { - auto v = k.get(); - arg_vars.push_back(v); - stream_table[v] = true; - auto tuple = arg_top_vars[v]; - arg_top_vars[v] = std::make_tuple(v->name_hint, - std::get<1>(tuple), - std::get<2>(tuple)); - } - TypeCollector visitor(arg_top_vars); - visitor.Visit(op->body); - - // generte function calls - stream << "top("; - for (size_t i = 0; i < arg_vars.size(); i++) { - auto v = arg_vars[i]; - std::string arg_name; - if (stream_table[v]) - arg_name = std::get<0>(arg_top_vars[v]); - else arg_name = GetVarID(v); - if (i != 0) stream << ", "; - stream << "fd_" << arg_name; - - // generate kernel func definition - if (i != 0) arg_stream << ", "; - arg_stream << "hls::stream<"; - PrintType(std::get<1>(arg_top_vars[v]), arg_stream); - auto shape = std::get<2>(arg_top_vars[v]); - arg_stream << ">& fd_" << arg_name; - } - stream << ");\n"; - - // switch context to device scope - host_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // swtich from device to host - } else if (op->value.as()->value == "cpu" && - fpga_scope_) { - fpga_scope_ = false; - device_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - } - this->PrintStmt(op->body); - } else { - CodeGenC::VisitStmt_(op); - } -} - -void CodeGenVivadoHLS::VisitStmt_(const KernelStmt *op) { - PrintIndent(); - stream << op->name << "("; - for (size_t i = 0; i < op->args.size(); i++) { - if (stream_arg_pos[op->name].count(i)) - stream << "fd_"; - 
PrintExpr(op->args[i], stream); - if (i < op->args.size() -1) stream << ", "; - } - stream << ");\n"; -} - -void CodeGenVivadoHLS::VisitStmt_(const KernelDef* op) { - LoweredFunc f; - // save func states - CodeGenC::SaveFuncState(f); - CodeGenC::InitFuncState(f); - std::ostringstream save; - save << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // skip the first underscore - GetUniqueName("_"); - // add to alloc buffer : type. - for (const auto & k : op->args) { - RegisterHandleType(k.get(), k.get()->type); - } - // print function signature - PrintType(op->ret_type, stream); - stream << " " << op->name << "("; - for (size_t k = 0; k < op->channels.size(); k+=2) { - int pos = op->channels[k].as()->value; - stream_arg_pos[op->name].insert(pos); - } - for (size_t i = 0; i < op->args.size(); ++i) { - VarExpr v = op->args[i]; - var_shape_map_[v.get()] = op->api_args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) stream << ", "; - std::string str = PrintExpr(op->api_types[i]); - Type type = String2Type(str); - - // pass the stream channel reference - // TODO: broadcast in hlsc (one wr multi read) - if (stream_arg_pos[op->name].count(i)) { - stream << "hls::stream<"; - PrintType(type, stream); - stream << ">& " << vid; - } else { - PrintType(type, stream); - this->stream << " " << vid << "["; - int mul = 1; - for (size_t j = 0; j < op->api_args[i].size(); j++) { - auto dim = op->api_args[i][j].as()->value; - mul = mul * dim; - } - this->stream << mul << "]"; - } - } - stream << ") {\n"; - int func_scope = BeginScope(); - range_ = CollectIterRange(op->body); - PrintStmt(op->body); - EndScope(func_scope); - stream << "}\n\n"; - - // restore default stream - module_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - this->stream << save.str(); - RestoreFuncState(f); -} - void CodeGenVivadoHLS::VisitStmt_(const Stencil* op) { // Use SODA codegen for stencil analysis CodeGenSODA cg_soda; diff --git 
a/tvm/src/codegen/hlsc/codegen_vhls.h b/tvm/src/codegen/hlsc/codegen_vhls.h index 6462251db..5486be1dc 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.h +++ b/tvm/src/codegen/hlsc/codegen_vhls.h @@ -23,19 +23,11 @@ class CodeGenVivadoHLS final : public CodeGenHLSC { void VisitExpr_(const GetBit* op, std::ostream& os) override; void VisitExpr_(const GetSlice* op, std::ostream& os) override; - void VisitExpr_(const StreamExpr* op, std::ostream& os) override; void VisitStmt_(const Store* op) override; void VisitStmt_(const For* op) override; void VisitStmt_(const Partition* op) override; void VisitStmt_(const Stencil* op) override; - void VisitStmt_(const StreamStmt* op) override; - void VisitStmt_(const AttrStmt* op) override; - void VisitStmt_(const KernelDef* op) override; - void VisitStmt_(const KernelStmt* op) override; - - void PreProcess(std::ostringstream& os); - void PostProcess(std::ostringstream& os); private: std::ofstream soda_header_; }; diff --git a/tvm/src/codegen/merlinc/codeanalys_merlinc.cc b/tvm/src/codegen/merlinc/codeanalys_merlinc.cc index d6fa1c6ba..56b4e1d97 100644 --- a/tvm/src/codegen/merlinc/codeanalys_merlinc.cc +++ b/tvm/src/codegen/merlinc/codeanalys_merlinc.cc @@ -652,9 +652,6 @@ void CodeAnalysMerlinC::VisitExpr_(const Broadcast* op, std::ostream& os) { // LOG(FATAL) << "Broadcast: not supported "; } -void CodeAnalysMerlinC::VisitExpr_(const StreamExpr* op, std::ostream& os) { // NOLINT(*) -} - void CodeAnalysMerlinC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) os << "("; PrintExpr(op->condition, os); @@ -719,8 +716,10 @@ void CodeAnalysMerlinC::VisitExpr_(const Quantize *op, std::ostream& os) { // NO } void CodeAnalysMerlinC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "KernelExpr is not yet support"; } + void CodeAnalysMerlinC::VisitStmt_(const LetStmt* op) { // TODO comaniac //std::vector vec_var = GetNodesByType(op->value); @@ -883,9 +882,11 @@ void 
CodeAnalysMerlinC::VisitStmt_(const ProducerConsumer *op) { } void CodeAnalysMerlinC::VisitStmt_(const KernelDef *op) { + LOG(FATAL) << "KernelDef is not yet support"; } void CodeAnalysMerlinC::VisitStmt_(const KernelStmt *op) { + LOG(FATAL) << "KernelStmt is not yet support"; } void CodeAnalysMerlinC::VisitStmt_(const Return *op) { @@ -916,8 +917,6 @@ void CodeAnalysMerlinC::VisitStmt_(const Reuse *op) { void CodeAnalysMerlinC::VisitStmt_(const Partition *op) {} -void CodeAnalysMerlinC::VisitStmt_(const StreamStmt *op) {} - void CodeAnalysMerlinC::VisitStmt_(const Stencil *op) { PrintStmt(op->body); } diff --git a/tvm/src/codegen/merlinc/codeanalys_merlinc.h b/tvm/src/codegen/merlinc/codeanalys_merlinc.h index 421f0d96f..6ba082f09 100644 --- a/tvm/src/codegen/merlinc/codeanalys_merlinc.h +++ b/tvm/src/codegen/merlinc/codeanalys_merlinc.h @@ -112,7 +112,6 @@ class CodeAnalysMerlinC : void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const StreamExpr* op, std::ostream& os) override; // NOLINT(*) // statment void VisitStmt_(const LetStmt* op) override; void VisitStmt_(const Store* op) override; @@ -132,7 +131,6 @@ class CodeAnalysMerlinC : void VisitStmt_(const Reuse* op) override; void VisitStmt_(const Partition* op) override; void VisitStmt_(const Stencil* op) override; - void VisitStmt_(const StreamStmt* op) override; /*! * Print Type represetnation of type t. * \param t The type representation. 
diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc deleted file mode 100755 index f5b1352a7..000000000 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include "./codegen_aocl.h" -#include "./codegen_sdaccel.h" -#include "../build_common.h" -#include "./sdaccel_module.h" -#include "../merlinc/codeanalys_merlinc.h" - -namespace TVM { -namespace codegen { - -#if HCL_SDACCEL_RUNTIME -runtime::Module BuildSDAccelSim(Array funcs) { - CodeAnalysMerlinC ca; - CodeGenSDACCEL cg; - for (LoweredFunc f : funcs) { - // 1st pass: Analyze AST and collect necessary information - ca.AddFunction(f); - str2tupleMap map_arg_type; - map_arg_type = ca.Finish(); - // 2nd pass: Generate kernel code - cg.AddFunction(f, map_arg_type); - } - std::string code = cg.Finish(); - return runtime::CreateSDAccelModule(funcs[0], code); -} - -TVM_REGISTER_API("codegen.build_sdaccel_csim") -.set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = BuildSDAccelSim(args[0]); - }); -#endif - - -template -std::string BuildOpenCL(Array funcs){ - using TVM::runtime::Registry; - CodeAnalysMerlinC ca; - CodeGen cg; - for(LoweredFunc f: funcs){ - ca.AddFunction(f); - str2tupleMapmap_arg_type; - map_arg_type = ca.Finish(); - cg.AddFunction(f, map_arg_type); - } - std::string code = cg.Finish(); - - LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; - return code; -} - - -TVM_REGISTER_API("codegen.build_sdaccel") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildOpenCL(args[0]); - }); - -TVM_REGISTER_API("codegen.build_aocl") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildOpenCL(args[0]); - }); -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc deleted file mode 100644 index 6d3247d02..000000000 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ /dev/null @@ -1,354 +0,0 @@ -#include -#include -#include -#include 
-#include -#include "./codegen_aocl.h" -#include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -inline Type String2Type(std::string& s) { - if (s.front() == '\"' && s.back() == '\"') { - s.erase(0, 1); - s.pop_back(); - } - std::istringstream is(s); - halideir_type_code_t code = Type::Int; - if (s.substr(0, 3) == "int") { - code = Type::Int; s = s.substr(3); - } else if (s.substr(0, 4) == "uint") { - code = Type::UInt; s = s.substr(4); - } else if (s.substr(0, 5) == "float") { - code = Type::Float; s = s.substr(5); - } else if (s.substr(0, 5) == "float") { - code = Type::Float; s = s.substr(5); - } else if (s == "handle") { - return Handle(); - } else { - LOG(FATAL) << "unknown type " << s; - } - int bits = 32, lanes = 1; - if (sscanf(s.c_str(), "%dx%d", &bits, &lanes) == 0) { - LOG(FATAL) << "unknown type " << s; - } - return Type(code, bits, lanes); -} - -void CodeGenAOCL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - this->InitFuncState(f); - for (Var arg: f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - this->decl_stream << "#include \"ihc_apint.h\"" << "\n"; - this->decl_stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable\n"; - this->stream << "__kernel " << "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - // alloc or get var name - Var v = f->args[i]; - std::string vid; - if (!var_idmap_.count(v.get())) - vid = AllocVarID(v.get()); - else vid = GetVarID(v.get()); - - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), 
this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - this->stream << "__global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << "restrict "; - this->stream << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - -void CodeGenAOCL::PrintType(Type t, std::ostream &os) -{ - int lanes = t.lanes(); - if(t.is_handle()) { - os << "void*";return; - } - if(t == Bool()) { - os <<"bool"; return; - } - CHECK_EQ(lanes, 1) - << "do not yet support vector types"; - - bool fail = false; - if(t.is_float()) { - switch(t.bits()) - { - case 16: - os<<"half"; - // enable_fp16_ = true; - break; - case 32: - os<<"float"; - break; - case 64: - os<< "double"; - // enable_fp64_ = true; - break; - default: - fail = true; - break; - } - if(!fail && lanes ==1) return; - if(!fail&&(lanes >= 2 && lanes <=16)) - { - os<=2 && lanes <= 16)) { - os << lanes; return; - } - if(fail && lanes==1) { - if(t.is_uint()) { - if (t.bits() > 64) { - os << "uint" << "64" << "_t"; return; - } else { - os<< "ap_uint<"<< t.bits() <<"> uintd_t"; return; - } - } - if(t.is_int()) { - if (t.bits() > 64) { - os << "int" << "64" << "_t"; return; - } else { - os << "ap_int<" << t.bits() << "> intd_t"; return; - } - } - } - } - - LOG(FATAL) << "Cannot convert type"<for_type == ForType::Unrolled) { - int unroll_factor = 0, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto factor = op->annotate_values[i].as(); - if (str->value == "factor" && factor != nullptr && factor->value > 1) { - unroll_factor = factor->value; - break; - } - } - i++; - } - os << "#pragma unroll"; - if (unroll_factor > 0) os << " " << unroll_factor << "\n"; - else os << "\n"; - } - else if (op->for_type == 
ForType::Pipelined) { - int II = 1, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto initiation_interval = op->annotate_values[i].as(); - if (str->value == "initiation_interval" && - initiation_interval != nullptr && - initiation_interval->value > 1) { - II = initiation_interval->value; - break; - } - } - i++; - } - os << "#pragma"; - os << " ii " << II << "\n"; - } - CodeGenAOCL::GenForStmt(op, os.str(), true); -} - -void CodeGenAOCL::VisitExpr_(const StreamExpr* op, std::ostream& os) { - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); - int i = 0; - for (auto key : op->annotate_keys) { - auto str = key.as(); - auto val = op->annotate_values[i].as(); - if (str->value == "name" && val != nullptr) { - vid = val->value; - decl_stream << "channel "; - PrintType(op->type, decl_stream); - decl_stream << " " << vid << ";\n"; - } - i++; - } - switch (op->stream_type) { - case StreamType::Channel: - os << "read_channel_intel("; - os << vid << ")"; - break; - case StreamType::Pipe: - os << "read_pipe("; - break; - case StreamType::FIFO: - // buffered channel - os << "fifo"; - break; - } -} - -void CodeGenAOCL::VisitStmt_(const KernelDef* op) { - LoweredFunc f; - SaveFuncState(f); - InitFuncState(f); - std::ostringstream save; - save << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // skip the first underscore - GetUniqueName("_"); - // add to alloc buffer : type. 
- for (const auto & k : op->args) { - RegisterHandleType(k.get(), k.get()->type); - } - stream << "__kernel "; - const UIntImm* is_void = op->ret_void.as(); - if (is_void) stream << "void"; - else PrintType(op->ret_type, stream); - stream << " " << op->name << "("; - - // streamed arg position to channel index - std::unordered_map stream_args; - for (size_t j = 0; j < op->channels.size(); j=j+2) { - int pos = op->channels[j].as()->value; - int idx = op->channels[j+1].as()->value; - stream_args[pos] = idx; - } - for (size_t i = 0; i < op->args.size(); ++i) { - VarExpr v = op->args[i]; - var_shape_map_[v.get()] = op->api_args[i]; - std::string vid = AllocVarID(v.get()); - if (stream_args.count(i)) { - stream_arg_pos[op->name].insert(i); - if (!stream_pragma) { - decl_stream << "#pragma OPENCL EXTENSION cl_intel_channels : enable\n"; - stream_pragma = true; - } - } else { - if (i != 0) { - if (stream_args.count(i-1)) void(0); - else stream << ", "; - } // un-streamed argument - this->stream << "__global "; - std::string str = PrintExpr(op->api_types[i]); - Type type = String2Type(str); - PrintType(type, stream); - this->stream << "* restrict " << vid; - } - } - stream << ") {\n"; - int func_scope = BeginScope(); - range_ = CollectIterRange(op->body); - PrintStmt(op->body); - EndScope(func_scope); - stream << "}\n\n"; - - // restore default stream - module_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - this->stream << save.str(); - RestoreFuncState(f); -} - -void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { - PrintIndent(); - stream << op->name << "("; - for (size_t i = 0; i < op->args.size(); i++) { - std::string str = op->name + "." 
+ PrintExpr(op->args[i]); - if (!stream_arg_pos[op->name].count(i)) { - if (i != 0) { - if (stream_arg_pos[op->name].count(i-1)) void(0); - else stream << ", "; - } - PrintExpr(op->args[i], stream); - } - } - stream << ");\n"; -} - -void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - os << op->name << "("; - for (size_t i = 0; i < op->args.size(); ++i) { - if (!stream_arg_pos[op->name].count(i)) { - if (i != 0) { - if (stream_arg_pos[op->name].count(i-1)) void(0); - else stream << ", "; - } - PrintExpr(op->args[i], stream); - } - } - os << ")"; -} - -void CodeGenAOCL::VisitStmt_(const StreamStmt* op) { - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); - PrintIndent(); - int i = 0; - for (auto key : op->annotate_keys) { - auto str = key.as(); - auto val = op->annotate_values[i].as(); - if (str->value == "name" && val != nullptr) vid = val->value; - i++; - } - switch (op->stream_type) { - case StreamType::Channel: - stream << "write_channel_intel("; - stream << vid << ", "; - break; - case StreamType::Pipe: - stream << "write_pipe("; - stream << vid << ", "; - break; - case StreamType::FIFO: - stream << "fifo("; - break; - } - PrintExpr(op->value, stream); - stream << ");\n"; -} - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h deleted file mode 100755 index 5778b70ec..000000000 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef TVM_CODEGEN_CODEGEN_AOCL_H_ -#define TVM_CODEGEN_CODEGEN_AOCL_H_ - -# include -# include -# include "./codegen_opencl.h" - -namespace TVM { -namespace codegen { - -class CodeGenAOCL : public CodeGenOpenCL { - public: - CodeGenAOCL(){} - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - - void VisitStmt_(const For* 
op) override; //NOLINT(*) - void VisitStmt_(const StreamStmt* op) override; //NOLINT(*) - void VisitStmt_(const KernelDef* op) override; //NOLINT(*) - void VisitStmt_(const KernelStmt* op) override; //NOLINT(*) - - void VisitExpr_(const StreamExpr* op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const KernelExpr* op, std::ostream& os) override; //NOLINT(*) - - private: - // whether to enable streaming - bool stream_pragma{false}; - // map from kernel name to set of streamed arg position index - std::unordered_map> stream_arg_pos; -}; -} // namespace codegen -} // namespace TVM - -#endif // TVM_CODEGEN_CODEGEN_AOCL_H_ diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h deleted file mode 100755 index 4f9a15fe5..000000000 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ -#define TVM_CODEGEN_CODEGEN_OPENCL_H_ - -# include -# include -# include -# include "../codegen_c.h" - -namespace TVM{ -namespace codegen{ - -class CodeGenOpenCL : public CodeGenC{ - public: - // void AddFunction(LoweredFunc f); - CodeGenOpenCL(); - virtual void AddFunction(LoweredFunc f, str2tupleMap map_arg_type) = 0; - std::string Finish(); - void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) - void PrintStorageSync(const Call* op) override; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - virtual void PrintType(Type t, std::ostream& os) = 0; //NOLINT - std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) override; // NOLINT(*) - void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) override; //NOLINT(*) - void PrintVecAddr(const Variable * buffer, Type t, - Expr base, std::ostream& os); //NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) - - 
//overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) - void VisitStmt_(const LetStmt* op) override; // NOLINT - void GenForStmt(const For* op, std::string pragma, bool before); - virtual void VisitStmt_(const For* op) = 0; - -protected: - // fp16 and fp64 extension - bool enable_fp16_{false}; - bool enable_fp64_{false}; -}; - -} // namespace codegen -} // namespace TVM - -#endif diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc deleted file mode 100644 index cba08fa2d..000000000 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ /dev/null @@ -1,219 +0,0 @@ -# include -# include -# include -# include -# include "./codegen_sdaccel.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -void CodeGenSDACCEL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - this->InitFuncState(f); - for (Var arg: f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - this->stream << "__kernel " << "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << 
vid; - } - else { - auto arg = map_arg_type[vid]; - this->stream << "__global "; - // this->stream << "global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - -void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - int lanes = t.lanes(); - if (t.is_handle()) { - //LOG(FATAL) << "The buffer shouldn't call PrintType for printing type"; - os << "void*"; - return ; - } - bool fail = false; - if (t.is_float()) { - switch (t.bits()) { - case 16: os << "half"; break; - case 32: os << "float"; break; - case 64: os << "double"; break; - // case 128: os << "double double"; break; - default: fail = true; break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } else if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << "unsigned "; - } - if (t.bits() == 8 && t.lanes() == 4) { - // directly 4 8 bit int in integer. 
- os << "int"; return; - } - - int target_bit = 1; - while (target_bit < t.bits()) - target_bit <<= 1; - - switch (target_bit) { - case 1: os << "int"; break; - case 2: os << "char"; break; - case 4: os << "char"; break; - case 8: os << "char"; break; - case 16: os << "short"; break; - case 32: os << "int"; break; - case 64: os << "long"; break; - case 128: os << "long"; break; // FIXME: Should use long long - default: fail = true; break; - } - if (!fail && lanes == 1) return; - // FIXME: Not yet support multiple lanes - //if (!fail && (lanes >= 2 && lanes <= 16)) { - // os << lanes; return; - //} - } - os << t; - LOG(WARNING) << "Cannot convert type " << t ; - return ; -} - -void CodeGenSDACCEL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global" || scope == "shared") { - os << "__local "; - } -} - -void CodeGenSDACCEL::VisitStmt_(const For* op) { - std::ostringstream os; - if (op->for_type == ForType::Unrolled) { - int unroll_factor = 0, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto factor = op->annotate_values[i].as(); - if (str->value == "factor" && factor != nullptr && factor->value > 1) { - unroll_factor = factor->value; - break; - } - } - i++; - } - if (unroll_factor > 0) { - os << "__attribute__((opencl_unroll_hint("; - os << unroll_factor << ")))\n"; - } else { - os << "\n"; - } - } - else if (op->for_type == ForType::Pipelined) { - int II = 1, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto initiation_interval = op->annotate_values[i].as(); - if (str->value == "initiation_interval" && - initiation_interval != nullptr && - initiation_interval->value > 1) { - II = initiation_interval->value; - break; - } - } - i++; - } - os << "__attribute__((xcl_pipeline_loop("; - os << II << ")))\n"; - } - CodeGenSDACCEL::GenForStmt(op, os.str(), true); -} - -void CodeGenSDACCEL::VisitStmt_(const Partition* op) { - std::string vid = 
GetVarID(op->buffer_var.get()); - stream << vid << " "; - if (op->partition_type != PartitionType::Complete) { - stream << "__attribute__((xcl_array_partition("; - switch (op->partition_type) { - // case PartitionType::Complete: - // stream << "complete,"; - // break; - case PartitionType::Block: - stream << "block,"; - break; - case PartitionType::Cyclic: - stream << "cyclic,"; - break; - } - stream << op->factor << ","; - stream << op->dim << ")))\n"; - } else { - if (op->dim == 0) { - stream << "__attribute__((xcl_array_partition))\n"; - } else { - stream << "__attribute__((xcl_array_partition("; - stream << "complete,"; - stream << op->factor << ","; - stream << op->dim << ")))\n"; - } - } -} - -void CodeGenSDACCEL::VisitStmt_(const StreamStmt* op) { - std::string vid = GetVarID(op->buffer_var.get()); - PrintIndent(); - stream << vid; - switch (op->stream_type) { - case StreamType::Channel: - stream << "[channel]"; - break; - case StreamType::FIFO: - stream << "[fifo]"; - break; - case StreamType::Pipe: - stream << "[pipe]"; - break; - } - stream << ".write"; - PrintExpr(op->value, stream); - stream << ";\n"; -} - -void CodeGenSDACCEL::VisitExpr_(const StreamExpr* op, std::ostream& os) { - std::string vid = GetVarID(op->buffer_var.get()); - os << vid << ".read()"; -} - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h deleted file mode 100755 index 4f1cfa053..000000000 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ -#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ - -# include -# include -# include "./codegen_opencl.h" - -namespace TVM { -namespace codegen { - -class CodeGenSDACCEL : public CodeGenOpenCL { - public: - CodeGenSDACCEL(){} - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void PrintStorageScope(const std::string& 
scope, std::ostream& os) override; //NOLINT(*) - - void VisitStmt_(const For* op) override; //NOLINT(*) - void VisitStmt_(const Partition* op) override; //NOLINT(*) - void VisitStmt_(const StreamStmt* op) override; //NOLINT(*) - - void VisitExpr_(const StreamExpr* op, std::ostream& os) override; //NOLINT(*) - -}; -} // namespace codegen -} // namespace TVM - -#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc deleted file mode 100644 index 63f12e86b..000000000 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ /dev/null @@ -1,645 +0,0 @@ -#include "./sdaccel_module.h" -#include -#include -#include -#include -#include -#include -#include - -namespace TVM { -namespace runtime { - -namespace { - -void PrintIndent(std::ofstream& stream, int indent) { - for (int i = 0; i < indent; i++) - stream << ' '; -} - -inline size_t GetTypeSize(TVMType t) { - size_t byte = (t.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - return byte; -} - -inline size_t GetDataSize(TVMArray* arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= arr->shape[i]; - } - size_t byte = (arr->dtype.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - size *= (byte * 8 * arr->dtype.lanes + 7) / 8; - return size; -} - -inline TVMType Type2TVMType(Type t) { - TVMType tt; - if (t.is_int()) tt.code = kDLInt; - else if (t.is_uint()) tt.code = kDLUInt; - else if (t.is_float()) tt.code = kDLFloat; - else LOG(FATAL) << "Unacceptable type: " << t; - tt.bits = static_cast(t.bits()); - tt.fracs = static_cast(t.fracs()); - return tt; -} - -inline std::string Type2Str(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - str += "int"; - } else if (t.code == kDLUInt) { - str += "unsigned int"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - 
LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2ExtStr(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - if (t.fracs > 0) str += "ap_fixed<"; - else str += "ap_int<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) str += "ap_ufixed<"; - else str += "ap_uint<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2Byte(TVMType t) { - std::string str = ""; - if (t.code == kDLFloat) { - str += "float"; - } else if (t.code == kDLInt || t.code == kDLUInt) { - if (t.code == kDLUInt) str += "unsigned"; - str += "int"; - if (t.bits <= 8) str += "8"; - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - // str += "_t"; - } - return str; -} - -void CollectArgInfo(TVMArgs& args, - LoweredFunc func, - std::vector& arg_sizes, - std::vector& arg_types) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - arg_sizes.push_back(GetDataSize(arr)); - arg_types.push_back(arr->dtype); - } else { - const Variable* var = func->api_args[i].as(); - TVMType t = Type2TVMType(var->type); - arg_sizes.push_back(GetTypeSize(t)); - arg_types.push_back(t); - } - } -} - -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - // generate shared memory key and id - // TODO: maybe get the current path?? 
- key_t key = ftok("/", i+1); - int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); - shmids.push_back(shmid); - // copy mem from TVM args to the shared memory - void* mem = shmat(shmid, nullptr, 0); - memcpy(mem, arr->data, arg_sizes[i]); - } else { - shmids.push_back(0); - } - } -} - -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes) { - for (size_t i = 0; i < shmids.size(); i++) { - TVMArray* arr = args[i]; - int shmid = shmids[i]; - void* mem = shmat(shmid, nullptr, 0); - memcpy(arr->data, mem, arg_sizes[i]); - shmdt(mem); - shmctl(shmid, IPC_RMID, nullptr); - } -} - -// copy values from the shared mem to local mem -void PrintCopy(TVMArray* arr, - std::ofstream& stream, - int indent, size_t nth_arr) { - for (int i = 0; i < arr->ndim; i++) { - PrintIndent(stream, indent); - stream << "for (size_t i" << i << " = 0; "; - stream << "i" << i << " < " << arr->shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - if (i == arr->ndim-1) { - PrintIndent(stream, indent); - stream << "source_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul = 1; - for (int j = arr->ndim-2;j >= 0;j--) { - mul *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul; - } - stream << "] = "; - stream << "arg_" << nth_arr; - stream << "[i" << arr->ndim - 1; - - int mul2 = 1; - for (int j = arr->ndim-2;j >= 0;j--) { - mul2 *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul2; - } - stream << "]"; - if (arr->dtype.fracs > 0) - stream << " >> " << static_cast(arr->dtype.fracs); - stream << ";\n"; - } - } - for (int i = 0; i < arr->ndim; i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } -} - -// copy values from local mem back to shared mem -void PrintCopyBack(TVMArray* arr, - std::ofstream& stream, - int indent, size_t nth_arr) { - for (int i = 0; i < arr->ndim; i++) { - PrintIndent(stream, indent); - stream << "for (size_t i" << i << " = 0; "; - stream << "i" << i << " < " << arr->shape[i] << "; "; 
- stream << "i" << i << "++) {\n"; - indent += 2; - if (i == arr->ndim-1) { - PrintIndent(stream, indent); - stream << "arg_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul; - } - stream << "] = "; - // stream << Type2ExtStr(arr->dtype); - stream << "source_" << nth_arr; - stream << "[i" << arr->ndim - 1; - int mul2 = 1; - for (int j = arr->ndim-2;j >=0;j--) { - mul2 *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul2; - } - stream << "]"; - if (arr->dtype.fracs > 0) - stream << " << " << static_cast(arr->dtype.fracs); - stream << ";\n"; - } - } - for (int i = 0; i < arr->ndim; i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } -} - -void GenMakFile() { - int indent = 0; - std::ofstream stream; - stream.open("sdaccel.mk"); - indent += 4; - - stream << "ifndef XILINX_SDX\n"; - stream << "$(error Environment variable XILINX_SDX is required and should point to SDAccel install area)\n"; - stream << "endif\n"; - - stream << "SDA_FLOW = cpu_emu\n"; - stream << "HOST_SRCS = host.cpp\n"; - stream << "HOST_EXE_DIR=.\n"; - stream << "HOST_EXE = host\n"; - stream << "HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL\n"; - stream << "HOST_LFLAGS = \n"; - stream << "KERNEL_SRCS = default_function.cl\n"; - stream << "KERNEL_NAME = default_function\n"; - stream << "KERNEL_DEFS =\n"; - stream << "KERNEL_INCS =\n"; - stream << "XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0\n"; - stream << "XDEVICE_REPO_PATH=\n"; - stream << "KEEP_TEMP=1\n"; - stream << "KERNEL_DEBUG=\n"; - stream << "XCLBIN_NAME=bin_krnl\n"; - stream << "HOST_CFLAGS+=-DTARGET_DEVICE=\\\"${XDEVICE}\\\"\n"; - stream << "BOARD_SETUP_FILE=setup.sh\n"; - stream << "ifeq (${SDA_FLOW},cpu_emu)\n"; - PrintIndent(stream, indent); - stream << "CLCC_OPT += -t sw_emu\n"; - PrintIndent(stream, indent); - stream << "XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin\n"; - stream << "else ifeq 
(${SDA_FLOW},hw_emu)\n"; - PrintIndent(stream, indent); - stream << "CLCC_OPT += -t hw_emu\n"; - PrintIndent(stream, indent); - stream << "XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin\n"; - stream << "else ifeq (${SDA_FLOW},hw)\n"; - PrintIndent(stream, indent); - stream << "XCLBIN = ${XCLBIN_NAME}_hw.xclbin\n"; - stream << "CLCC_OPT += -t hw\n"; - stream << "endif\n"; - - stream << "HOST_ARGS = ${XCLBIN}\n"; - stream << "COMMON_DIR = ./common\n"; - stream << "include ${COMMON_DIR}/common.mk\n"; - - stream.close(); -} - -void GenCommonFile() { - int indent = 0; - std::ofstream stream; - stream.open("./common/common.mk"); - indent += 4; - stream << "SHELL = /bin/bash\n"; - stream << "VPATH = ./\n"; - stream << "CC = xcpp\n"; - stream << "CLCC = xocc\n"; - stream << "ifeq ($(XDEVICE_REPO_PATH),)\n"; - PrintIndent(stream, indent); - stream << "DEVICE_REPO_OPT = \n"; - stream << "else\n"; - stream << "DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH}\n"; - stream << "endif\n"; - stream << "HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2\n"; - stream << "HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread\n"; - stream << "CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS}\n"; - stream << "ifeq (${KEEP_TEMP},1)\n"; - PrintIndent(stream, indent); - stream << "CLCC_OPT += -s\n"; - stream << "endif\n"; - stream << "ifeq (${KERNEL_DEBUG},1)\n"; - PrintIndent(stream, indent); - stream << "CLCC_OPT += -g\n"; - stream << "endif\n"; - stream << "CLCC_OPT += --kernel ${KERNEL_NAME}\n"; - stream << "OBJECTS := $(HOST_SRCS:.cpp=.o)\n"; - stream << ".PHONY: all\n"; - stream << "all: run\n"; - - stream << "host: ${HOST_EXE_DIR}/${HOST_EXE}\n"; - stream << "xbin_cpu_em:\n"; - PrintIndent(stream, indent); - stream << "make SDA_FLOW=cpu_emu xbin -f sdaccel.mk\n"; - stream << "xbin_hw_em:\n"; - PrintIndent(stream, indent); - stream << "make SDA_FLOW=hw_emu xbin -f sdaccel.mk\n"; 
- stream << "xbin_hw :\n"; - PrintIndent(stream, indent); - stream << "make SDA_FLOW=hw xbin -f sdaccel.mk\n"; - stream << "xbin: ${XCLBIN}\n"; - stream << "run_cpu_em: \n"; - PrintIndent(stream, indent); - stream << "make SDA_FLOW=cpu_emu run_em -f sdaccel.mk\n"; - stream << "run_hw_em: \n"; - PrintIndent(stream, indent); - stream << "make SDA_FLOW=hw_emu run_em -f sdaccel.mk\n"; - stream << "run_hw : \n"; - PrintIndent(stream, indent); - stream << "make SDA_FLOW=hw run_hw_int -f sdaccel.mk\n"; - stream << "run_em: xconfig host xbin\n"; - PrintIndent(stream, indent); - stream << "XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS}\n"; - stream << "run_hw_int : host xbin_hw\n"; - PrintIndent(stream, indent); - stream << "source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS}\n"; - stream << "estimate : \n"; - PrintIndent(stream, indent); - stream << "${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS}\n"; - stream << "xconfig : emconfig.json\n"; - stream << "emconfig.json :\n"; - PrintIndent(stream, indent); - stream << "emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od .\n"; - stream << "${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS}\n"; - PrintIndent(stream, indent); - stream << "${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@\n"; - stream << "${XCLBIN}:\n"; - PrintIndent(stream, indent); - stream << "${CLCC} ${CLCC_OPT} ${KERNEL_SRCS}\n"; - stream << "%.o: %.cpp\n"; - PrintIndent(stream, indent); - stream << "${CC} ${HOST_CFLAGS} -c $< -o $@\n"; - stream << "clean:\n"; - PrintIndent(stream, indent); - stream << "${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil\n"; - stream << "cleanall: clean\n"; - PrintIndent(stream, indent); - stream << "${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou\n"; - - stream.close(); -} - -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc func, - std::string test_file) { - 
int indent = 0; - std::ofstream stream; - stream.open("host.cpp"); - indent += 2; - - stream << "#define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - stream << "#define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - stream << "#define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - stream << "#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - // stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#pragma once\n"; - stream << "\n\n"; - - // stream << test_file; - stream << "\n\n"; - - stream << "int main(void) { \n"; - - stream << "#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; - indent += 2; - stream << " #define STR_VALUE(arg) #arg\n"; - stream << " #define GET_STRING(name) STR_VALUE(name)\n"; - stream << " #define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n"; - stream << "#endif\n"; - - // get the krnl code - PrintIndent(stream, indent); - stream << "char* xclbinFilename = argv[1];\n"; - stream << "\n"; - - for (int i = 0;i < args.size();i++) { - PrintIndent(stream, indent); - stream << "std::vector<" << Type2Str(arg_types[i]); - stream << "> "; - stream << "source_" << i << "("; - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { - if (j == arr->ndim-1) { - stream << arr->shape[j] << ")"; - } else { - // stream << " * " << arr->shape[j] << ")"; - stream << arr->shape[j] << " * "; - } - } - stream << ";\n"; - } - stream << "\n"; - - for (int i = 0;i < args.size();i++) { - PrintIndent(stream, indent); - stream << "size_t vector_size_bytes_" << i; - stream << " = sizeof(" << Type2Str(arg_types[i]); - stream << ")"; - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { - stream << 
" * " << arr->shape[j]; - } - stream << ";\n"; - } - stream << "\n"; - - for (int i = 0;i < args.size();i++ ) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Str(arg_types[i]) << "* "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Str(arg_types[i]) << "*)"; - stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - TVMArray* arr = args[i]; - // copy from shared mem - PrintCopy(arr, stream, indent, i); - } - - // Getting First Platform - PrintIndent(stream, indent); - stream << "std::vector platforms;\n"; - PrintIndent(stream, indent); - stream << "cl::Platform::get(&platforms);\n"; - PrintIndent(stream, indent); - stream << "cl::Platform platform = platforms[0];\n"; - stream << "\n"; - - // Getting ACCELERATOR Devices and selecting 1st such device - PrintIndent(stream, indent); - stream << "std::vector devices;\n"; - PrintIndent(stream, indent); - stream << "platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);\n"; - PrintIndent(stream, indent); - stream << "cl::Device device = devices[0];\n"; - stream << "\n"; - - // Creating Context and Command Queue for selected Device - PrintIndent(stream, indent); - stream << "cl::Context context(device);\n"; - PrintIndent(stream, indent); - stream << "cl::CommandQueue q(context, device);\n"; - stream << "\n"; - - // Loading XCL Bin into char buffer - PrintIndent(stream, indent); - stream << "std::ifstream bin_file(xclbinFilename, std::ifstream::binary);\n"; - PrintIndent(stream, indent); - stream << "bin_file.seekg (0, bin_file.end);\n"; - PrintIndent(stream, indent); - stream << "unsigned nb = bin_file.tellg();\n"; - PrintIndent(stream, indent); - stream << "bin_file.seekg (0, bin_file.beg);\n"; - PrintIndent(stream, indent); - stream << "char *buf = new char [nb];\n"; - PrintIndent(stream, indent); - stream << "bin_file.read(buf, nb);\n"; - stream << "\n"; - - // Creating Program from Binary File - PrintIndent(stream, indent); - stream << "cl::Program::Binaries bins;\n"; - 
PrintIndent(stream, indent); - stream << "bins.push_back({buf,nb});\n"; - PrintIndent(stream, indent); - stream << "devices.resize(1);\n"; - PrintIndent(stream, indent); - stream << "cl::Program program(context, devices, bins);\n"; - stream << "\n"; - - // Creating Kernel and Functor of Kernel - PrintIndent(stream, indent); - stream << "int err1;\n"; - PrintIndent(stream, indent); - stream << "cl::Kernel kernel(program, \"default_function\", &err1);\n"; - PrintIndent(stream, indent); - stream << "auto default_function = cl::KernelFunctor<"; - for (int i = 0;i < args.size();i++) { - if (i == args.size() - 1) { - stream << "cl::Buffer&>(kernel);\n"; - } else { - stream << "cl::Buffer&, "; - } - } - stream << "\n"; - - // Creating Buffers inside Device - for (int i = 0;i < args.size();i++) { - PrintIndent(stream, indent); - stream << "cl::Buffer buffer_" << i; - stream << "(context, CL_MEM_READ_WRITE, vector_size_bytes_" << i << ");\n"; - } - stream << "\n"; - - // Copying input data to Device buffer from host memory - for (int i = 0;i < args.size();i++) { - PrintIndent(stream, indent); - stream << "q.enqueueWriteBuffer(buffer_" << i; - stream << ", CL_TRUE, 0, vector_size_bytes_" << i; - stream << ", source_" << i << ".data());\n"; - } - stream << "\n"; - - // Running Kernel - PrintIndent(stream, indent); - stream << func->name << "("; - stream << "cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),"; - for (int i = 0; i < args.size(); i++) { - stream << "buffer_" << i; - if (i != args.size()-1) - stream << ", "; - } - stream << ");\n"; - PrintIndent(stream, indent); - stream << "q.finish();\n"; - stream << "\n"; - - // Copying Device result data to Host memory - for (int i = 0;i < args.size(); i++) { - PrintIndent(stream, indent); - stream << "q.enqueueReadBuffer(buffer_" << i; - stream << ", CL_TRUE, 0, vector_size_bytes_" << i; - stream << ", source_" << i << ".data());\n"; - } - stream << "\n"; - - // copy to shared mem - for (int i = 0;i < 
args.size();i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - PrintCopyBack(arr, stream, indent, i); - PrintIndent(stream, indent); - stream << "shmdt("; - stream << "arg_" << i << ");\n"; - } - } - - stream << "}\n"; - stream.close(); -} -} // namespace - - -class SDAccelModuleNode final : public ModuleNode { - public: - SDAccelModuleNode(LoweredFunc func, std::string test_file) - : func_(func), test_file_(test_file) {} - - const char* type_key() const { - return "sdaccel_sw_emu"; - } - - PackedFunc GetFunction( - const std::string& name, - const std::shared_ptr& sptr_to_self) final { - return PackedFunc([this](TVMArgs args, TVMRetValue* rv){ - - if (args.size() != (int)func_->args.size()) - LOG(FATAL) << "The function should take in " << func_->args.size() - << " inputs but get " << args.size(); - std::vector arg_sizes; - std::vector arg_types; - std::vector shmids; - CollectArgInfo(args, func_, arg_sizes, arg_types); - GenSharedMem(args, shmids, arg_sizes); - LOG(CLEAN) << "Creating a Host file for SDAccel Runtime ..."; - GenHostCode(args, shmids, arg_types, func_, test_file_); - - LOG(CLEAN) << "Creating a Common folder for common.mk ..."; - system("mkdir common"); - GenCommonFile(); - - LOG(CLEAN) << "Creating a Makfile for compling the SDAccel OpenCL Code ..."; - GenMakFile(); - // TODO: find a better way to do the following - LOG(CLEAN) << "Compiling the generated SDAccel OpenCL Code ..."; - // system("make -f ./sdaccel.mk run_cpu_em"); - LOG(CLEAN) << "Running SDAccel OpenCL Software Simulation ..."; - LOG(CLEAN) << "Finished SDAccel OpenCL Software Simulation ..."; - // system("make -f sdaccel.mk cleanall"); - FreeSharedMem(args, shmids, arg_sizes); - }); - } - - private: - LoweredFunc func_; - std::string test_file_; -}; - -Module CreateSDAccelModule(LoweredFunc func, - std::string code) { - std::shared_ptr n = - std::make_shared(func, code); - - return Module(n); -} - -} // namespace runtime -} // namespace TVM diff --git 
a/tvm/src/codegen/opencl/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel_module.h deleted file mode 100644 index 01f361dba..000000000 --- a/tvm/src/codegen/opencl/sdaccel_module.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef SDACCEL_MODULE_H -#define SDACCEL_MODULE_H - -#include -#include -#include "../build_common.h" - -namespace TVM { -namespace runtime { - -Module CreateSDAccelModule( - LoweredFunc func, - std::string code); - -} // namespace runtime -} // namespace TVM - -#endif diff --git a/tvm/src/codegen/ppac/build_rv64_ppac.cc b/tvm/src/codegen/ppac/build_rv64_ppac.cc deleted file mode 100644 index c14a1cdf3..000000000 --- a/tvm/src/codegen/ppac/build_rv64_ppac.cc +++ /dev/null @@ -1,32 +0,0 @@ -/* - * \file build_rv64_ppac.cc - */ - -#include "./codegen_rv64_ppac.h" -#include "../build_common.h" - -namespace TVM{ -namespace codegen{ - -std::string BuildRV64PPAC(Array funcs) { - CodeAnalysMerlinC ca; - CodeGenRV64PPAC cg; - for (LoweredFunc f: funcs) { - ca.AddFunction(f); - str2tupleMap map_arg_type; - map_arg_type = ca.Finish(); - cg.AddFunction(f, map_arg_type); - } - std::string code = cg.Finish(); - - LOG(WARNING) << "RV64_PPAC backend doesn't have runtime, return kernel code"; - return code; -} - -TVM_REGISTER_API("codegen.build_rv64_ppac") -.set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = BuildRV64PPAC(args[0]); - }); - -} // namespace codegen -} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/ppac/codegen_rv64_ppac.cc b/tvm/src/codegen/ppac/codegen_rv64_ppac.cc deleted file mode 100644 index 1fd5e2b6e..000000000 --- a/tvm/src/codegen/ppac/codegen_rv64_ppac.cc +++ /dev/null @@ -1,202 +0,0 @@ -/* - * \file codegen_rv64_ppac.cc - */ - -#include -#include -#include -#include -#include -#include -#include -#include "./codegen_rv64_ppac.h" -#include "../build_common.h" - -namespace TVM { -namespace codegen { - -void CodeGenRV64PPAC::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - 
this->InitFuncState(f); - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - // Write entry function name - this->stream << "void " << f->name << "("; - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - PrintType(std::get<1>(arg), this->stream); - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - this->stream << "}\n\n"; -} - -void CodeGenRV64PPAC::VisitStmt_(const For* op) { - std::string func_name; - bool is_ppac_func = false; - uint8_t i = 0; - for (auto key: op->annotate_keys) { - if (auto str = key.as()) { - if (str->value == "_ppac_func_name") { - auto name = op->annotate_values[i].as(); - func_name = name->value; - is_ppac_func = true; - break; - } - } - ++i; - } - if (is_ppac_func) { - // scan along the annotate list to find parameters - std::string ret, arg0, arg1; - int batch_num, in_block_num, out_channel_num; - i = 0; - uint8_t param_num = 0; - for (auto key: op->annotate_keys) { - if (auto str = key.as()) { - if (str->value == "_ret") { - auto v = op->annotate_values[i].as(); - ret = v->value; - ++param_num; - } else if (str->value == "_arg0") { - auto v = op->annotate_values[i].as(); - arg0 = v->value; - ++param_num; - } else if (str->value == "_arg1") { - auto v = op->annotate_values[i].as(); - arg1 = v->value; - ++param_num; - } else if (str->value == "_batch_num") { - auto v = op->annotate_values[i].as(); - batch_num = v->value; - ++param_num; - } else if (str->value == 
"_in_block_num") { - auto v = op->annotate_values[i].as(); - in_block_num = v->value; - ++param_num; - } else if (str->value == "_out_channel_num") { - auto v = op->annotate_values[i].as(); - out_channel_num = v->value; - ++param_num; - } - } - ++i; - } - if (param_num != 6) { - LOG(FATAL) << "PPAC function call need exactly 6 parameters but found " << param_num; - } - // print ppac function call - PrintIndent(); - stream << func_name << "(" - << ret << ", " - << arg0 << ", " - << arg1 << ", " - << batch_num << ", " - << in_block_num << ", " - << out_channel_num - << ");\n"; - return; - } - CodeGenC::VisitStmt_(op); -} - -void CodeGenRV64PPAC::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - -void CodeGenRV64PPAC::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - // Skip the buffer data checking - if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) - return ; - PrintIndent(); - if (cond[0] == '(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { - CHECK_EQ(t.lanes(), 1) - << "do not support vector types"; - if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - if (t.bits() <= 8) { - os << 
"uint8_t"; return; - } else if (t.bits() <= 16) { - os << "uint16_t"; return; - } else if (t.bits() <= 32) { - os << "uint32_t"; return; - } else if (t.bits() <= 64) { - os << "uint64_t"; return; - } else { - LOG(WARNING) << "Casting type " << t << " to uint64_t"; - os << "uint64_t"; - return; - } - } - else if (t.is_int()) { - if (t.bits() <= 8) { - os << "int8_t"; return; - } else if (t.bits() <= 16) { - os << "int16_t"; return; - } else if (t.bits() <= 32) { - os << "int32_t"; return; - } else if (t.bits() <= 64) { - os << "int64_t"; return; - } else { - LOG(WARNING) << "Casting type " << t << " to int64_t"; - os << "int64_t"; - return; - } - } - } - os << t; -} - -} //namespace codegen -} //namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/ppac/codegen_rv64_ppac.h b/tvm/src/codegen/ppac/codegen_rv64_ppac.h deleted file mode 100644 index 881bdea05..000000000 --- a/tvm/src/codegen/ppac/codegen_rv64_ppac.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * \file codegen_rv64_ppac.h - */ - -#ifndef TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ -#define TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ - -#include -#include -#include "../codegen_c.h" -#include "../merlinc/codeanalys_merlinc.h" - -namespace TVM { -namespace codegen { - -class CodeGenRV64PPAC : public CodeGenC { - public: - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - void PrintType(Type t, std::ostream& os) override; - void VisitStmt_(const LetStmt* op) override; - void VisitStmt_(const IfThenElse* op) override; - void VisitStmt_(const For* op) override; -}; - -} // namespace codegen -} // namespace TVM - -#endif //TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ \ No newline at end of file diff --git a/tvm/src/lang/ir.cc b/tvm/src/lang/ir.cc index c88f8ea94..3589de195 100644 --- a/tvm/src/lang/ir.cc +++ b/tvm/src/lang/ir.cc @@ -149,8 +149,6 @@ TVM_REGISTER_NODE_TYPE(Quantize); TVM_REGISTER_NODE_TYPE(KernelDef); TVM_REGISTER_NODE_TYPE(KernelExpr); TVM_REGISTER_NODE_TYPE(KernelStmt); -TVM_REGISTER_NODE_TYPE(StreamStmt); 
-TVM_REGISTER_NODE_TYPE(StreamExpr); TVM_REGISTER_NODE_TYPE(Return); TVM_REGISTER_NODE_TYPE(Break); TVM_REGISTER_NODE_TYPE(While); diff --git a/tvm/src/pass/ir_mutator.cc b/tvm/src/pass/ir_mutator.cc index 89485e723..ec67aa314 100644 --- a/tvm/src/pass/ir_mutator.cc +++ b/tvm/src/pass/ir_mutator.cc @@ -202,15 +202,6 @@ Stmt IRMutator::Mutate_(const Store *op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const StreamStmt *op, const Stmt& s) { - Expr value = this->Mutate(op->value); - if (value.same_as(op->value)) { - return s; - } else { - return StreamStmt::make(op->buffer_var, value, op->stream_type, op->depth); - } -} - Stmt IRMutator::Mutate_(const Provide* op, const Stmt& s) { auto new_args = MutateArray(op->args, this); auto new_value = this->Mutate(op->value); @@ -330,8 +321,7 @@ Stmt IRMutator::Mutate_(const KernelDef *op, const Stmt &s) { if (body.same_as(op->body) && ret_void.same_as(op->ret_void)) { return s; } else { - return KernelDef::make(op->args, op->api_args, op->api_types, - body, ret_void, op->ret_type, op->name, op->channels); + return KernelDef::make(op->args, body, ret_void, op->ret_type, op->name); } } @@ -412,7 +402,6 @@ TVM_STATIC_IR_FUNCTOR(IRMutator, vtable_stmt) .DISPATCH_TO_MUTATE_STMT(Prefetch) .DISPATCH_TO_MUTATE_STMT(KernelDef) .DISPATCH_TO_MUTATE_STMT(KernelStmt) -.DISPATCH_TO_MUTATE_STMT(StreamStmt) .DISPATCH_TO_MUTATE_STMT(Return) .DISPATCH_TO_MUTATE_STMT(Break) .DISPATCH_TO_MUTATE_STMT(While) @@ -441,10 +430,6 @@ Expr IRMutator::Mutate_(const Load *op, const Expr& e) { } } -Expr IRMutator::Mutate_(const StreamExpr *op, const Expr& e) { - return e; -} - Expr IRMutator::Mutate_(const Let *op, const Expr& e) { Expr value = this->Mutate(op->value); Expr body = this->Mutate(op->body); @@ -680,7 +665,6 @@ TVM_STATIC_IR_FUNCTOR(IRMutator, vtable_expr) .DISPATCH_TO_MUTATE_EXPR(SetBit) .DISPATCH_TO_MUTATE_EXPR(SetSlice) .DISPATCH_TO_MUTATE_EXPR(Quantize) -.DISPATCH_TO_MUTATE_EXPR(StreamExpr) .DISPATCH_TO_MUTATE_EXPR(KernelExpr); } // 
namespace ir diff --git a/tvm/src/pass/ir_visitor.cc b/tvm/src/pass/ir_visitor.cc index 6346c6262..160cb906e 100644 --- a/tvm/src/pass/ir_visitor.cc +++ b/tvm/src/pass/ir_visitor.cc @@ -252,13 +252,6 @@ void IRVisitor::Visit_(const KernelStmt *op) { } } -void IRVisitor::Visit_(const StreamStmt *op) { - this->Visit(op->value); -} - -void IRVisitor::Visit_(const StreamExpr *op) { -} - void IRVisitor::Visit_(const Return *op) { this->Visit(op->value); } @@ -345,8 +338,6 @@ TVM_STATIC_IR_FUNCTOR(IRVisitor, vtable) .DISPATCH_TO_VISIT(KernelDef) .DISPATCH_TO_VISIT(KernelExpr) .DISPATCH_TO_VISIT(KernelStmt) -.DISPATCH_TO_VISIT(StreamStmt) -.DISPATCH_TO_VISIT(StreamExpr) .DISPATCH_TO_VISIT(Return) .DISPATCH_TO_VISIT(Break) .DISPATCH_TO_VISIT(While) diff --git a/tvm/src/pass/split_host_device.cc b/tvm/src/pass/split_host_device.cc index fdcd0c56f..534e0b695 100644 --- a/tvm/src/pass/split_host_device.cc +++ b/tvm/src/pass/split_host_device.cc @@ -81,14 +81,6 @@ class IRUseDefAnalysis : public IRMutator { return IRMutator::Mutate_(op, s); } - Stmt Mutate_(const StreamStmt *op, const Stmt& s) final { - if (!def_count_.count(op->buffer_var.get())) { - def_count_[op->buffer_var.get()] = 0; - use_count_[op->buffer_var.get()] = 0; - } - return IRMutator::Mutate_(op, s); - } - Expr Mutate_(const Let *op, const Expr& e) final { this->HandleDef(op->var.get()); Expr body = this->Mutate(op->body); @@ -117,14 +109,6 @@ class IRUseDefAnalysis : public IRMutator { return IRMutator::Mutate_(op, e); } - Expr Mutate_(const StreamExpr *op, const Expr& e) final { - if (!def_count_.count(op->buffer_var.get())) { - def_count_[op->buffer_var.get()] = 0; - use_count_[op->buffer_var.get()] = 0; - } - return IRMutator::Mutate_(op, e); - } - Stmt Mutate_(const KernelDef *op, const Stmt& s) { for (auto arg : op->args) { this->HandleDef(arg.get()); diff --git a/tvm/src/pass/stream_inference.cc b/tvm/src/pass/stream_inference.cc deleted file mode 100644 index ec18b1871..000000000 --- 
a/tvm/src/pass/stream_inference.cc +++ /dev/null @@ -1,345 +0,0 @@ -/*! - * Copyright (c) 2019 by Contributors - * \file remove_no_op.cc - * \brief Remove no op from the stmt - */ -#include -#include -#include -#include - -namespace TVM { -namespace ir { - -// use/def analysis to capture host xcel deps -class StreamUseDefAnalysis : public IRMutator { - public: - Stmt Mutate_(const AttrStmt *op, const Stmt& s) final { - if (op->attr_key == attr::device_scope) { - if (op->value.as()->value == "fpga") - host_scope_ = false; - return IRMutator::Mutate_(op, s); - } else { - return IRMutator::Mutate_(op, s); - } - } - - Stmt Mutate_(const LetStmt *op, const Stmt& s) final { - this->HandleDef(op->var.get()); - Stmt body = this->Mutate(op->body); - Expr value = this->Mutate(op->value); - if (body.same_as(op->body) && - value.same_as(op->value)) { - return s; - } else { - return LetStmt::make(op->var, value, body); - } - } - - Stmt Mutate_(const For *op, const Stmt& s) final { - this->HandleDef(op->loop_var.get()); - return IRMutator::Mutate_(op, s); - } - - Stmt Mutate_(const Allocate *op, const Stmt& s) final { - this->HandleDef(op->buffer_var.get()); - return IRMutator::Mutate_(op, s); - } - - Stmt Mutate_(const Store *op, const Stmt& s) final { - this->HandleUse(op->buffer_var); - return IRMutator::Mutate_(op, s); - } - - Stmt Mutate_(const StreamStmt *op, const Stmt& s) final { - this->HandleUse(op->buffer_var); - return IRMutator::Mutate_(op, s); - } - - Expr Mutate_(const Let *op, const Expr& e) final { - this->HandleDef(op->var.get()); - Expr body = this->Mutate(op->body); - Expr value = this->Mutate(op->value); - if (body.same_as(op->body) && - value.same_as(op->value)) { - return e; - } else { - return Let::make(op->var, value, body); - } - } - - Expr Mutate_(const Variable *op, const Expr& e) final { - this->HandleUse(e); - return IRMutator::Mutate_(op, e); - } - - Expr Mutate_(const Load *op, const Expr& e) final { - this->HandleUse(op->buffer_var); - return 
IRMutator::Mutate_(op, e); - } - - Expr Mutate_(const StreamExpr *op, const Expr& e) final { - this->HandleUse(op->buffer_var); - return IRMutator::Mutate_(op, e); - } - - Stmt Mutate_(const KernelDef *op, const Stmt& s) { - for (auto arg : op->args) { - this->HandleDef(arg.get()); - } - Stmt body = this->Mutate(op->body); - for (auto arg : op->args) { - xcel_def_count_[arg.get()] = 0; - } - return s; - } - - void HandleDef(const Variable* v) { - if (host_scope_) { - CHECK(!host_def_count_.count(v)) - << "variable " << v->name_hint - << " has already been defined, the Stmt is not SSA"; - CHECK(!host_use_count_.count(v)) - << "variable " << v->name_hint - << " has been used before definition!"; - host_use_count_[v] = 0; - host_def_count_[v] = 1; - } else { - CHECK(!xcel_def_count_.count(v)) - << "variable " << v->name_hint - << " has already been defined, the Stmt is not SSA"; - CHECK(!xcel_use_count_.count(v)) - << "variable " << v->name_hint - << " has been used before definition!"; - xcel_use_count_[v] = 0; - xcel_def_count_[v] = 1; - } - } - - void HandleUse(const Expr& v) { - CHECK(v.as()); - Var var(v.node_); - if (host_scope_) { - auto it = host_use_count_.find(var.get()); - if (it != host_use_count_.end()) { - if (it->second >= 0) { - ++it->second; - } - } else { - host_undefined_.push_back(var); - host_use_count_[var.get()] = -1; - } - } else { - auto it = xcel_use_count_.find(var.get()); - if (it != xcel_use_count_.end()) { - if (it->second >= 0) { - ++it->second; - } - } else { - xcel_undefined_.push_back(var); - xcel_use_count_[var.get()] = -1; - } - } - } - - bool host_scope_{true}; - Array host_undefined_; - Array xcel_undefined_; - std::unordered_map host_use_count_; - std::unordered_map host_def_count_; - std::unordered_map xcel_use_count_; - std::unordered_map xcel_def_count_; -}; - - -class StreamMutator : public IRMutator { - public: - explicit StreamMutator(int bus_bandwidth) { - bus_bandwidth_ = bus_bandwidth; - } - // move device attr to 
allocate level - Stmt Mutate_(const AttrStmt* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - // if (op->attr_key == attr::device_scope) - // return stmt.as()->body; - return stmt; - } - - Stmt Mutate_(const For* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - auto extent = op->extent.as()->value; - auto min = op->min.as()->value; - // mutate sender: split and block inner loop - if (auto stream_op = op->body.as()) { - if (extent - min > bus_bandwidth_) { - LOG(WARNING) << "large"; - } else { - } - // mutate receiver : (StreamExpr + For(Store = GetSlice)) - } else if (auto store_op = op->body.as()) { - if (store_op->value.as() == nullptr) return stmt; - if (extent - min > bus_bandwidth_) { - LOG(WARNING) << "large"; - } else { - return stmt; - // allocate intermediate buffer - VarExpr new_var(store_op->buffer_var.get()->name_hint + "_save"); - Expr new_load = Load::make(store_op->buffer_var.type(), new_var, 0, const_true()); - Stmt new_store = Store::make(store_op->buffer_var, new_load, - store_op->index, store_op->predicate); - Stmt new_for = For::make(op->loop_var, op->min, op->extent, op->for_type, - op->device_api, new_store); - // save stream data into intermediate buffer - Stmt read_in = Store::make(new_var, store_op->value, - Expr(0), const_true()); - // allocate intermediate buffer - return Allocate::make(new_var, - store_op->value.type(), - {make_const(Int(bus_bandwidth_), 1)}, - const_true(), Block::make(read_in, new_for)); - } - } - return stmt; - } - - Stmt Mutate_(const StreamStmt* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - const Variable* v = op->buffer_var.get(); - stream_type_map_[v] = op->buffer_var.type(); - return stmt; - } - - Expr Mutate_(const StreamExpr* op, const Expr& e) final { - Expr expr = IRMutator::Mutate_(op, e); - op = expr.as(); - const Variable* v = op->buffer_var.get(); - stream_type_map_[v] = op->buffer_var.type(); - return 
expr; - } - private: - int bus_bandwidth_; - bool is_host_{true}; - std::unordered_map stream_type_map_; -}; - -// Mark the statment scope of each stage. -class StreamInferer : public IRMutator { - public: - explicit StreamInferer(int bus_bandwidth) { - bus_bandwidth_ = bus_bandwidth; - } - - Stmt Mutate_(const Allocate* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - if (auto block = op->body.as()) { - if (auto producer = block->first.as()){ - if (const AttrStmt* attr_stmt = producer->body.as()) { - if (const AttrStmt* device_attr = attr_stmt->body.as()) { - if (device_attr->attr_key == attr::device_scope) { - // mutate allocate body - StreamMutator mutator(bus_bandwidth_); - // allocate stream for host - Stmt new_body = mutator.Mutate(op->body); - Stmt new_stmt = Allocate::make(op->buffer_var, - op->type, - op->extents, - op->condition, - new_body); - return AttrStmt::make(device_attr->node, - attr::device_scope, - device_attr->value, - new_stmt); - } - } - } - } - } - return stmt; - } - - // Stmt Mutate_(const ProducerConsumer* op, const Stmt& s) final { - // Stmt stmt = IRMutator::Mutate_(op, s); - // op = stmt.as(); - // return is_no_op(op->body) ? 
op->body : stmt; - // } - - // Stmt Mutate_(const Store* op, const Stmt& s) final { - // Stmt stmt = IRMutator::Mutate_(op, s); - // op = stmt.as(); - // auto it = var_remap_.find(op->buffer_var.get()); - // if (it != var_remap_.end() && - // !it->second.same_as(op->buffer_var)) { - // CHECK(it->second.as()); - // VarExpr buf_var(it->second.node_); - // if (has_stencil_) outputs_.insert(buf_var); - // return Store::make(buf_var, op->value, op->index, op->predicate); - // } else { - // return stmt; - // } - // } - - // Stmt Mutate_(const AttrStmt* op, const Stmt& s) final { - // if (op->attr_key == attr::realize_scope) { - // storage_scope_[op->node.get()] = op->value.as()->value; - // return this->Mutate(op->body); - // } else if (op->attr_key == attr::double_buffer_scope) { - // Operation func(op->node.node_); - // Stmt body = Mutate(op->body); - // for (int i = 0; i < func->num_outputs(); ++i) { - // TensorKey key{func, i}; - // auto it = buf_map_.find(key); - // CHECK(it != buf_map_.end()) - // << "Cannot find allocated buffer for " << key.f; - // body = AttrStmt::make( - // it->second.buffer->data, op->attr_key, op->value, body); - // } - // return body; - // } else if (op->attr_key == attr::thread_extent) { - // IterVar iv(op->node.node_); - // ThreadScope ts = ThreadScope::make(iv->thread_tag); - // curr_thread_scope_.push_back(ts); - // Stmt stmt = IRMutator::Mutate_(op, s); - // curr_thread_scope_.pop_back(); - // return stmt; - // } else if (op->attr_key == attr::buffer_bind_scope) { - - // Stmt Mutate_(const For* op, const Stmt& s) final { - // Stmt stmt = IRMutator::Mutate_(op, s); - // op = stmt.as(); - // return is_no_op(op->body) ? 
MakeEvaluate({op->min, op->extent}) : stmt; - // } - - private: - int bus_bandwidth_; - Stmt MakeEvaluate(Expr value) { - if (HasSideEffect(value)) { - return Evaluate::make(value); - } else { - return Evaluate::make(0); - } - } - Stmt MakeEvaluate(const Array& values) { - Stmt stmt; - for (Expr e : values) { - if (HasSideEffect(e)) { - if (stmt.defined()) { - stmt = Block::make(stmt, Evaluate::make(e)); - } else { - stmt = Evaluate::make(e); - } - } - } - return stmt.defined() ? stmt : Evaluate::make(0); - } -}; - -Stmt InferStream(Stmt stmt, - int bus_bandwidth) { - return StreamInferer(bus_bandwidth).Mutate(stmt); -} - -} // namespace ir -} // namespace TVM diff --git a/tvm/src/schedule/compute_primitive.h b/tvm/src/schedule/compute_primitive.h index e7167257c..e65885462 100644 --- a/tvm/src/schedule/compute_primitive.h +++ b/tvm/src/schedule/compute_primitive.h @@ -33,14 +33,6 @@ Stmt PerformComputeAt(Stmt& producer, size_t& attach_level, std::unordered_map& sub); -Stmt StreamFromProducer(Stmt& stmt, - Buffer& producer_buf, - ir::StreamType& type); - -Stmt StreamToConsumer(Stmt& stmt, - Buffer& producer_buf, - ir::StreamType& type); - Stmt UpdateIterVarAttr(Stmt& stmt, const IterVar& var, const IterVarAttrNode* node); diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index a7fc8ee72..b2bd520e7 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "./message_passing.h" #include "../pass/ir_util.h" @@ -28,33 +27,12 @@ size_t FindNodeRef(ArrayNode* array_node, const T& v) { return array_node->data.size(); } -// The replacer of cache. 
-class LoadReplacer : public ir::IRMutator { - public: - explicit LoadReplacer( - const std::unordered_map& vsub) - : vsub_(vsub) {} - - Expr Mutate_(const Load* op, const Expr& e) { - const Variable* var = op->buffer_var.as(); - auto it = vsub_.find(var); - if (it != vsub_.end()) - return Load::make(op->type, it->second, - op->index, op->predicate); - return e; - } - - private: - const std::unordered_map& vsub_; -}; - // The replacer of cache. class VarReplacer : public ir::IRMutator { public: explicit VarReplacer( const std::unordered_map& vsub) : vsub_(vsub) {} - Expr Mutate_(const Variable* op, const Expr& e) { auto it = vsub_.find(op); if (it != vsub_.end()) return it->second; @@ -65,17 +43,6 @@ class VarReplacer : public ir::IRMutator { const std::unordered_map& vsub_; }; -// create indices for store -Expr getIndex(std::vector indices, const Array shape) { - Expr ret = indices[0]; - Expr mul = 1; - for (size_t i = 1; i < indices.size(); i++) { - ret = Simplify(ret + indices[i] * mul); - mul = Simplify(mul * shape[i]); - } - return ret; -} - Expr InjectPredicate(const Array& predicates, Expr body) { using ir::Reduce; @@ -107,120 +74,6 @@ void ReplaceDataFlow(const Array& stages, } } -class StreamConsumer final : public IRMutator { - public: - StreamConsumer( - const std::string& target, - const ir::StreamType& type, - int channel_index) - : target_(target), type_(type), - channel_index_(channel_index) {} - - Expr Mutate_(const Load* op, const Expr& e) { - Expr index = op->index; - std::string target_name = op->buffer_var.get()->name_hint; - if (target_ == target_name) { - Array keys, values; - keys.push_back(StringImm::make("index")); - values.push_back(IntImm::make(Int(32), channel_index_)); - return StreamExpr::make(op->type, op->buffer_var, - type_, 10, keys, values); - } else { - return Load::make(op->type, op->buffer_var, - index, op->predicate); - } - } - - private: - const std::string target_; - const ir::StreamType type_; - const int channel_index_; 
-}; - -class StreamProducer final : public IRMutator { - public: - StreamProducer( - const std::string& target, - const ir::StreamType& type, - int channel_index) - : target_(target), type_(type), - channel_index_(channel_index) {} - - Stmt Mutate_(const Store* op, const Stmt& s) { - Expr index = op->index; - Expr value = this->Mutate(op->value); - std::string target_name = op->buffer_var.get()->name_hint; - if (target_name == target_) { - Array keys, values; - keys.push_back(StringImm::make("index")); - values.push_back(IntImm::make(Int(32), channel_index_)); - return StreamStmt::make(op->buffer_var, value, - type_, 10, keys, values); - } else { - return Store::make(op->buffer_var, value, - index, op->predicate); - } - } - - private: - const std::string target_; - const ir::StreamType type_; - const int channel_index_; -}; - -class KernelUpdater final : public IRMutator { - public: - static int channelCount; - KernelUpdater( - const int arg_pos, - const ir::StreamType& type, - const bool is_producer, - const bool kernel_channel) - : arg_pos_(arg_pos), type_(type), - is_producer_(is_producer), - kernel_channel_(kernel_channel) { - if (kernel_channel_) channel_index_ = getIndex(); - } - - Stmt Mutate_(const KernelDef* op, const Stmt& s) { - Stmt stmt = op->body; - // arr saves arg_pos and common channel idx - Array arr = op->channels; - CHECK(op->channels.size() % 2 == 0) - << "arg_pos, index pair number mismatch"; - arr.push_back(IntImm::make(Int(32), arg_pos_)); - arr.push_back(IntImm::make(Int(32), channel_index_)); - std::string target_ = op->args[arg_pos_].get()->name_hint; - if (is_producer_) { // mutate target load - StreamProducer mutator(target_, type_, channel_index_); - stmt = mutator.Mutate(stmt); - } else { // replace load consumer - StreamConsumer mutator(target_, type_, channel_index_); - stmt = mutator.Mutate(stmt); - } - // update kernel arg signature - return KernelDef::make(op->args, op->api_args, - op->api_types, stmt, op->ret_void, - 
op->ret_type, op->name, arr); - } - private: - const int arg_pos_; - const ir::StreamType type_; - const bool is_producer_; - const bool kernel_channel_; - int channel_index_{0}; - int getIndex() { - channelCount += 1; - int channel_num = channelCount; - if (channelCount % 2 == 0) - channel_num = channelCount - 1; - return channel_num; - } -}; - -// Initialize static channel count -int KernelUpdater::channelCount = 0; - class ParentStmtCollector final : public IRMutator { public: ParentStmtCollector( @@ -264,369 +117,6 @@ class ParentStmtCollector final : public IRMutator { const IterVar& axis_; }; -// initialize static split bound -int Schedule::split_bound = 0; - -// stream buffer data to kernel stage -void Schedule::to_stage(const Tensor& target, - /*kernel def stage*/ Stage dest, - /*position index*/int arg_pos, - StreamType stream_type, - int channel_depth, - std::string name) { - Stage target_stage = (*this)[target]; - Buffer target_buffer; - - // target stage as kernel def operator - if (const ExternOpNode* op = target_stage->op.as()) { - target_buffer = op->output_placeholders[0]; - // remove the receiver buffer (keep the device scope) - const AttrStmt* attr = op->body.as(); - Stmt scope_attr = AttrStmt::make(attr->node, attr->attr_key, - attr->value, Evaluate::make(0)); - target_stage->op = ExternOpNode::make(op->name, - "", - Array(), - op->inputs, - op->input_placeholders, - op->output_placeholders, - scope_attr); - // update dest stage body for data stream in - const ExternOpNode* destOp = dest->op.as(); - KernelUpdater mutator(arg_pos, stream_type, - /*is producer*/false, - /*inter module channel*/false); - auto new_body = mutator.Mutate(destOp->body); - dest->op = ExternOpNode::make(destOp->name, destOp->tag, - destOp->axis, destOp->inputs, - destOp->input_placeholders, - Array(), - new_body); - } -} - -// stream data between hardware modules -void Schedule::stream_to(const Tensor& target, - Stage dest, - Stage source, - StreamType stream_type, - int 
channel_depth, - std::string new_name) { - Stage target_stage = (*this)[target]; - std::vector consumers; - size_t num_stage = (*this)->stages.size(); - Buffer target_buffer; - std::unordered_map pos; - const ExternOpNode* destOp = dest->op.as(); - const ExternOpNode* srcOp = source->op.as(); - - // update kernel def and scope - const PlaceholderOpNode* op = target_stage->op.as(); - bool is_placeholder = op ? true : false; - if (is_placeholder) { - for (size_t i = 0; i < num_stage; i++) { - Stage s = (*this)->stages[i]; - // name matching to locate kernels - if (const ExternOpNode* op = s->op.as()) { - for (size_t j = 0; j < op->inputs.size(); j++) { - if (target == op->inputs[j]) { - target_buffer = op->input_placeholders[j]; - consumers.push_back(s); - // record streamed data pos in kernel call - if (std::regex_match(op->name, - std::regex(destOp->name + "(\\d)"))) - pos[dest] = j; - else if (std::regex_match(op->name, - std::regex(destOp->name + "(\\d)"))) - pos[source] = j; - break; - } - } - } - } - } else { // only consumed by self stage - const ExternOpNode* op = target_stage->op.as(); - target_buffer = op->output_placeholders[0]; - consumers.push_back(target_stage); - } - // mutator (is_producer false, kernel_channel true) - KernelUpdater destMutator(0, //target_buffer->name, - stream_type, false, true); - // mutate kernel def and repalce lw / st - dest->op = ExternOpNode::make(destOp->name, - destOp->tag, - destOp->axis, - destOp->inputs, - destOp->input_placeholders, - Array(), - destMutator.Mutate(destOp->body)); - // mutator (is_producer true, kernel_channel true) - KernelUpdater srcMutator(0, //target_buffer->name, - stream_type, true, true); - source->op = ExternOpNode::make(srcOp->name, - srcOp->tag, - srcOp->axis, - srcOp->inputs, - srcOp->input_placeholders, - Array(), - srcMutator.Mutate(srcOp->body)); - // update kernel call ops - for (auto s : consumers) { - const ExternOpNode* op = s->op.as(); - Stmt body = AttrStmt::make(VarExpr(), - 
"device_scope", - StringImm::make("fpga"), - op->body); - // not alloc buffer for kernel call - s->op = ExternOpNode::make(op->name, - op->tag, - op->axis, - op->inputs, - op->input_placeholders, - Array(), - body); - } -} - -// move data to device -Tensor Schedule::move_to(const Tensor& target, - DeviceType device_type, - StreamType stream_type, - int channel_depth, - std::string new_name) { - Stage target_stage = (*this)[target]; - std::vector consumers; - size_t num_stage = (*this)->stages.size(); - size_t min_pos = num_stage; - ArrayNode* stages = (*this)->stages.CopyOnWrite(); - Buffer target_buffer; - - // create producer and consumer stages for placeholder - const PlaceholderOpNode* op = target_stage->op.as(); - bool is_placeholder = op ? true : false; - if (is_placeholder) { - min_pos = 0; - for (size_t i = 0; i < num_stage; i++) { - Stage s = (*this)->stages[i]; - if (const ExternOpNode* op = s->op.as()) { - for (size_t j = 0; j < op->inputs.size(); j++) { - if (target == op->inputs[j]) { - target_buffer = op->input_placeholders[j]; - consumers.push_back(s); - break; - } - } - } - } - } else { // move data generated by extern op - min_pos = FindNodeRef(stages, target_stage) + 1; - const ExternOpNode* op = target_stage->op.as(); - target_buffer = op->output_placeholders[0]; - for (size_t i = 0; i < num_stage; i++) { - Stage s = (*this)->stages[i]; - if (const ExternOpNode* stage_op = s->op.as()) { - for (size_t j = 0; j < stage_op->inputs.size(); j++) { - if (op->output_placeholders[0] == stage_op->input_placeholders[j]) { - consumers.push_back(s); - break; - } - } - } - } - } - - // create sender and write into streaming channel - Array consumer_inputs; - Array consumer_input_placeholders; - Array consumer_output_placeholders; - std::string consumer_name = target_buffer->name + ".stream_send"; - Buffer consumer_buffer = BufferNode::make(Var(consumer_name, Handle()), - target->dtype, - target->shape, - Array(), - Expr(), - consumer_name, - "", 0, 0); - 
consumer_inputs.push_back(target); - consumer_input_placeholders.push_back(target_buffer); - consumer_output_placeholders.push_back(consumer_buffer); - - // create statement index - std::vector csm_indices; - std::vector csm_loop_vars; - for (size_t i = 0; i < target->shape.size(); i++) { - VarExpr iter(target_buffer->name + std::to_string(i)); - csm_indices.push_back(iter); - csm_loop_vars.push_back(iter); - } - Expr csm_index = getIndex(csm_indices, target->shape); - Expr load_expr = Load::make(target->dtype, - target_buffer->data, - csm_index, - UIntImm::make(UInt(1), 1)); - Stmt consumer_body = StreamStmt::make(consumer_buffer->data, - load_expr, - stream_type, - channel_depth); - - Expr sender_scope, receiver_scope; - size_t consumer_pos = min_pos; - switch (device_type) { - case DeviceType::CPU: - consumer_pos = num_stage; - sender_scope = StringImm::make("fpga"); - receiver_scope = StringImm::make("cpu"); - break; - case DeviceType::FPGA: - sender_scope = StringImm::make("cpu"); - receiver_scope = StringImm::make("fpga"); - break; - case DeviceType::GPU: - sender_scope = StringImm::make("cpu"); - receiver_scope = StringImm::make("gpu"); - break; - } - - for (size_t j = 0; j < target->shape.size(); j++) { - consumer_body = For::make( - VarExpr(csm_loop_vars[j]), - 0, target->shape[j], - ForType::Serial, - DeviceAPI::None, - consumer_body); - } - - consumer_body = AttrStmt::make( - consumer_buffer->data, - "device_scope", sender_scope, consumer_body); - - // create new stage and return stream tensors - // auto n = std::make_shared(); - // n->name = consumer_name; - // n->body = consumer_body; - // n->inputs = consumer_inputs; - // n->input_placeholders = consumer_input_placeholders; - // n->output_placeholders = consumer_output_placeholders; - // Operation consumer_op(n); - - Operation consumer_op = ExternOpNode::make(consumer_name, - "", - Array(), - consumer_inputs, - consumer_input_placeholders, - consumer_output_placeholders, - consumer_body); - Stage 
consumer_stage = Stage(consumer_op); - // insert sender before bound for (host,xcel <- host) case - if (device_type == DeviceType::FPGA) { - if (split_bound == 0) { - split_bound = consumer_pos + 1; - } else { // insert host sender before bound - consumer_pos = split_bound; - split_bound += 1; - } - } - stages->data.insert(stages->data.begin() + consumer_pos, consumer_stage.node_); - (*this)->stage_map.Set(consumer_op, consumer_stage); - - // build producer (receiver) stage which takes in data from streaming - // channel and provide data to orginal consumers - Array producer_inputs; - Array producer_input_placeholders; - Array producer_output_placeholders; - std::string producer_name = target_buffer->name + ".stream_recv"; - Buffer producer_buffer = BufferNode::make(Var(producer_name, Handle()), - target->dtype, - target->shape, - Array(), - Expr(), - producer_name, - "", 0, 0); - // producer_inputs.push_back(consumer_op.output(0)); - // producer_input_placeholders.push_back(consumer_buffer); - producer_output_placeholders.push_back(producer_buffer); - // streaming producer tensor reading from placeholder - Expr stream = StreamExpr::make(target->dtype, - consumer_buffer->data, - stream_type, - channel_depth); - // create for loops for tensor init - std::vector indices; - std::vector loop_vars; - for (size_t i = 0; i < target->shape.size(); i++) { - VarExpr iter(target_buffer->name + std::to_string(i)); - indices.push_back(iter); - loop_vars.push_back(iter); - } - Expr index = getIndex(indices, target->shape); - // store op initialized with variable node - Stmt for_stmt = Store::make(producer_buffer->data, - stream, index, - UIntImm::make(UInt(1), 1)); - for (size_t j = 0; j < target->shape.size(); j++) { - for_stmt = For::make( - VarExpr(loop_vars[j]), - 0, target->shape[j], - ForType::Serial, - DeviceAPI::None, - for_stmt); - } - - // attr annotates new scope - Stmt body = AttrStmt::make( - target_buffer->data, - "device_scope", receiver_scope, for_stmt); - Tensor 
producer = ExternOpNode::make(producer_buffer->name, - "", - Array(), - producer_inputs, - producer_input_placeholders, - producer_output_placeholders, - body).output(0); - - // recv stage creation + return tensor - Stage producer_stage = Stage(producer->op); - size_t pos = FindNodeRef(stages, consumer_stage); - if (split_bound == 0 || device_type == DeviceType::CPU) - pos = pos + 1; - else pos = split_bound + 1; - stages->data.insert(stages->data.begin() + pos, producer_stage.node_); - (*this)->stage_map.Set(producer->op, producer_stage); - - // update consumer stages with new tensor and buffer - std::unordered_map vsub; - vsub[target_buffer->data.as()] = producer_buffer->data; - for (size_t i = 0; i < consumers.size(); i++) { - Stage s = consumers[i]; - Array new_inputs; - Array new_input_placeholders; - const ExternOpNode* op = s->op.as(); - new_inputs.push_back(producer); - new_input_placeholders.push_back(producer_buffer); - for (size_t j = 0; j < op->inputs.size(); j++) { - if (target != op->inputs[j]) { - new_inputs.push_back(op->inputs[j]); - new_input_placeholders.push_back(op->input_placeholders[j]); - } - } - Stmt body = LoadReplacer(vsub).Mutate(op->body); - Stmt new_body = AttrStmt::make( - target_buffer->data, - "device_scope", - receiver_scope, - op->body); - s->op = ExternOpNode::make( - op->name, - op->tag, - op->axis, - new_inputs, - new_input_placeholders, - op->output_placeholders, - body); - } - return producer; -} - Tensor Schedule::reuse_at(const Tensor& target, Stage parent, IterVar axis, diff --git a/tvm/src/schedule/schedule_ops.cc b/tvm/src/schedule/schedule_ops.cc index 8156844f5..b4f8e7468 100644 --- a/tvm/src/schedule/schedule_ops.cc +++ b/tvm/src/schedule/schedule_ops.cc @@ -349,7 +349,7 @@ Stmt ScheduleOps( << "call schedule.normalize before scheduleops"; CHECK(s->op.defined()); // no need to specify place holder op. - if (auto op = s->op.as()) continue; + if (s->op.as()) continue; // Remove grouping sugar, get the real attach spec. 
Stage attach_spec = s.GetAttachSpec(); diff --git a/tvm/src/template/sdaccel/CLKernel.cpp b/tvm/src/template/sdaccel/CLKernel.cpp deleted file mode 100644 index 84cf29465..000000000 --- a/tvm/src/template/sdaccel/CLKernel.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/*===============================================================*/ -/* */ -/* CLKernel.cpp */ -/* */ -/* Defines the object class for an OpenCL kernel */ -/* */ -/*===============================================================*/ - -#include "CLKernel.h" -#include - -namespace rosetta -{ - // initialize the kernel from binary file - CLKernel::CLKernel(cl_context context, cl_program program, std::string kernel_name, cl_device_id device_id) - { - printf("Creating kernel %s ... ", kernel_name.c_str()); - - int err; - - // set the name and device ID - this->device_id = device_id; - this->kernel_name = kernel_name; - - // Create the compute kernel in the program we wish to run - kernel = clCreateKernel(program, kernel_name.c_str(), &err); - if (!kernel || err != CL_SUCCESS) - { - printf("Error: Failed to create compute kernel!\n"); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - - printf("Done!\n"); - } - - void CLKernel::set_global(int global_work_size[3]) - { - printf("Set global work size of kernel %s to [%d, %d, %d]\n", kernel_name.c_str(), - global_work_size[0], global_work_size[1], global_work_size[2]); - - for (int i = 0; i < 3; i ++ ) - this->global_size[i] = global_work_size[i]; - } - - void CLKernel::set_local(int local_work_size[3]) - { - printf("Set local work size of kernel %s to [%d, %d, %d]\n", kernel_name.c_str(), - local_work_size[0], local_work_size[1], local_work_size[2]); - - for (int i = 0; i < 3; i ++ ) - this->local_size[i] = local_work_size[i]; - } - - std::string CLKernel::get_name() - { - return this->kernel_name; - } - - void CLKernel::releaseKernel() - { - printf("Release kernel %s ... 
", kernel_name.c_str()); - // release kernel - clReleaseKernel(kernel); - printf("Done!\n"); - } -} diff --git a/tvm/src/template/sdaccel/CLKernel.h b/tvm/src/template/sdaccel/CLKernel.h deleted file mode 100644 index 2933913c8..000000000 --- a/tvm/src/template/sdaccel/CLKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/*===============================================================*/ -/* */ -/* CLKernel.h */ -/* */ -/* Defines the object class for an OpenCL kernel */ -/* */ -/*===============================================================*/ - - -#ifndef __CLKernel__Harness__ -#define __CLKernel__Harness__ - -// standard headers -#include -#include -#include -// opencl header -#include -// CLMemObj is a member of this class -#include "CLMemObj.h" - -namespace rosetta -{ - - // wrapper class around an OpenCL kernel - class CLKernel - { - - friend class CLWorld; - - public: - - // constructor - // compiles the kernel - CLKernel(cl_context context, cl_program program, std::string kernel_name, cl_device_id device_id); - - // set global/local work group size - void set_global(int global_work_size[3]); - void set_local(int local_work_size[3]); - - // get kernel name - std::string get_name(); - - protected: - - // set cl_mem argument - int set_mem_arg(int id, cl_mem mem_obj) - { - int err; - err = clSetKernelArg(this->kernel, id, sizeof(mem_obj), &mem_obj); - if (err != CL_SUCCESS) - { - printf("Error: Failed to set kernel argument %d for kernel %s!\n", id, (this->kernel_name).c_str()); - printf("Error Code %d\n", err); - return EXIT_FAILURE; - } - - return err; - } - - // set memory arguments for this kernel - template - int set_const_arg(int id, T& mem_obj) - { - int err; - // printf("%d\n", mem_obj); - err = clSetKernelArg(this->kernel, id, sizeof(mem_obj), &mem_obj); - printf("****************\n"); - printf("%d\n", err); - if (err != CL_SUCCESS) - { - printf("Error: Failed to set kernel argument %d for kernel %s!\n", id, (this->kernel_name).c_str()); - printf("Error Code 
%d\n", err); - return EXIT_FAILURE; - } - - return err; - } - - void releaseKernel(); - - private: - - // global and local work group size - size_t global_size[3]; - size_t local_size[3]; - - // kernel information and objects - std::string kernel_name; - cl_device_id device_id; // target device id - cl_kernel kernel; // compute kernel - - }; - -} -#endif /* defined(__CLKernel__Harness__) */ diff --git a/tvm/src/template/sdaccel/CLMemObj.cpp b/tvm/src/template/sdaccel/CLMemObj.cpp deleted file mode 100644 index a6fdecf4a..000000000 --- a/tvm/src/template/sdaccel/CLMemObj.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/*===============================================================*/ -/* */ -/* CLMemObj.cpp */ -/* */ -/* Implements the member functions of CLMemObj class */ -/* */ -/*===============================================================*/ - - -#include "CLMemObj.h" - -namespace rosetta -{ - // default constructor, initializes everything to 0 - CLMemObj::CLMemObj() - { - this->mem_data = nullptr; - this->elt_size = 0; - this->length = 0; - this->flags = 0; - this->bank = nullptr; - } - - // meaningful constructor, initialize data info constants - CLMemObj::CLMemObj(void *mem_data, int elt_size, int length, cl_mem_flags flags, cl_mem_ext_ptr_t* xil_ext ) - { - this->mem_data = mem_data; - this->elt_size = elt_size; - this->length = length; - this->flags = flags; - // can use Xilinx mem extensions to specify DDR bank - if (xil_ext != nullptr) - { - this->bank = new cl_mem_ext_ptr_t; - this->bank->flags = xil_ext->flags; - this->bank->obj = xil_ext->obj; - this->bank->param = 0; - } - else - this->bank = nullptr; - } - - // return the pointer to data - void * CLMemObj::get_data() { return mem_data; } - - // get size of each element - int CLMemObj::get_element_size() { return elt_size; } - - // get the number of elements in the buffer - int CLMemObj::get_length() { return length; } - - // get OpenCL memory flags - cl_mem_flags CLMemObj::get_flags() { return flags; } - - // 
get xilinx memory extension pointer - cl_mem_ext_ptr_t* CLMemObj::get_xil_ext_ptr() { return bank; } -} diff --git a/tvm/src/template/sdaccel/CLMemObj.h b/tvm/src/template/sdaccel/CLMemObj.h deleted file mode 100644 index 30e564aff..000000000 --- a/tvm/src/template/sdaccel/CLMemObj.h +++ /dev/null @@ -1,57 +0,0 @@ -/*===============================================================*/ -/* */ -/* CLMemObj.h */ -/* */ -/* Defines the object class for an OpenCL memory buffer */ -/* */ -/*===============================================================*/ - - -#ifndef __CLMemObj__Harness__ -#define __CLMemObj__Harness__ - -// standard header for command line output -#include -// opencl header -#include -// xilinx opencl extension header -#include - -namespace rosetta -{ - // wrapper class around cl_mem - class CLMemObj - { - - friend class CLWorld; - - public: - - // default constructor - CLMemObj (); - // a meaningful constructor - CLMemObj (void* mem_data, int elt_size, int length, cl_mem_flags flags, cl_mem_ext_ptr_t* xil_ext = nullptr); - - // get information about the buffer - void* get_data(); - int get_element_size(); - int get_length(); - cl_mem_flags get_flags(); - cl_mem_ext_ptr_t* get_xil_ext_ptr(); - - private: - - // pointer to data - void *mem_data; - // size of each element - int elt_size; - // number of elements - int length; - // OpenCL memory flag - cl_mem_flags flags; - // Xilinx extension describing bank assignment - cl_mem_ext_ptr_t* bank; - }; -} - -#endif /* defined(__CLMemObj__Harness__) */ diff --git a/tvm/src/template/sdaccel/CLWorld.cpp b/tvm/src/template/sdaccel/CLWorld.cpp deleted file mode 100644 index 7be386df2..000000000 --- a/tvm/src/template/sdaccel/CLWorld.cpp +++ /dev/null @@ -1,401 +0,0 @@ -/*===============================================================*/ -/* */ -/* CLWorld.cpp */ -/* */ -/* Implementation of the CLWorld class */ -/* */ -/*===============================================================*/ - -#include "CLWorld.h" - 
-namespace rosetta -{ - // default constructor - // make sure it does something meaningful - CLWorld::CLWorld() - { - // default: run on alpha data 7v3 board - this->target_device_name = "xilinx:adm-pcie-7v3:1ddr:3.0"; - this->device_type = CL_DEVICE_TYPE_ACCELERATOR; - - // configure the OpenCL runtime - createWorld(); - } - - // meaningful constructor - // user specifies device - CLWorld::CLWorld(std::string target_device_name, cl_device_type device_type) - { - this->target_device_name = target_device_name; - this->device_type = device_type; - createWorld(); - } - - // get the compute device - cl_device_id CLWorld::getDevice() - { - return this->device_id; - } - - // get context - cl_context CLWorld::getContext() - { - return this->context; - } - - // get compute program - cl_program CLWorld::getProgram() - { - return this->program; - } - - // insert a new memory object - int CLWorld::addMemObj(CLMemObj &new_mem_obj) - { - int err; - - printf("Adding memory object into the world ... "); - - // first push the CLMemObj object into our vector - mem_objs.push_back(new_mem_obj); - - // then create the actual cl_mem buffer, push it into another vector - cl_mem buf; - - buf = clCreateBuffer(context, new_mem_obj.flags, new_mem_obj.elt_size * new_mem_obj.length, new_mem_obj.bank, &err); - if (err != CL_SUCCESS) - { - printf("Error creating buffer for memory object %d!\n", mem_objs.size()-1); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - - cl_mem_buffers.push_back(buf); - - // write the buffer onto the device if needed - if ((new_mem_obj.flags != CL_MEM_WRITE_ONLY) && (new_mem_obj.mem_data != nullptr)) - { - err = clEnqueueWriteBuffer(cmd_queue, buf, true, 0, new_mem_obj.elt_size * new_mem_obj.length, - new_mem_obj.mem_data, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error writing buffer %d onto the device!\n", mem_objs.size()-1); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - } - - printf("Done!\n"); - - return (mem_objs.size() - 
1); - } - - int CLWorld::updateMemObj(int mem_idx) - { - printf("Updating mem object %d ... ", mem_idx); - - // write the buffer onto the device if needed - if (mem_objs[mem_idx].flags != CL_MEM_WRITE_ONLY) - { - int err = clEnqueueWriteBuffer(cmd_queue, cl_mem_buffers[mem_idx], true, 0, - mem_objs[mem_idx].elt_size * mem_objs[mem_idx].length, - mem_objs[mem_idx].mem_data, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error writing buffer %d onto the device!\n", mem_idx); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - } - else - printf("Buffer %d is write_only! Not updating it ... \n", mem_idx); - - return EXIT_SUCCESS; - } - - int CLWorld::readMemObj(int mem_idx) - { - printf("Reading mem object %d into host buffers ... ", mem_idx); - - int err = clEnqueueReadBuffer(cmd_queue, cl_mem_buffers[mem_idx], true, 0, - mem_objs[mem_idx].elt_size * mem_objs[mem_idx].length, - mem_objs[mem_idx].mem_data, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error reading kernel buffer %d!\n", mem_idx); - printf("Error code %d\n", err); - exit(EXIT_FAILURE); - } - - printf("Done!\n"); - - return err; - } - - - // create compute program from a file - // return error code - int CLWorld::addProgram(std::string filename) - { - printf("Adding binary program into the world ... 
"); - - // load the file - size_t code_size = (size_t) load_file_to_memory(filename.c_str()); - - // start to compile - int err; - cl_int create_binary_status; - - // Create the compute program from the source buffer - program = clCreateProgramWithBinary(context, 1, &device_id, (const size_t *) &code_size, - (const unsigned char **) &kernel_code, &create_binary_status, &err); - if (!program) - { - printf("Error: Failed to create compute program!\n"); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - - // Build the program executable - err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); - if (err != CL_SUCCESS) - { - size_t len; - char buffer[2048]; - - printf("Error: Failed to build program executable!\n"); - printf("Error Code %d\n", err); - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); - printf("%s\n", buffer); - exit(EXIT_FAILURE); - } - - printf("Done!\n"); - - return err; - } - - // insert a kernel into the world - // return the position of the kernel in the vector - int CLWorld::addKernel(CLKernel &new_kernel) - { - printf("Adding kernel %s into the world ... ", new_kernel.get_name().c_str()); - - kernels.push_back(new_kernel); - - printf("Done!\n"); - - return (kernels.size() - 1); - } - - // methods to set kernel arguments - // memory argument - int CLWorld::setMemKernelArg(int kernel_id, int pos, int arg_id) - { - printf("Set mem arg %d for kernel %d with mem object %d ... ", pos, kernel_id, arg_id); - - int err = kernels[kernel_id].set_mem_arg(pos, cl_mem_buffers[arg_id]); - if (err != CL_SUCCESS) - { - printf("Error setting kernel argument!\n"); - printf("Error code %d\n", err); - exit(EXIT_FAILURE); - } - - printf("Done!\n"); - - return err; - } - - // run all kernels - // return error code - int CLWorld::runKernels(bool flush) - { - printf("Start kernel execution ... "); - - int err; - - // wait for previous write buffer tasks to finish - printf("Waiting for queue... 
\n"); - clFinish(cmd_queue); - - // enqueue all the kernels - // temporarily we assume kernels won't have any dependency between them - // or the dependency is handled inside kernels (such as pipes, etc. ) - for (int i = 0; i < kernels.size(); i ++ ) - { - printf("Start kernel %d!\n", i); - err = clEnqueueNDRangeKernel(cmd_queue, kernels[i].kernel, 3, NULL, kernels[i].global_size, kernels[i].local_size, - 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error enqueuing kernel %d!\n", i); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - } - - // wait for them to finish - printf("Waiting for kernels ... \n"); - clFinish(cmd_queue); - - // remove all of them from the vector - // so that this function can be called multiple times - // at a cost that kernels won't be released automatically - if (flush) - { - int total_size = kernels.size(); - for (int i = 0; i < total_size; i ++ ) - kernels.pop_back(); - } - - printf("Done!\n"); - - return err; - } - - // create runtime environment - int CLWorld::createWorld() - { - printf("Initializing OpenCL runtime environment ... 
"); - - int err; - - // scan the machine for available OpenCL platforms - cl_uint platform_cnt; - cl_platform_id platforms[16]; - err = clGetPlatformIDs(16, platforms, &platform_cnt); - if (err != CL_SUCCESS) - { - printf("Error: Failed to find an OpenCL platform!\n"); - printf("Error Code %d\n", err); - printf("Test failed\n"); - exit(EXIT_FAILURE); - } - printf("INFO: Found %d platforms\n", platform_cnt); - - - // find the target device - char device_name[1024]; - cl_device_id devices[16]; - cl_uint device_cnt; - bool found_device = false; - // scan all platforms - for (int p = 0; (p < platform_cnt) & (!found_device); p ++ ) - { - err = clGetDeviceIDs(platforms[p], this->device_type, 16, devices, &device_cnt); - if (err != CL_SUCCESS) - { - printf("Error: Failed to create a device group for platform %d!\n", p); - printf("Error Code %d\n", err); - printf("Test failed\n"); - exit(EXIT_FAILURE); - } - // iterate through all devices on the platform - for (int d = 0; (d < device_cnt) & (!found_device); d ++ ) - { - err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 1024, device_name, 0); - if (err != CL_SUCCESS) - { - printf("Error: Failed to get device name for device %d on platform %d!\n", d, p); - printf("Error Code %d\n", err); - printf("Test failed\n"); - exit(EXIT_FAILURE); - } - - if (std::string(device_name) == this->target_device_name) - { - this->platform = platforms[p]; - this->device_id = devices[d]; - found_device = true; - printf("Selected device %d on platform %d as target device!\n", d, p); - } - } - } - - if (!found_device) - { - printf("Error: Target device %s is not found!\n", (this->target_device_name).c_str()); - exit(EXIT_FAILURE); - } - - // create context and command queue - this->context = clCreateContext(0, 1, &(this->device_id), 0, 0, &err); - if (!(this->context)) - { - printf("Error: Failed to create a compute context!\n"); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - this->cmd_queue = clCreateCommandQueue(this->context, 
this->device_id, - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - &err); - if (!(this->cmd_queue)) - { - printf("Error: Failed to create a command queue!\n"); - printf("Error Code %d\n", err); - exit(EXIT_FAILURE); - } - - printf("Done!\n"); - - return err; - } - - // read kernel binary file into memory - int CLWorld::load_file_to_memory(const char *filename) - { - int size = 0; - FILE *f = fopen(filename, "rb"); - if (f == NULL) - { - kernel_code = NULL; - printf("Can not open kernel file!\n"); - exit(-1); - } - fseek(f, 0, SEEK_END); - size = ftell(f); - printf("Size of the file is %ld\n", size); - fseek(f, 0, SEEK_SET); - kernel_code = new char[size+1]; - if ((unsigned int) size != fread(kernel_code, sizeof(char), size, f)) - { - delete []kernel_code; - printf("Reading kernel failed!\n"); - exit(-2); - } - fclose(f); - (kernel_code)[size] = 0; - return size; - } - - - // release all runtime constructs - void CLWorld::releaseWorld() - { - printf("Cleaning up OpenCL opjects ... "); - - // release memory objects - for (int i = 0; i < cl_mem_buffers.size(); i ++ ) - clReleaseMemObject(cl_mem_buffers[i]); - - // release program - delete []kernel_code; - clReleaseProgram(program); - - // release kernels - for (int i = 0; i < kernels.size(); i ++ ) - kernels[i].releaseKernel(); - - // release device and context - clReleaseCommandQueue(cmd_queue); - clReleaseContext(context); - - printf("Done!\n"); - } - -} - - - - diff --git a/tvm/src/template/sdaccel/CLWorld.h b/tvm/src/template/sdaccel/CLWorld.h deleted file mode 100644 index 9624687aa..000000000 --- a/tvm/src/template/sdaccel/CLWorld.h +++ /dev/null @@ -1,129 +0,0 @@ -/*===============================================================*/ -/* */ -/* CLWorld.h */ -/* */ -/* Defines the object class for OpenCL context */ -/* */ -/*===============================================================*/ - - -#ifndef __CLWorld__Harness__ -#define __CLWorld__Harness__ - -// standard headers -#include -#include -#include -// opencl 
header -#include -// CLKernel and CLMemObj are members of this class -#include "CLKernel.h" -#include "CLMemObj.h" - -namespace rosetta -{ - - class CLWorld - { - - public: - - // default constructor - CLWorld(); - - // meaningful constructor - CLWorld(std::string target_device_name, cl_device_type device_type); - - // get the compute device associated with this world - cl_device_id getDevice(); - - // get the compute context associated with this world - cl_context getContext(); - - // get the binary program - cl_program getProgram(); - - // insert a compute program - int addProgram(std::string filename); - - // insert a kernel - int addKernel(CLKernel &new_kernel); - - // insert a memory object - int addMemObj(CLMemObj &new_mem_obj); - - // update a memory object (write new value) - int updateMemObj(int mem_id); - - // read a memory object - int readMemObj(int mem_id); - - // set memory kernel argument - int setMemKernelArg(int kernel_id, int pos, int mem_id); - - // set constant kernel argument - template - int setConstKernelArg(int kernel_id, int pos, T& arg) - { - // printf("%lu\n", arg); - printf("Set const arg %d for kernel %d ... 
", pos, kernel_id); - - int err = kernels[kernel_id].set_const_arg(pos, arg); - if (err != CL_SUCCESS) - { - printf("Error setting kernel argument!\n"); - printf("Error code %d\n", err); - exit(EXIT_FAILURE); - } - - printf("Done!\n"); - - return err; - } - - // run kernels - int runKernels(bool flush = false); - - // clean up - void releaseWorld(); - - private: - - // OpenCL runtime variables - - // the platform we will use - cl_platform_id platform; - - // the device we will use - std::string target_device_name; // device name - cl_device_type device_type; // device type - cl_device_id device_id; // device id - - // compute context - cl_context context; - - // command queue - cl_command_queue cmd_queue; - - // binary program for the device - char* kernel_code; - cl_program program; - - // kernels - std::vector kernels; - - // memory objects - std::vector mem_objs; - // actual OpenCL memory buffers - std::vector cl_mem_buffers; - - // function to create the OpenCL runtime - int createWorld(); - - // load binary file into memory - int load_file_to_memory(const char *filename); - }; - -} - -#endif diff --git a/tvm/src/template/sdaccel/Makefile b/tvm/src/template/sdaccel/Makefile deleted file mode 100644 index 282f67921..000000000 --- a/tvm/src/template/sdaccel/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -# Set kernel name -KERNEL_NAME = App - -# Set host source and headers -# HOST_SRC_CPP = ./src/host/digit_recognition.cpp ./src/host/utils.cpp ./src/host/check_result.cpp -HOST_SRC_CPP = host.cpp utils.cpp -# HOST_SRC_H = ./src/host/utils.h ./src/host/check_result.h ./src/host/typedefs.h ./src/host/testing_data.h \ - ./src/host/training_data.h -HOST_SRC_H = utils.h -# DATA = ./data/*.dat - - -# Set host code include paths -HOST_INC = -I/opt/Xilinx/Vivado/2018.2.op2258646/include/ -HOST_LIB = -L/opt/Xilinx/Vivado/2018.2.op2258646/lib/ - -# Set kernel file -OCL_KERNEL_SRC = interface.cpp -# OCL_KERNEL_H = ./src/host/typedefs.h -# SDSOC_KERNEL_SRC = ./src/sdsoc/digitrec.cpp 
-# SDSOC_KERNEL_H = ./src/host/typedefs.h -# SW_KERNEL_SRC = ./src/sw/digitrec_sw.cpp -# SW_KERNEL_H = ./src/host/typedefs.h ./src/sw/digitrec_sw.h - -# Set opencl kernel arguments -# log: removed --report system -OCL_KERNEL_ARGS = --max_memory_ports all - -#------------------------- -# Leave the rest to harness -#------------------------- -include harness.mk - diff --git a/tvm/src/template/sdaccel/harness.mk b/tvm/src/template/sdaccel/harness.mk deleted file mode 100644 index 23856f9c7..000000000 --- a/tvm/src/template/sdaccel/harness.mk +++ /dev/null @@ -1,196 +0,0 @@ -# ======================================== Check Xilinx SDX Environment Settings ================================================== # -ifndef XILINX_SDX - $(error Environment variable XILINX_SDX is required and should point to SDx install area) -endif - -# =============================================== Tools Used in Rosetta =========================================================== # - -# sdaccel tools -OCL_CXX = xcpp -XOCC = xocc - -# sdsoc tools -SDSXX = sds++ - -# default sw compiler -SW_CXX = g++ - -# ============================================= SDAccel Platform and Target Settings ============================================== # - -# Set Default OpenCL device and platform -USR_PLATFORM = n -OCL_DEVICE = xilinx:adm-pcie-7v3:1ddr:3.0 -OCL_PLATFORM = one_of_default_platforms - -# Check if the user specified opencl platform -ifneq ($(OCL_PLATFORM), one_of_default_platforms) - USR_PLATFORM=y -endif - -# Check OCL_TARGET value -OCL_TARGET = sw_emu -ifeq ($(OCL_TARGET),sw_emu) -else ifeq ($(OCL_TARGET),hw_emu) -else ifeq ($(OCL_TARGET),hw) -else - $(error "OCL_TARGET does not support the $(OCL_TARGET) value. 
Supported values are: sw_emu, hw_emu, hw") -endif - -# Check opencl kernel file type -OCL_KERNEL_TYPE = ocl - -ifeq ($(suffix $(OCL_KERNEL_SRC)),.cl) - OCL_KERNEL_TYPE=ocl -else - OCL_KERNEL_TYPE=c -endif - -# OpenCL runtime Libraries -OPENCL_INC = $(XILINX_SDX)/runtime/include/1_2 -OPENCL_LIB = $(XILINX_SDX)/runtime/lib/x86_64 - -# opencl harness files -OCL_HARNESS_DIR = . -OCL_HARNESS_SRC_CPP = $(OCL_HARNESS_DIR)/CLKernel.cpp $(OCL_HARNESS_DIR)/CLMemObj.cpp $(OCL_HARNESS_DIR)/CLWorld.cpp -OCL_HARNESS_SRC_H = $(OCL_HARNESS_DIR)/CLKernel.h $(OCL_HARNESS_DIR)/CLMemObj.h $(OCL_HARNESS_DIR)/CLWorld.h - -# host compilation flags -OCL_HOST_FLAGS = -DOCL -g -lxilinxopencl -I$(OPENCL_INC) $(HOST_INC) -L$(OPENCL_LIB) $(HOST_LIB) -I$(OCL_HARNESS_DIR) -I$(APPLICATION_DIR) - -# xclbin compilation flags -XCLBIN_FLAGS = -s -t $(OCL_TARGET) -g - -# change OCL_HOST_FLAG -ifdef K_CONST - OCL_HOST_FLAGS += -DK_CONST=$(K_CONST) -endif -ifdef NUM_ITER - OCL_HOST_FLAGS += -DNUM_ITER=$(NUM_ITER) -endif -ifdef FIXED_FLAG - OCL_HOST_FLAGS += -DFIXED_TYPE -endif - - -ifneq ($(KERNEL_TYPE),ocl) - XCLBIN_FLAGS += --kernel $(KERNEL_NAME) -endif - -ifeq ($(USR_PLATFORM),n) - XCLBIN_FLAGS += --xdevice $(OCL_DEVICE) -else - XCLBIN_FLAGS += --platform $(OCL_PLATFORM) -endif - - -# change XCLBIN_FLAGS -ifdef K_CONST - XCLBIN_FLAGS += -DK_CONST=$(K_CONST) -endif -ifdef NUM_ITER - XCLBIN_FLAGS += -DNUM_ITER=$(NUM_ITER) -endif -ifdef FIXED_FLAG - XCLBIN_FLAGS += -DFIXED_TYPE -endif - - -XCLBIN_FLAGS += $(OCL_KERNEL_ARGS) - - -# host exe -OCL_HOST_EXE = $(KERNEL_NAME)_host.exe - -# Kernel XCLBIN file -XCLBIN = $(KERNEL_NAME).$(OCL_TARGET).xclbin -XO = $(KERNEL_NAME).$(OCL_TARGET).xo - -# =============================================== SDSoC Platform and Target Settings ============================================== # - -# platform -SDSOC_PLATFORM = zc706 - -# executable -SDSOC_EXE = $(KERNEL_NAME).elf - -# sds++ flags -SDSFLAGS = -sds-pf $(SDSOC_PLATFORM) -sds-hw $(KERNEL_NAME) $(SDSOC_KERNEL_SRC) 
-sds-end -clkid 3 \ - -poll-mode 1 -verbose -SDSCFLAGS += -DSDSOC -Wall -O3 -c -SDSCFLAGS += -MMD -MP -MF"$(@:%.o=%.d)" -SDSLFLAGS = -O3 - -# objects -ALL_SDSOC_SRC = $(HOST_SRC_CPP) $(SDSOC_KERNEL_SRC) -OBJECTS := $(ALL_SDSOC_SRC:.cpp=.o) -DEPS := $(OBJECTS:.o=.d) - -# =============================================== Pure Software Compilation Settings ============================================== # - -# compiler flags -SW_FLAGS = -DSW -O3 - -# sw executable -SW_EXE = $(KERNEL_NAME)_sw.exe - -# ========================================================= Rules ================================================================= # - -# we will have 4 top-level rules: ocl, sdsoc, sw and clean -# default to sw - -.PHONY: all ocl sdsoc sw clean - -all: sw - -# ocl rules -ocl: $(OCL_HOST_EXE) $(XCLBIN) - -# ocl secondary rule: host executable -$(OCL_HOST_EXE): $(HOST_SRC_CPP) $(HOST_SRC_H) $(OCL_HARNESS_SRC_CPP) $(OCL_HARNESS_SRC_H) $(DATA) - $(OCL_CXX) $(OCL_HOST_FLAGS) -o $@ $(HOST_SRC_CPP) $(OCL_HARNESS_SRC_CPP) - -# ocl secondary rule: xclbin -$(XCLBIN): $(XO) - $(XOCC) -l $(XCLBIN_FLAGS) -o $@ $(XO) - -# ocl secondary rule: xo -$(XO): $(OCL_KERNEL_SRC) $(OCL_KERNEL_H) - $(XOCC) -c $(XCLBIN_FLAGS) -o $@ $(OCL_KERNEL_SRC) - -# sdsoc rules -sdsoc: $(SDSOC_EXE) - -$(SDSOC_EXE): $(OBJECTS) - $(SDSXX) $(SDSFLAGS) $(SDSLFLAGS) ${OBJECTS} -o $@ - --include $(DEPS) - -%.o: %.cpp - $(SDSXX) $(SDSFLAGS) $(SDSCFLAGS) $< -o $@ - - -# software rules -sw: $(HOST_SRC_CPP) $(HOST_SRC_H) $(SW_KENREL_SRC) $(SW_KERNEL_H) $(DATA) - $(SW_CXX) $(SW_FLAGS) -o $(SW_EXE) $(HOST_SRC_CPP) $(SW_KERNEL_SRC) - -# cleanup -clean: - @echo "Cleaning old files" - rm -rf *.exe - rm -rf *.elf - rm -rf *.xclbin - rm -rf *.bit - rm -rf *.rpt - rm -rf system_estimate.xtxt - rm -rf _xocc* - rm -rf _sds - rm -rf sd_card - rm -rf .Xil - rm -rf ./src/host/*.d - rm -rf ./src/sdsoc/*.o - rm -rf ./src/sdsoc/*.d - rm -rf ./src/host/*.o - rm -rf *.dat - rm -rf *.html - rm -rf *.csv - rm -rf *.json diff --git 
a/tvm/src/template/sdaccel/run.tcl b/tvm/src/template/sdaccel/run.tcl deleted file mode 100644 index 0d6dca4b5..000000000 --- a/tvm/src/template/sdaccel/run.tcl +++ /dev/null @@ -1,14 +0,0 @@ -set hls_prj digitrec.prj -open_project ${hls_prj} -reset -set_top default_function -add_files -tb main.cpp -add_files -tb data - -open_solution "solution1" -set_part {xc7z020clg484-1} -create_clock -period 10 - -csim_design -O -csynth_design -#cosim_design -exit diff --git a/tvm/src/template/sdaccel/run_hw.sh b/tvm/src/template/sdaccel/run_hw.sh deleted file mode 100755 index f65d28e6d..000000000 --- a/tvm/src/template/sdaccel/run_hw.sh +++ /dev/null @@ -1,28 +0,0 @@ -#===============================================================# -# # -# run_hw.sh # -# # -# A bash script to synthesize and generate bitstream # -# # -# # -#===============================================================# - - -#!/bin/bash -make clean - -# the k value of KNN, default is 3 -k_value=3 -# the directory of this lab -app_dir=`pwd` - -### COMPILATION -# create some blank-line space for easy readability -echo ""; echo ""; echo "" ; echo "" -echo "####################################################" -echo " Synthesize and Generate Bitstream with K_CONST=$k_value" -echo "####################################################" -make ocl OCL_TARGET=hw OCL_PLATFORM=$AWS_PLATFORM APPLICATION_DIR=$app_dir K_CONST=$k_value -#export XCL_EMULATION_MODE=hw_emu -#./DigitRec_host.exe -f DigitRec.hw_emu.xclbin - diff --git a/tvm/src/template/sdaccel/run_sw.sh b/tvm/src/template/sdaccel/run_sw.sh deleted file mode 100755 index 80ba00495..000000000 --- a/tvm/src/template/sdaccel/run_sw.sh +++ /dev/null @@ -1,51 +0,0 @@ -#===============================================================# -# # -# run1.sh # -# # -# A bash script to run the software emulation # -# # -# # -#===============================================================# - - -#!/bin/bash -make clean - -# check env variable setup -if [ -z "$AWS_PLATFORM" ]; 
then - echo "AWS_PLATFORM not set up; use default" - export AWS_PLATFORM=xilinx:adm-pcie-7v3:1ddr:3.0 -fi - -# set up emulation configuration -echo "#################################################" -echo " Setting emulation configuration..." -echo "#################################################" -export LC_CTYPE=en_US.UTF-8 -export LC_ALL=en_US.UTF-8 -export XCL_EMULATION_MODE=true -emconfigutil --platform=$AWS_PLATFORM - -# the k value of KNN, default is 3 -k_value=3 -# the directory of this lab -app_dir=`pwd` - -### COMPILATION -# create some blank-line space for easy readability -echo ""; echo ""; echo "" ; echo "" -echo "####################################################" -echo " Compiling project with K_CONST=$k_value" -echo "####################################################" -make ocl OCL_TARGET=sw_emu OCL_PLATFORM=$AWS_PLATFORM APPLICATION_DIR=$app_dir K_CONST=$k_value - - -### EXECUTION -echo ""; echo ""; echo "" ; echo "" -echo "####################################################" -echo " Executing DigitRec with K_CONST=$k_value" -echo "####################################################" -export XCL_EMULATION_MODE=sw_emu -#export XCL_EMULATION_MODE=hw_emu -./App_host.exe -f App.sw_emu.xclbin - diff --git a/tvm/src/template/sdaccel/utils.cpp b/tvm/src/template/sdaccel/utils.cpp deleted file mode 100644 index 0e6dd632e..000000000 --- a/tvm/src/template/sdaccel/utils.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/*===============================================================*/ -/* */ -/* utils.cpp */ -/* */ -/* Utility functions */ -/* */ -/*===============================================================*/ - -#include -#include -#include -#include - -#include "utils.h" - -void print_usage(char* filename) -{ - printf("usage: %s \n", filename); - printf(" -f [kernel file]\n"); -} - -void parse_sdaccel_command_line_args( - int argc, - char** argv, - std::string& kernelFile) -{ - - int c = 0; - - while ((c = getopt(argc, argv, "f:")) != -1) - { - switch (c) 
- { - case 'f': - kernelFile = optarg; - break; - default: - { - print_usage(argv[0]); - exit(-1); - } - } // matching on arguments - } // while args present -} - - diff --git a/tvm/src/template/sdaccel/utils.h b/tvm/src/template/sdaccel/utils.h deleted file mode 100644 index a3ab77437..000000000 --- a/tvm/src/template/sdaccel/utils.h +++ /dev/null @@ -1,19 +0,0 @@ -/*===============================================================*/ -/* */ -/* utils.h */ -/* */ -/* Utility functions */ -/* */ -/*===============================================================*/ - -#include -//target device -const std::string TARGET_DEVICE = "xilinx_aws-vu9p-f1-04261818_dynamic_5_0"; - -void print_usage(char* filename); - -void parse_sdaccel_command_line_args( - int argc, - char** argv, - std::string& kernelFile); - diff --git a/tvm/src/template/vivado/Makefile b/tvm/src/template/vivado/Makefile deleted file mode 100644 index 1d84baead..000000000 --- a/tvm/src/template/vivado/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -#========================================================================== -# Makefile -#========================================================================== -# @brief: A makefile the compiles and synthesizes the program -# -# @desc: 1. "make" runs csim by default -# 2. "make csim" compiles & executes the fixed-point implementation -# 3. "make clean" cleans up the directory - - -# Extract Vivado HLS include path -VHLS_PATH := $(dir $(shell which vivado_hls))/.. -VHLS_INC ?= ${VHLS_PATH}/include - -CFLAGS = -g -I${VHLS_INC} - -all: csim - -csim: host.cpp - @echo "Compiling & simulating on amdpool ..." 
- g++ ${CFLAGS} $^ -o out -lrt - ./out - -vivado: - @echo "Run Vivado csim and HLS" - vivado_hls -f run.tcl - -clean: - rm -rf out *.txt *.dat *.prj *.log - rm -rf zedboard_project* xillydemo.bit - diff --git a/tvm/src/template/vivado/run.tcl b/tvm/src/template/vivado/run.tcl deleted file mode 100644 index d80b865df..000000000 --- a/tvm/src/template/vivado/run.tcl +++ /dev/null @@ -1,36 +0,0 @@ -#============================================================================= -# run_base.tcl -#============================================================================= -# @brief: A Tcl script for synthesizing the design. - -# Project name -set hls_prj out.prj - -# Open/reset the project -open_project ${hls_prj} -reset - -# Top function of the design is "top" -set_top top - -# Add design and testbench files -add_files kernel.cpp -add_files -tb host.cpp - -open_solution "solution1" -# Use Zynq device -set_part {xc7z020clg484-1} - -# Target clock period is 10ns -create_clock -period 10 - -# Directives - -############################################ - -# Simulate the C++ design -csim_design -O -# Synthesize the design -csynth_design -# Co-simulate the design -#cosim_design -exit diff --git a/tvm/src/template/vivado/timer.h b/tvm/src/template/vivado/timer.h deleted file mode 100644 index 77c461b00..000000000 --- a/tvm/src/template/vivado/timer.h +++ /dev/null @@ -1,94 +0,0 @@ -//--------------------------------------------------------- -// Timer.h -//--------------------------------------------------------- -#ifndef __TIMER_H__ -#define __TIMER_H__ -#include -#include -#include -#include - -#define TIMER_ON - -//--------------------------------------------------------- -// Timer is an object which helps profile programs using -// the clock() function. 
-// - By default, a timer is stopped when you instantiate it -// and must be started manually -// - Passing True to the constructor starts the timer when -// it is constructed -// - When the timer is destructed it prints stats to stdout -//--------------------------------------------------------- -class Timer { - - #ifdef TIMER_ON - - char binName[50]; - unsigned nCalls; - timeval ts_start; - float totalTime; - - public: - //------------------------------------------------------------------ - // constructor - //------------------------------------------------------------------ - Timer (const char* Name="", bool On=false) { - if (On) { - // record the start time - gettimeofday(&ts_start, NULL); - nCalls = 1; - } - else { - nCalls = 0; - } - totalTime = 0; - strcpy(binName, Name); - } - - //------------------------------------------------------------------ - // destructor - //------------------------------------------------------------------ - ~Timer () { - // on being destroyed, print the average and total time - if (nCalls > 0) { - printf ("%-20s: ", binName); - printf ("%6d calls; ", nCalls); - printf ("%7.3f msecs total time\n", 1000*totalTime); - //printf ("%7.4f msecs average time;\n", 1000*totalTime/nCalls); - } - } - - //------------------------------------------------------------------ - // start timer - //------------------------------------------------------------------ - void start() { - // record start time - gettimeofday(&ts_start, NULL); - nCalls++; - } - - //------------------------------------------------------------------ - // stop timer - //------------------------------------------------------------------ - void stop() { - // get current time, add elapsed time to totalTime - timeval ts_curr; - gettimeofday(&ts_curr, NULL); - totalTime += float(ts_curr.tv_sec - ts_start.tv_sec) + - float(ts_curr.tv_usec)*1e-6 - float(ts_start.tv_usec)*1e-6; - } - - #else - - //-------------------------------------------------------------------- - // all methods do 
nothing if TIMER_ON is not set - //-------------------------------------------------------------------- - public: - Timer (const char* Name, bool On=true) {} - void start() {} - void stop() {} - - #endif -}; - -#endif