From 2b3b2b06b8246299ed05d8198748256e2b74b6ee Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 25 Jul 2019 17:50:59 -0400 Subject: [PATCH 001/103] add sdaccel, aocl for heterocl --- python/heterocl/tvm/target.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 12235d95d..4d146d420 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -50,7 +50,7 @@ if _LIB_NAME != "libhcl_runtime.so": raise err_msg -FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim'] +FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'sdaccel', 'aocl'] def _merge_opts(opts, new_opts): """Helper function to merge options""" From 86525fc8293dcf14ad6a46ecf9b0e49063a02bec Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 25 Jul 2019 17:55:24 -0400 Subject: [PATCH 002/103] fpga --- tvm/src/codegen/opencl/aocl/codegen_aocl.cc | 0 tvm/src/codegen/opencl/codegen_opencl.cc | 0 tvm/src/codegen/opencl/sdaccel/codegen_sdaccel.cc | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tvm/src/codegen/opencl/aocl/codegen_aocl.cc create mode 100644 tvm/src/codegen/opencl/codegen_opencl.cc create mode 100644 tvm/src/codegen/opencl/sdaccel/codegen_sdaccel.cc diff --git a/tvm/src/codegen/opencl/aocl/codegen_aocl.cc b/tvm/src/codegen/opencl/aocl/codegen_aocl.cc new file mode 100644 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc new file mode 100644 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/sdaccel/codegen_sdaccel.cc b/tvm/src/codegen/opencl/sdaccel/codegen_sdaccel.cc new file mode 100644 index 000000000..e69de29bb From 1d8115f2a83461da8cff8157129790d3ba0f8787 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 25 Jul 2019 18:04:39 -0400 Subject: [PATCH 003/103] Create codeanalys_openclc.cc --- tvm/src/codegen/opencl/codeanalys_openclc.cc 
| 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.cc diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc new file mode 100644 index 000000000..e69de29bb From 47026fe9591eb902854dfbe5a7b3552b010b028f Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 25 Jul 2019 18:39:19 -0400 Subject: [PATCH 004/103] Update target.py --- python/heterocl/tvm/target.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 4d146d420..3df1564db 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -50,7 +50,7 @@ if _LIB_NAME != "libhcl_runtime.so": raise err_msg -FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'sdaccel', 'aocl'] +FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'opencl', 'sdaccel', 'aocl'] def _merge_opts(opts, new_opts): """Helper function to merge options""" From 5fba7cc970caf2eadcc96c304bc145cb0d27b590 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 25 Jul 2019 22:40:23 -0400 Subject: [PATCH 005/103] run --- .vscode/c_cpp_properties.json | 18 + .../codegen => }/opencl/aocl/codegen_aocl.cc | 0 tvm/opencl/codeanalys_openclc.cc | 921 ++++++++++++++++++ tvm/opencl/codegen_opencl.cc | 368 +++++++ .../opencl/sdaccel/codegen_sdaccel.cc | 0 tvm/src/codegen/opencl/build_opencl.cc | 102 ++ tvm/src/codegen/opencl/codegen_aocl.cc | 246 +++++ tvm/src/codegen/opencl/codegen_sdaccel.cc | 327 +++++++ .../{codegen_opencl.cc => opencl_module.cc} | 0 9 files changed, 1982 insertions(+) create mode 100644 .vscode/c_cpp_properties.json rename tvm/{src/codegen => }/opencl/aocl/codegen_aocl.cc (100%) create mode 100644 tvm/opencl/codeanalys_openclc.cc create mode 100644 tvm/opencl/codegen_opencl.cc rename tvm/{src/codegen => }/opencl/sdaccel/codegen_sdaccel.cc (100%) create mode 100644 
tvm/src/codegen/opencl/build_opencl.cc create mode 100644 tvm/src/codegen/opencl/codegen_aocl.cc create mode 100644 tvm/src/codegen/opencl/codegen_sdaccel.cc rename tvm/src/codegen/opencl/{codegen_opencl.cc => opencl_module.cc} (100%) diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 000000000..8764f7120 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Mac", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [], + "macFrameworkPath": [], + "compilerPath": "/usr/local/bin/gcc-8", + "cStandard": "c11", + "cppStandard": "c++17", + "intelliSenseMode": "clang-x64", + "compileCommands": "${workspaceFolder}/build/pkgs/llvm/build/compile_commands.json" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/tvm/src/codegen/opencl/aocl/codegen_aocl.cc b/tvm/opencl/aocl/codegen_aocl.cc similarity index 100% rename from tvm/src/codegen/opencl/aocl/codegen_aocl.cc rename to tvm/opencl/aocl/codegen_aocl.cc diff --git a/tvm/opencl/codeanalys_openclc.cc b/tvm/opencl/codeanalys_openclc.cc new file mode 100644 index 000000000..66869a817 --- /dev/null +++ b/tvm/opencl/codeanalys_openclc.cc @@ -0,0 +1,921 @@ +#include +#include +#include +#include "./codeanalys_openclc.h" +#include "../codegen_common.h" +#include "../../arithmetic/compute_expr.h" + +namespace TVM { +namespace codegen { + +using namespace ir; + +void CodeAnalysOpenCLC::Init() { + ; +} + +void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { + alloc_storage_scope_.clear(); + handle_data_type_.clear(); + map_arg_type_.clear(); + CodeGenSourceBase::ClearFuncState(); +} +void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { + // Clear previous generated state. + this->InitFuncState(f); + + // Add to alloc buffer type. 
+ for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Record the arguments for analyzing the type + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + this->map_arg_type_[vid]; + } + int func_scope = this->BeginScope(); + VisitStmt(f->body); + this->EndScope(func_scope); +} + +str2tupleMap CodeAnalysOpenCLC::Finish() { + return this->map_arg_type_; +} + +void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) + VisitExpr(n, os); +} + +void CodeAnalysOpenCLC::PrintSSAAssign( + const std::string& target, const std::string& src, Type t) { + PrintType(t, stream); + stream << ' ' << target << " = "; + if (src.length() > 3 && + src[0] == '(' && src[src.length() - 1] == ')') { + stream << src.substr(1, src.length() - 2); + } else { + stream << src; + } + stream << ";\n"; +} + +// Print a reference expression to a buffer. +std::string CodeAnalysOpenCLC::GetBufferRef( + Type t, const Variable* buffer, Expr index) { + std::ostringstream os; + std::string vid = GetVarID(buffer); + std::string scope; + if (alloc_storage_scope_.count(buffer)) { + scope = alloc_storage_scope_.at(buffer); + } + bool is_vol = volatile_buf_.count(buffer) != 0; + if (t.lanes() == 1) { + if (!HandleTypeMatch(buffer, t) || is_vol) { + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)" << vid << ')'; + } else { + os << vid; + } + os << '['; + PrintExpr(index, os); + os << ']'; + } else { + // Buffer declared as vector type. 
+ // optimize for case where it is in register, + if (HandleTypeMatch(buffer, t) && !is_vol) { + // optimize for constant access + int offset; + if (arith::GetConstInt(index, &offset)) { + CHECK_EQ(offset % t.lanes(), 0) + << "Find unaligned vector load to a vector type"; + os << vid << '[' << (offset / t.lanes()) << ']'; + return os.str(); + } + } + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)("; + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << vid << " + "; + PrintExpr(index, os); + os << "))[0]"; + } + return os.str(); +} + +// Print a reference expression to a buffer. +std::string CodeAnalysOpenCLC::GetStructRef( + Type t, const Expr& buffer, const Expr& index, int kind) { + if (kind < intrinsic::kArrKindBound_) { + std::ostringstream os; + os << "(((TVMArray*)"; + this->PrintExpr(buffer, os); + os << ")"; + if (kind == intrinsic::kArrAddr) { + os << " + "; + this->PrintExpr(index, os); + os << ")"; + return os.str(); + } + os << '['; + this->PrintExpr(index, os); + os << "]."; + // other case: get fields. 
+ switch (kind) { + case intrinsic::kArrData: os << "data"; break; + case intrinsic::kArrShape: os << "shape"; break; + case intrinsic::kArrStrides: os << "strides"; break; + case intrinsic::kArrNDim: os << "ndim"; break; + case intrinsic::kArrTypeCode: os << "dtype.code"; break; + case intrinsic::kArrTypeBits: os << "dtype.bits"; break; + case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; + case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; + case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; + case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; + default: os << "unknown_field_code_" << kind; + } + os << ')'; + return os.str(); + } else { + CHECK_LT(kind, intrinsic::kTVMValueKindBound_); + std::ostringstream os; + os << "(((TVMValue*)"; + this->PrintExpr(buffer, os); + os << ")[" << index << "]."; + if (t.is_handle()) { + os << "v_handle"; + } else if (t.is_float()) { + os << "v_float64"; + } else if (t.is_int()) { + os << "v_int64"; + } else { + os << t; + } + os << ")"; + return os.str(); + } +} + + +bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) return false; + return it->second == t; +} + +void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) { + handle_data_type_[buf_var] = t; + } else { + CHECK(it->second == t) + << "conflicting buf var type"; + } +} + +void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, + Type t, int i, + std::ostream& os) { // NOLINT(*) + os << vec << ".s" << std::hex << i << std::dec; +} + +void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, + Type t, int i, + const std::string& value) { + this->PrintIndent(); + stream << vec << ".s" << std::hex << i + << " = " << value << ";\n" << std::dec; +} + +std::string CodeAnalysOpenCLC::GetVecLoad( + Type t, const 
Variable* buffer, Expr base) { + return GetBufferRef(t, buffer, base); +} + +void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + std::string ref = GetBufferRef(t, buffer, base); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; +} + +std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + return os.str(); +} + +void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { + LOG(FATAL) << "not implemented"; +} + +void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) +} + +void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) + CHECK_EQ(scope, "global"); +} + +std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) + std::ostringstream os; + PrintType(t, os); + return os.str(); +} + +void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os << "void*"; return; + } + if (t.is_float()) { + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "int" << t.bits() << "_t"; return; + } + } + } + os << t; +} + + +inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == Int(32)) { + std::ostringstream temp; + temp << op->value; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == UInt(32)) { + std::ostringstream temp; + temp << op->value << "U"; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + switch (op->type.bits()) { + case 64: case 32: { + std::ostringstream temp; + temp << std::scientific << op->value; + if (op->type.bits() == 32) temp << 'f'; + p->MarkConst(temp.str()); + os << temp.str(); + break; + } + case 16: { + os << '('; + p->PrintType(op->type, os); + os << ')' << std::scientific <value << 'f'; + break; + } + default: os << op << "\n"; + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) + os << "\"" << op->value << "\""; +} + +template +inline void PrintBinaryExpr(const T* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + if (isalpha(opstr[0])) { + os << opstr << '('; + p->PrintExpr(op->a, os); + os << ", "; + p->PrintExpr(op->b, os); + os << ')'; + } else { + os << '('; + p->PrintExpr(op->a, os); + os << ' ' << opstr << ' '; + p->PrintExpr(op->b, os); + os << ')'; + } + } else { + p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); + } +} + +inline void PrintBinaryIntrinsitc(const Call* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + CHECK_EQ(op->args.size(), 2U); + os << '('; + p->PrintExpr(op->args[0], 
os); + os << opstr; + p->PrintExpr(op->args[1], os); + os << ')'; + } else { + p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); + } +} +void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) + std::stringstream value; + this->PrintExpr(op->value, value); + os << CastFromTo(value.str(), op->value.type(), op->type); +} +void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) + os << GetVarID(op); +} +void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "+", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "-", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "*", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "/", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "%", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "min", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "max", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "==", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "!=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">", os, this); +} +void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "&&", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "||", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) + os << '!'; + PrintExpr(op->a, os); +} + +void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) + if (op->call_type == Call::Extern || + op->call_type == Call::PureExtern) { + os << op->name << "("; + for (size_t i = 0; i < op->args.size(); i++) { + this->PrintExpr(op->args[i], os); + if (i < op->args.size() - 1) { + os << ", "; + } + } + os << ")"; + } else if (op->is_intrinsic(Call::bitwise_and)) { + PrintBinaryIntrinsitc(op, " & ", os, this); + } else if (op->is_intrinsic(Call::bitwise_xor)) { + PrintBinaryIntrinsitc(op, " ^ ", os, this); + } else if (op->is_intrinsic(Call::bitwise_or)) { + PrintBinaryIntrinsitc(op, " | ", os, this); + } else if (op->is_intrinsic(Call::bitwise_not)) { + CHECK_EQ(op->args.size(), 1U); + os << "(~"; + this->PrintExpr(op->args[0], os); + os << ')'; + } else if (op->is_intrinsic(Call::shift_left)) { + PrintBinaryIntrinsitc(op, " << ", os, this); + } else if (op->is_intrinsic(Call::shift_right)) { + PrintBinaryIntrinsitc(op, " >> ", os, this); + } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintExpr(op->args[0], os); + os << " ? 
"; + PrintExpr(op->args[1], os); + os << " : "; + PrintExpr(op->args[2], os); + os << ")"; + } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { + const Load *l = op->args[0].as(); + CHECK(op->args.size() == 1 && l); + os << "(("; + this->PrintType(l->type.element_of(), os); + os << " *)" << this->GetVarID(l->buffer_var.get()) + << " + "; + this->PrintExpr(l->index, os); + os << ')'; + } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { + CHECK_EQ(op->args.size(), 3U); + os << GetStructRef( + op->type, op->args[0], op->args[1], + op->args[2].as()->value); + } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { + CHECK_EQ(op->args.size(), 1U); + os << "("; + this->PrintExpr(op->args[0], os); + os << " == NULL)"; + } else + os << op->name << "()"; +} + +void CodeAnalysOpenCLC::PrintVecBinaryOp( + const std::string& op, Type t, + Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) + if (isalpha(op[0])) { + os << op << "("; + this->PrintExpr(lhs, os); + os << ", "; + this->PrintExpr(rhs, os); + os << ")"; + } else { + os <<"("; + this->PrintExpr(lhs, os); + os << ' ' << op << ' '; + this->PrintExpr(rhs, os); + os << ")"; + } +} + +inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { + const Ramp* r = index.as(); + if (!r) return false; + if (!is_one(r->stride)) return false; + CHECK_EQ(r->lanes, lanes); + *base = r->base; + return true; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) + int lanes = op->type.lanes(); + // delcare type. 
+ if (op->type.lanes() == 1) { + std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); + os << ref; + } else { + CHECK(is_one(op->predicate)) + << "predicated load is not supported"; + Expr base; + if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { + std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); + os << ref; + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // load seperately. + std::string svalue = GetUniqueName("_"); + this->PrintIndent(); + this->PrintType(op->type, stream); + stream << ' ' << svalue << ";\n"; + std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string vid = GetVarID(op->buffer_var.get()); + Type elem_type = op->type.element_of(); + for (int i = 0; i < lanes; ++i) { + std::ostringstream value_temp; + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + value_temp << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, value_temp); + value_temp << ' '; + } + } + PrintType(elem_type, value_temp); + value_temp << "*)" << vid << ')'; + } else { + value_temp << vid; + } + value_temp << '['; + PrintVecElemLoad(sindex, op->index.type(), i, value_temp); + value_temp << ']'; + PrintVecElemStore(svalue, op->type, i, value_temp.str()); + } + os << svalue; + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { + Type t = op->value.type(); + if (t.lanes() == 1) { + std::string value = this->PrintExpr(op->value); + std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; + } else { + CHECK(is_one(op->predicate)) + << "Predicated store is not supported"; + Expr base; + if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { + std::string value = this->PrintExpr(op->value); + this->PrintVecStore(op->buffer_var.get(), t, base, value); + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // store elements seperately + std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); + std::string vid = GetVarID(op->buffer_var.get()); + for (int i = 0; i < t.lanes(); ++i) { + this->PrintIndent(); + Type elem_type = t.element_of(); + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + stream << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, stream); + stream << ' '; + } + } + PrintType(elem_type, stream); + stream << "*)" << vid << ')'; + } else { + stream << vid; + } + stream << '['; + PrintVecElemLoad(index, op->index.type(), i, stream); + stream << "] = "; + PrintVecElemLoad(value, op->value.type(), i, stream); + stream << ";\n"; + } + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) + std::string value = PrintExpr(op->value); + CHECK(!var_idmap_.count(op->var.get())); + var_idmap_[op->var.get()] = value; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) + // constraint of current logic + CHECK_EQ(op->base.type(), Int(32)); + os << "((int" << op->lanes << ")("; + for (int i = 0; i < op->lanes; i++) { + os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; + if (i != op->lanes - 1) + os << ", "; + } + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << 
"Broadcast: not supported "; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->condition, os); + os << " ? "; + PrintExpr(op->true_value, os); + os << " : "; + PrintExpr(op->false_value, os); + os << ")"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " & (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. a' = SHR a for Idx_R bits + // 2. mask: 1.(length).1 + // (1 << (L - R + 1)) - 1 + // 3. a' & mask + + os << "(("; + PrintExpr(op->a, os); + os << " >> "; + PrintExpr(op->index_right, os); + os << ") & ((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " | (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. mask: 0.(Idx L).01..10.(Idx R).0 + // ((1 << (L - R + 1)) - 1) << R + // 2. 
a & mask + + os << "("; + PrintExpr(op->a, os); + os << " & (((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1) << "; + PrintExpr(op->index_right, os); + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "Quantize is not yet support"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "KernelExpr is not yet support"; +} + + +void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { + // TODO comaniac + //std::vector vec_var = GetNodesByType(op->value); + + std::string arg_vid = "unknown"; + std::string str = PrintExpr(op->value); + if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { + size_t pos_arg = str.find("arg"); + size_t pos_data = str.find("data"); + arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); + } + else if (std::regex_match(str, std::regex("arg(.+)"))) + arg_vid = str; + + std::string vid = AllocVarID(op->var.get()); + if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { + if ("unknown" != arg_vid) + LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; + } else { + Type type = op->var.type(); + if (op->var.type() == Handle() && + handle_data_type_.count(op->var.get())) + type = handle_data_type_.at(op->var.get()); + this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); + } + VisitStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { + CHECK(!is_zero(op->condition)); + std::string vid = AllocVarID(op->buffer_var.get()); + if (op->new_expr.defined()) { + // Prefer global static allocation for the program + CHECK_EQ(op->free_function, "nop"); + std::string new_data = PrintExpr(op->new_expr); + this->PrintIndent(); + PrintType(op->type, stream); + stream << "* "<< vid << '=' << new_data << ";\n"; + } else { + this->PrintIndent(); + int32_t constant_size = 
op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); + stream << ' '; + PrintType(op->type, stream); + stream << ' '<< vid << '[' + << constant_size << "];\n"; + } + RegisterHandleType(op->buffer_var.get(), op->type); + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { + if (op->attr_key == ir::attr::thread_extent) { + IterVar iv(op->node.node_); + if (iv->thread_tag.length() != 0) { + if (!var_idmap_.count(iv->var.get())) { + BindThreadIndex(iv); + } + } + } else if (op->attr_key == ir::attr::storage_scope) { + const Variable* v = op->node.as(); + CHECK(v); + alloc_storage_scope_[v] = op->value.as()->value; + } else if (op->attr_key == ir::attr::volatile_scope) { + const Variable* v = op->node.as(); + CHECK(v); + volatile_buf_.insert(v); + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (op->message.as()) { + // GLOG style check + stream << "CHECK(" << cond << ") << \"" + << op->message.as()->value << "\";\n"; + } else { + stream << "assert(" << cond << ");\n"; + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const For* op) { + std::string extent = PrintExpr(op->extent); + PrintIndent(); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { + PrintStmt(op->first); + if (op->rest.defined()) PrintStmt(op->rest); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { + if (is_const(op->value)) return; + const Call* call = op->value.as(); + if (call) { + if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { + this->PrintStorageSync(call); return; + } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { + CHECK_EQ(call->args.size(), 4); + std::string value = PrintExpr(call->args[3]); + std::string ref = GetStructRef( + call->args[3].type(), + call->args[0], + call->args[1], + call->args[2].as()->value); + this->PrintIndent(); + this->stream << ref << " = " << value << ";\n"; + return; + } + } + std::string vid = this->PrintExpr(op->value); + this->PrintIndent(); + this->stream << "(void)" << vid << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { + PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { + LOG(FATAL) << "KernelDef is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { + LOG(FATAL) << "KernelStmt is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { + this->stream << "return "; + PrintExpr(op->value); + this->stream << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { + // TODO: Check if the break statement is used correctly + this->stream << "break;\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const While *op) { + std::string condition = PrintExpr(op->condition); 
+ PrintIndent(); + stream << "while (" << condition << ") {\n"; + int while_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(while_scope); + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Reuse *op) { + LOG(FATAL) << "KernelDef is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} + +void CodeAnalysOpenCLC::VisitStmt_(const Stencil *op) { + PrintStmt(op->body); +} + +} // namespace codegen +} // namespace TVM diff --git a/tvm/opencl/codegen_opencl.cc b/tvm/opencl/codegen_opencl.cc new file mode 100644 index 000000000..57da77896 --- /dev/null +++ b/tvm/opencl/codegen_opencl.cc @@ -0,0 +1,368 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include +# include "./codegen_opencl.h" +# include "./codeanalys_openclc.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +CodeGenOpenCL::CodeGenOpenCL() { + restrict_keyword_ = "restrict"; +} + +void CodeGenOpenCL::InitFuncState(LoweredFunc f) { + CodeGenC::InitFuncState(f); + for (Var arg: f->args) { + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } +} + + +// void CodeGenOpenCL::AddFunction(LoweredFunc f) { +// this->stream << "__kernel "; +// CodeGenC::AddFunction(f); +// } +// void CodeGenOpenCL::AddFunction(LoweredFunc f, +// str2tupleMap map_arg_type) { +// // Write header files +// // TODO: Insert header files here +// // Clear previous generated state +// this->InitFuncState(f); +// // Register alloc buffer type +// for (const auto & kv : f->handle_data_type) { +// RegisterHandleType(kv.first.get(), kv.second.type()); +// } +// // Write entry function name +// this->stream << "void " << f->name << "("; +// // Write arguments +// for (size_t i = 0; i < f->args.size(); ++i) { +// Var v = f->args[i]; +// std::string vid = AllocVarID(v.get()); +// if (i != 0) this->stream << ", "; +// if (map_arg_type.find(vid) == 
map_arg_type.end()) { +// LOG(WARNING) << vid << " type not found\n"; +// PrintType(v.type(), this->stream); +// this->stream << ' ' << vid; +// } +// else { +// auto arg = map_arg_type[vid]; +// PrintType(std::get<1>(arg), this->stream); +// this->stream << ' ' << std::get<0>(arg); +// const BufferNode* buf = f->api_args[i].as(); +// if (v.type().is_handle() && buf) { +// var_shape_map_[buf->data.get()] = buf->shape; +// for (size_t i = 0; i < buf->shape.size(); i++) { +// this->stream << '['; +// this->PrintExpr(buf->shape[i], this->stream); +// this->stream << ']'; +// } +// } +// // this->stream << "*"; TODO: create an option for this +// } +// } +// stream << ") {\n"; +// int func_scope = this->BeginScope(); +// range_ = CollectIterRange(f->body); +// this->PrintStmt(f->body); +// this->EndScope(func_scope); +// this->PrintIndent(); +// this->stream << "}\n\n"; +// } + + +// void CodeGenOpenCL::AddFunction(LoweredFunc f, +// str2tupleMap map_arg_type) { +// // Clear previous generated state +// this->InitFuncState(f); + +// // // Skip the first underscore, so SSA variable starts from _1 +// // GetUniqueName("_"); + +// // // Register alloc buffer type +// // for (const auto & kv : f->handle_data_type) { +// // RegisterHandleType(kv.first.get(), kv.second.type()); +// // } + +// // // Write header files +// // this->stream << "#include \n"; +// // this->stream << "#include \n"; +// // this->stream << "#include \n"; + +// // // Write entry function name +// // this->stream << "#pragma ACCEL kernel\n"; +// this->stream << "void " << f->name << "("; + +// // Write arguments +// for (size_t i = 0; i < f->args.size(); ++i) { +// Var v = f->args[i]; +// std::string vid = AllocVarID(v.get()); +// if (i != 0) this->stream << ", "; +// if (map_arg_type.find(vid) == map_arg_type.end()) { +// LOG(WARNING) << vid << " type not found\n"; +// PrintType(v.type(), this->stream); +// this->stream << ' ' << vid; +// } +// else { +// auto arg = map_arg_type[vid]; +// 
PrintType(std::get<1>(arg), this->stream); +// if (v.type().is_handle()) +// this->stream << "*"; +// this->stream << ' ' << std::get<0>(arg); +// } +// } +// stream << ") {\n"; +// int func_scope = this->BeginScope(); +// this->PrintStmt(f->body); +// this->EndScope(func_scope); +// this->PrintIndent(); +// this->stream << "}\n\n"; +// CodeGenC::AddFunction(f); +// } + + +void CodeGenOpenCL::AddFunction(LoweredFunc f) { + this->stream << "__kernel "; + CodeGenC::AddFunction(f); +} + + + +std::string CodeGenOpenCL::Finish() { + // inject extension enable pragma for fp16 and fp64 + if (enable_fp16_) { + decl_stream + << "#ifdef cl_khr_fp16\n" + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "#elif defined(cl_amd_fp16)\n" + "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" + "#else\n" + "#error \"Half precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + if (enable_fp64_) { + decl_stream + << "#ifdef cl_khr_fp64\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#elif defined(cl_amd_fp64)\n" + "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" + "#else\n" + "#error \"Double precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + return CodeGenC::Finish(); +} + +void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { + CHECK(!var_idmap_.count(iv->var.get())); + runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); + std::ostringstream os; + if (ts.rank == 1) { + os << "get_local_id(" << ts.dim_index << ")"; + } else { + os << "get_group_id(" << ts.dim_index << ")"; + } + var_idmap_[iv->var.get()] = + CastFromTo(os.str(), UInt(64), iv->var.type()); +} + +void CodeGenOpenCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + int lanes = t.lanes(); + if (t.is_handle()) { + CHECK_EQ(lanes, 1) + << "do not yet support vector types"; + os << "void*"; return; + } + if ( t== Bool() ) { + os << "bool"; return; + } + bool fail = 
false; + if (t.is_float()) { + switch (t.bits()) { + case 16: + os << "half"; + enable_fp16_ = true; + break; + case 32: + os << "float"; + break; + case 64: + os << "double"; + enable_fp64_ = true; + break; + default: + fail = true; + break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } else if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << 'u'; + } + if (t.bits() == 8 && t.lanes() == 4) { + // directly 4 8 bit int in integer. + os << "int"; return; + } + switch (t.bits()) { + case 8: os << "char"; break; + case 16: os << "short"; break; + case 32: os << "int"; break; + case 64: os << "long"; break; + case 1: os << "int"; break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; +} + +void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os) { // NOLINT(*) + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + auto it = alloc_storage_scope_.find(buffer); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << GetVarID(buffer) << " + "; + PrintExpr(base, os); +} +std::string CodeGenOpenCL::GetVecLoad( + Type t, const Variable* buffer, Expr base) { + std::ostringstream os; + os << "vload" << t.lanes() << "(0, "; + PrintVecAddr(buffer, t, base, os); + os << ")"; + return os.str(); +} + +void CodeGenOpenCL::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + this->PrintIndent(); + stream << "vstore" << t.lanes() << "(" << value << ", 0, "; + PrintVecAddr(buffer, t, base, stream); + stream << ");\n"; +} + +void CodeGenOpenCL::PrintStorageSync(const Call* op) { + const std::string& sync = op->args[0].as()->value; + if (sync == "warp") { + 
LOG(FATAL) << "warp sync not supported in opencl"; + } else if (sync == "shared") { + this->PrintIndent(); + this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; + } else if (sync == "global") { + LOG(FATAL) << "not supported"; + } +} + +void CodeGenOpenCL::PrintStorageScope( + const std::string& scope, std::ostream& os) { // NOLINT(*) + if (scope == "global") { + os << "__global"; + } else if (scope == "shared") { + os << "__local"; + } +} + +std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + if (target.lanes() == 1) { + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + } else { // convert vector type + os << "("; + os << "convert_"; + this->PrintType(target, os); + os << "(" << value << "))"; + } + return os.str(); +} + +void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + std::string v = PrintExpr(op->value); + os << "(("; + PrintType(op->type, os); + os << ")("; + for (int i = 0; i < op->lanes; ++i) { + if (i != 0) os << ", "; + os << v; + } + os << "))"; +} + +void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) + if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintType(op->args[2].type(), os); + os << ")"; + } + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + + +void CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) + if (std::isinf(op->value)) { + if ( op->value < 0) { + os << "-"; + } + os << 
"INFINITY"; + } else if (std::isnan(op->value)) { + os << "NAN"; + } else { + CodeGenC::VisitExpr_(op, os); + } +} + +void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) + os << "("; + PrintType(op->true_value.type(), os); + os << ")"; + CodeGenC::VisitExpr_(op, os); +} + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/sdaccel/codegen_sdaccel.cc b/tvm/opencl/sdaccel/codegen_sdaccel.cc similarity index 100% rename from tvm/src/codegen/opencl/sdaccel/codegen_sdaccel.cc rename to tvm/opencl/sdaccel/codegen_sdaccel.cc diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc new file mode 100644 index 000000000..4d7319269 --- /dev/null +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -0,0 +1,102 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#include +#include +#include +#include +#include +#include "../../runtime/meta_data.h" +# include +# include "./codegen_sdaccel.h" +# include "./codegen_aocl.h" +# include "../build_common.h" + + +namespace TVM { +namespace codegen { + +// #if OPENCL_SDACCEL_RUNTIME + +// #endif + +// #if OPENCL_AOCL_RUNTIME + +// #endif + + +// codegen for AOCL +std::string BuildAOCL(Array funcs) { + using TVM::runtime::Registry; + bool output_ssa = false; + CodeGenAOCL cg; + cg.Init(output_ssa); + for ( LoweredFunc f : funcs ) { + cg.AddFunction(f); + } + std::string code = cg.Finish(); + + if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { + code = (*f)(code).operator std::string(); + } + LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; + return code; +} + + +// codegen for SDACCEL +// std::string BuildSDACCEL(Array funcs) { +// using TVM::runtime::Registry; +// bool output_ssa = false; +// CodeGenSDACCEL cg; +// cg.Init(output_ssa); +// for (LoweredFunc f : funcs) { +// cg.AddFunction(f); +// } +// std::string code = cg.Finish(); + +// // if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { +// // 
code = (*f)(code).operator std::string(); +// // } +// LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; +// return code; +// } + +// codegen for SDACCEL +std::string BuildSDACCEL(Array funcs) { + using TVM::runtime::Registry; + bool output_ssa = false; + CodeGenSDACCEL cg; + cg.Init(output_ssa); + for (LoweredFunc f : funcs) { + cg.AddFunction(f); + } + std::string code = cg.Finish(); + + // if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { + // code = (*f)(code).operator std::string(); + // } + LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; + // std::unordered_map + std::unordered_map temp = ExtractFuncInfo(funcs); + + + return code; +} + + + + +TVM_REGISTER_API("codegen.build_sdaccel") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildSDACCEL(args[0]); + }); + +TVM_REGISTER_API("codegen.build_aocl") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildAOCL(args[0]); + }); +} // namespace codegen +} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc new file mode 100644 index 000000000..6763aa7c3 --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -0,0 +1,246 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ + +# include +# include +# include +# include +# include +# include "./codegen_aocl.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +CodeGenAOCL::CodeGenAOCL() { + restrict_keyword_ = "restrict"; +} + +void CodeGenAOCL::InitFuncState(LoweredFunc f) { + CodeGenC::InitFuncState(f); + for (Var arg: f->args) { + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } +} + +void CodeGenAOCL::AddFunction(LoweredFunc f) { + this->stream << "__kernel "; + CodeGenC::AddFunction(f); +} + +std::string CodeGenAOCL::Finish() { + // inject extension enable pragma for fp16 and fp64 + if (enable_fp16_) { + decl_stream + << "#ifdef 
cl_khr_fp16\n" + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "#elif defined(cl_amd_fp16)\n" + "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" + "#else\n" + "#error \"Half precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + if (enable_fp64_) { + decl_stream + << "#ifdef cl_khr_fp64\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#elif defined(cl_amd_fp64)\n" + "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" + "#else\n" + "#error \"Double precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + return CodeGenC::Finish(); +} + +void CodeGenAOCL::BindThreadIndex(const IterVar& iv) { + CHECK(!var_idmap_.count(iv->var.get())); + runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); + std::ostringstream os; + if (ts.rank == 1) { + os << "get_local_id(" << ts.dim_index << ")"; + } else { + os << "get_group_id(" << ts.dim_index << ")"; + } + var_idmap_[iv->var.get()] = + CastFromTo(os.str(), UInt(64), iv->var.type()); +} + +void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + int lanes = t.lanes(); + os << t.bits(); + if (t.is_handle()) { + CHECK_EQ(lanes, 1) + << "do not yet support vector types"; + os << "void*"; return; + } + if ( t== Bool() ) { + os << "bool"; return; + } + bool fail = false; + if (t.is_float()) { + switch (t.bits()) { + case 16: + os << "half"; + enable_fp16_ = true; + break; + case 32: + os << "float"; + break; + case 64: + os << "double"; + enable_fp64_ = true; + break; + default: + fail = true; + break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } else if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << 'u'; + } + if (t.bits() == 8 && t.lanes() == 4) { + // directly 4 8 bit int in integer. 
+ os << "int"; return; + } + switch (t.bits()) { + case 8: os << "char"; break; + case 16: os << "short"; break; + case 32: os << "int"; break; + case 64: os << "long"; break; + case 1: os << "int"; break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; +} + +void CodeGenAOCL::PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os) { // NOLINT(*) + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + auto it = alloc_storage_scope_.find(buffer); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << GetVarID(buffer) << " + "; + PrintExpr(base, os); +} +std::string CodeGenAOCL::GetVecLoad( + Type t, const Variable* buffer, Expr base) { + std::ostringstream os; + os << "vload" << t.lanes() << "(0, "; + PrintVecAddr(buffer, t, base, os); + os << ")"; + return os.str(); +} + +void CodeGenAOCL::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + this->PrintIndent(); + stream << "vstore" << t.lanes() << "(" << value << ", 0, "; + PrintVecAddr(buffer, t, base, stream); + stream << ");\n"; +} + +void CodeGenAOCL::PrintStorageSync(const Call* op) { + const std::string& sync = op->args[0].as()->value; + if (sync == "warp") { + LOG(FATAL) << "warp sync not supported in opencl"; + } else if (sync == "shared") { + this->PrintIndent(); + this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; + } else if (sync == "global") { + LOG(FATAL) << "not supported"; + } +} + +void CodeGenAOCL::PrintStorageScope( + const std::string& scope, std::ostream& os) { // NOLINT(*) + if (scope == "global") { + os << "__global"; + } else if (scope == "shared") { + os << "__local"; + } +} + +std::string CodeGenAOCL::CastFromTo(std::string value, Type from, Type target) { + if 
(from == target) return value; + std::ostringstream os; + if (target.lanes() == 1) { + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + } else { // convert vector type + os << "("; + os << "convert_"; + this->PrintType(target, os); + os << "(" << value << "))"; + } + return os.str(); +} + +void CodeGenAOCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + std::string v = PrintExpr(op->value); + os << "(("; + PrintType(op->type, os); + os << ")("; + for (int i = 0; i < op->lanes; ++i) { + if (i != 0) os << ", "; + os << v; + } + os << "))"; +} + +void CodeGenAOCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) + if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintType(op->args[2].type(), os); + os << ")"; + } + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenAOCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) + if (std::isinf(op->value)) { + if ( op->value < 0) { + os << "-"; + } + os << "INFINITY"; + } else if (std::isnan(op->value)) { + os << "NAN"; + } else { + CodeGenC::VisitExpr_(op, os); + } +} + +void CodeGenAOCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) + os << "("; + PrintType(op->true_value.type(), os); + os << ")"; + CodeGenC::VisitExpr_(op, os); +} + +} // namespace codegen +} // namespace tvm \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc new file mode 100644 index 000000000..268699302 --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -0,0 +1,327 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include "./codegen_sdaccel.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +CodeGenSDACCEL::CodeGenSDACCEL() { + restrict_keyword_ = "restrict"; +} + +void CodeGenSDACCEL::InitFuncState(LoweredFunc f) { + CodeGenC::InitFuncState(f); + for (Var arg: f->args) 
{ + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } +} + + +// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { +// this->stream << "__kernel "; +// CodeGenC::AddFunction(f); +// } + +void CodeGenSDACCEL::AddFunction(LoweredFunc f) { + this->stream << "# pragma once\n"; + this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + this->stream << "# include \n"; + this->stream << "# include \n"; + this->stream << "# include \n"; + this->stream << "# include \n"; + this->stream << "# include \n"; + this->stream << "# include \n\n"; + this->stream << "__kernel "; + + CodeGenC::AddFunction(f); +} + + + +// void CodeGenSDACCEL::AddFunction(LoweredFunc f, +// str2tupleMap map_arg_type) { +// // Don't Write header flies +// // Clear previous generated state +// this->InitFuncState(f); +// // Register alloc buffer type +// for ( const auto & kv : f->handle_data_type ) { +// this->stream << kv.first.get(); +// this->stream << kv.second.type(); +// RegisterHandleType(kv.first.get(), kv.second.type()); +// } +// // Write entry function name +// this->stream << "__kernel "; +// // Write arguments +// for ( size_t i = 0; i < f->args.size(); i++ ) { +// Var v = f->args[i]; +// std::string vid = AllocVarID(v.get()); +// if ( i!= 0 ) { +// this->stream << ", "; +// } +// if ( map_arg_type.find(vid) == map_arg_type.end()) { +// LOG(WARNING) << vid << " type not found\n"; +// PrintType(v.type(), this->stream); +// this->stream << ' ' << vid; +// } +// else { +// auto arg = map_arg_type[vid]; +// PrintType(std::get<1>(arg), this->stream); +// if (v.type().is_handle()) { +// this->stream << "*"; +// } +// this->stream << ' ' << std::get<0>(arg); + +// } +// stream << ") {\n"; +// int func_scope = this->BeginScope(); +// 
this->PrintStmt(f->body); +// this->EndScope(func_scope); +// this->PrintIndent(); +// this->stream << "}\n\n"; +// } +// CodeGenSDACCEL::AddFunction(f, map_arg_type); +// } + +std::string CodeGenSDACCEL::Finish() { + // inject extension enable pragma for fp16 and fp64 + if (enable_fp16_) { + decl_stream + << "#ifdef cl_khr_fp16\n" + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "#elif defined(cl_amd_fp16)\n" + "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" + "#else\n" + "#error \"Half precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + if (enable_fp64_) { + decl_stream + << "#ifdef cl_khr_fp64\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#elif defined(cl_amd_fp64)\n" + "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" + "#else\n" + "#error \"Double precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + return CodeGenC::Finish(); +} + +void CodeGenSDACCEL::BindThreadIndex(const IterVar& iv) { + CHECK(!var_idmap_.count(iv->var.get())); + runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); + std::ostringstream os; + if (ts.rank == 1) { + os << "get_local_id(" << ts.dim_index << ")"; + } else { + os << "get_group_id(" << ts.dim_index << ")"; + } + var_idmap_[iv->var.get()] = + CastFromTo(os.str(), UInt(64), iv->var.type()); +} + +void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + int lanes = t.lanes(); + if (t.is_handle()) { + CHECK_EQ(lanes, 1) + << "do not yet support vector types"; + os << "void*"; return; + } + if ( t== Bool() ) { + os << "bool"; return; + } + bool fail = false; + if (t.is_float()) { + switch (t.bits()) { + case 16: + os << "half"; + enable_fp16_ = true; + break; + case 32: + os << "float"; + break; + case 64: + os << "double"; + enable_fp64_ = true; + break; + default: + fail = true; + break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes 
<= 16)) { + os << lanes; return; + } + } else if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << 'u'; + } + if (t.bits() == 8 && t.lanes() == 4) { + // directly 4 8 bit int in integer. + os << "int"; return; + } + switch (t.bits()) { + case 8: os << "char"; break; + case 16: os << "short"; break; + case 32: os << "int"; break; + case 64: os << "long"; break; + case 1: os << "int"; break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; +} + +void CodeGenSDACCEL::PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os) { // NOLINT(*) + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + auto it = alloc_storage_scope_.find(buffer); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << GetVarID(buffer) << " + "; + PrintExpr(base, os); +} +std::string CodeGenSDACCEL::GetVecLoad( + Type t, const Variable* buffer, Expr base) { + std::ostringstream os; + os << "vload" << t.lanes() << "(0, "; + PrintVecAddr(buffer, t, base, os); + os << ")"; + return os.str(); +} + +void CodeGenSDACCEL::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + this->PrintIndent(); + stream << "vstore" << t.lanes() << "(" << value << ", 0, "; + PrintVecAddr(buffer, t, base, stream); + stream << ");\n"; +} + +void CodeGenSDACCEL::PrintStorageSync(const Call* op) { + const std::string& sync = op->args[0].as()->value; + if (sync == "warp") { + LOG(FATAL) << "warp sync not supported in opencl"; + } else if (sync == "shared") { + this->PrintIndent(); + this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; + } else if (sync == "global") { + LOG(FATAL) << "not supported"; + } +} + +void CodeGenSDACCEL::PrintStorageScope( + const std::string& scope, std::ostream& os) { 
// NOLINT(*) + if (scope == "global") { + os << "__global"; + } else if (scope == "shared") { + os << "__local"; + } +} + +std::string CodeGenSDACCEL::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + if (target.lanes() == 1) { + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + } else { // convert vector type + os << "("; + os << "convert_"; + this->PrintType(target, os); + os << "(" << value << "))"; + } + return os.str(); +} + +void CodeGenSDACCEL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + std::string v = PrintExpr(op->value); + os << "(("; + PrintType(op->type, os); + os << ")("; + for (int i = 0; i < op->lanes; ++i) { + if (i != 0) os << ", "; + os << v; + } + os << "))"; +} + +void CodeGenSDACCEL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) + if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintType(op->args[2].type(), os); + os << ")"; + } + CodeGenC::VisitExpr_(op, os); +} + + +void CodeGenSDACCEL::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + + + +void CodeGenSDACCEL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) + if (std::isinf(op->value)) { + if ( op->value < 0) { + os << "-"; + } + os << "INFINITY"; + } else if (std::isnan(op->value)) { + os << "NAN"; + } else { + CodeGenC::VisitExpr_(op, os); + } +} + +void CodeGenSDACCEL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) + os << "("; + PrintType(op->true_value.type(), os); + os << ")"; + CodeGenC::VisitExpr_(op, os); +} + +} // 
namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/opencl_module.cc similarity index 100% rename from tvm/src/codegen/opencl/codegen_opencl.cc rename to tvm/src/codegen/opencl/opencl_module.cc From 31d00e3da8c1b8853c6500f2e33548a949a11d83 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 26 Jul 2019 00:22:56 -0400 Subject: [PATCH 006/103] can run successfully --- .vscode/settings.json | 60 +++++++++ tvm/src/codegen/opencl/build_opencl.cc | 124 ++++++++++++++----- tvm/src/codegen/opencl/codeanalys_openclc.cc | 0 tvm/src/codegen/opencl/codegen_sdaccel.cc | 51 +++++++- tvm/src/codegen/opencl/opencl_module.cc | 0 5 files changed, 202 insertions(+), 33 deletions(-) create mode 100644 .vscode/settings.json delete mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.cc delete mode 100644 tvm/src/codegen/opencl/opencl_module.cc diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..a767b8b52 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,60 @@ +{ + "files.associations": { + "array": "cpp", + "atomic": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "cctype": "cpp", + "cfenv": "cpp", + "chrono": "cpp", + "cinttypes": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "condition_variable": "cpp", + "csetjmp": "cpp", + "csignal": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "forward_list": "cpp", + "list": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "fstream": "cpp", + "functional": "cpp", + "future": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "memory": "cpp", + "mutex": "cpp", + "new": "cpp", + "numeric": "cpp", + "optional": "cpp", + "ostream": "cpp", + 
"ratio": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "thread": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "typeindex": "cpp", + "typeinfo": "cpp", + "utility": "cpp" + } +} \ No newline at end of file diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 4d7319269..bbdfd6fd2 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "../../runtime/meta_data.h" # include @@ -26,44 +25,87 @@ namespace codegen { // #endif +// std::string BuildOpenCL(Array funcs) { +// using TVM::runtime::Registry; + +// CodeAnalysOpenCLC ca; +// CodeGenSDACCEL cg; +// for (LoweredFunc f : funcs) { +// ca.AddFunction(f); +// str2tupleMap map_arg_type; +// map_arg_type = ca.Finish(); -// codegen for AOCL -std::string BuildAOCL(Array funcs) { - using TVM::runtime::Registry; - bool output_ssa = false; - CodeGenAOCL cg; - cg.Init(output_ssa); - for ( LoweredFunc f : funcs ) { - cg.AddFunction(f); - } - std::string code = cg.Finish(); - - if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { - code = (*f)(code).operator std::string(); - } - LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; - return code; -} +// cg.AddFunction(f, map_arg_type); +// } +// std::string code = cg.Finish(); +// if (const auto*f = Registry::Get("tvm_callback_opencl_postproc")) { +// code = (*f)(code).operator std::string(); +// } +// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; +// return code; +// } -// codegen for SDACCEL -// std::string BuildSDACCEL(Array funcs) { +// std::string BuildOpenCL(Array funcs) { // using TVM::runtime::Registry; // bool output_ssa = false; // CodeGenSDACCEL cg; // cg.Init(output_ssa); + // for (LoweredFunc f : funcs) { // cg.AddFunction(f); // } // std::string code = cg.Finish(); -// // if ( 
const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { -// // code = (*f)(code).operator std::string(); -// // } -// LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; +// if (const auto*f = Registry::Get("tvm_callback_opencl_postproc")) { +// code = (*f)(code).operator std::string(); +// } +// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; +// return code; +// } + + + + + +// codegen for AOCL +// std::string BuildAOCL(Array funcs) { +// using TVM::runtime::Registry; +// bool output_ssa = false; +// CodeGenAOCL cg; +// cg.Init(output_ssa); +// for ( LoweredFunc f : funcs ) { +// cg.AddFunction(f); +// } +// std::string code = cg.Finish(); + +// if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { +// code = (*f)(code).operator std::string(); +// } +// LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; +// return code; +// } + + +// codegen for AOCL +// std::string BuildAOCL(Array funcs) { +// using TVM::runtime::Registry; +// bool output_ssa = false; +// CodeGenAOCL cg; +// cg.Init(output_ssa); +// for ( LoweredFunc f : funcs ) { +// cg.AddFunction(f); +// } +// std::string code = cg.Finish(); + +// if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { +// code = (*f)(code).operator std::string(); +// } +// LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; // return code; // } + // codegen for SDACCEL std::string BuildSDACCEL(Array funcs) { using TVM::runtime::Registry; @@ -79,13 +121,31 @@ std::string BuildSDACCEL(Array funcs) { // code = (*f)(code).operator std::string(); // } LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; - // std::unordered_map - std::unordered_map temp = ExtractFuncInfo(funcs); - - return code; } +// codegen for SDACCEL +// template +// std::string BuildOpenCL(Array funcs) { +// CodeAnalysOpenCL ca; +// CodeGen cg; +// for (LoweredFunc f : funcs) { +// ca.AddFunction(f); +// str2tupleMap map_arg_type; +// 
map_arg_type = ca.Finish(); +// cg.AddFunction(f, map_arg_type); +// } +// std::string code = cg.Finish(); + +// // if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { +// // code = (*f)(code).operator std::string(); +// // } +// LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; +// // std::unordered_map +// // std::unordered_map temp = ExtractFuncInfo(funcs); +// return code; +// } + @@ -94,9 +154,9 @@ TVM_REGISTER_API("codegen.build_sdaccel") * rv = BuildSDACCEL(args[0]); }); -TVM_REGISTER_API("codegen.build_aocl") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildAOCL(args[0]); - }); +// TVM_REGISTER_API("codegen.build_aocl") +// .set_body([]( TVMArgs args, TVMRetValue * rv ) { +// * rv = BuildOpenCL(args[0]); +// }); } // namespace codegen } // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc deleted file mode 100644 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 268699302..b04cbfaec 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -45,10 +45,59 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f) { this->stream << "# include \n"; this->stream << "# include \n\n"; this->stream << "__kernel "; - + CodeGenC::AddFunction(f); } +// void CodeGenSDACCEL::AddFunction(LoweredFunc f, +// str2tupleMap map_arg_type) { +// // Clear previous generated state +// this->InitFuncState(f); + +// // Skip the first underscore, so SSA variable starts from _1 +// GetUniqueName("_"); + +// // Register alloc buffer type +// for (const auto & kv : f->handle_data_type) { +// RegisterHandleType(kv.first.get(), kv.second.type()); +// } + +// // Write header files +// this->stream << "#include \n"; +// this->stream << "#include \n"; +// this->stream << "#include \n"; + +// // Write entry function name +// 
this->stream << "#pragma ACCEL kernel\n"; +// this->stream << "void " << f->name << "("; + +// // Write arguments +// for (size_t i = 0; i < f->args.size(); ++i) { +// Var v = f->args[i]; +// std::string vid = AllocVarID(v.get()); +// if (i != 0) this->stream << ", "; +// if (map_arg_type.find(vid) == map_arg_type.end()) { +// LOG(WARNING) << vid << " type not found\n"; +// PrintType(v.type(), this->stream); +// this->stream << ' ' << vid; +// } +// else { +// auto arg = map_arg_type[vid]; +// PrintType(std::get<1>(arg), this->stream); +// if (v.type().is_handle()) +// this->stream << "*"; +// this->stream << ' ' << std::get<0>(arg); +// } +// } +// stream << ") {\n"; +// int func_scope = this->BeginScope(); +// this->PrintStmt(f->body); +// this->EndScope(func_scope); +// this->PrintIndent(); +// this->stream << "}\n\n"; +// } + + // void CodeGenSDACCEL::AddFunction(LoweredFunc f, diff --git a/tvm/src/codegen/opencl/opencl_module.cc b/tvm/src/codegen/opencl/opencl_module.cc deleted file mode 100644 index e69de29bb..000000000 From e0dc81f02a16a69aeaf809a197b4873921fa1c81 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 29 Jul 2019 16:27:05 -0400 Subject: [PATCH 007/103] Create codegen_opencl.cc --- tvm/src/codegen/opencl/codegen_opencl.cc | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tvm/src/codegen/opencl/codegen_opencl.cc diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc new file mode 100644 index 000000000..e69de29bb From 8c010da63a26aaa9942c5d7d73bdf74c90f90bf6 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 29 Jul 2019 17:27:49 -0400 Subject: [PATCH 008/103] now --- tvm/src/codegen/opencl/build_opencl.cc | 34 ++++++++++++++++++++---- tvm/src/codegen/opencl/codegen_opencl.cc | 0 2 files changed, 29 insertions(+), 5 deletions(-) delete mode 100644 tvm/src/codegen/opencl/codegen_opencl.cc diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 
bbdfd6fd2..c26167d33 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -3,11 +3,12 @@ yb269@cornell.edu */ -#include -#include -#include -#include -#include "../../runtime/meta_data.h" +# include +# include +# include +# include +# include +# include "../../runtime/meta_data.h" # include # include "./codegen_sdaccel.h" # include "./codegen_aocl.h" @@ -124,6 +125,23 @@ std::string BuildSDACCEL(Array funcs) { return code; } +// codegen for OpenCL +// std::string BuildOpenCL(Array funcs) { +// using TVM::runtime::Registry; +// bool output_ssa = false; +// CodeGenOpenCL cg; +// cg.Init(output_ssa); +// for (LoweredFunc f : funcs) { +// cg.AddFunction(f); +// } +// std::string code = cg.Finish(); + +// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; +// return code; +// } + + + // codegen for SDACCEL // template // std::string BuildOpenCL(Array funcs) { @@ -154,6 +172,12 @@ TVM_REGISTER_API("codegen.build_sdaccel") * rv = BuildSDACCEL(args[0]); }); + +// TVM_REGISTER_API("codegen.build_opencl") +// .set_body([]( TVMArgs args, TVMRetValue * rv ) { +// * rv = BuildOpenCL(args[0]); +// }); + // TVM_REGISTER_API("codegen.build_aocl") // .set_body([]( TVMArgs args, TVMRetValue * rv ) { // * rv = BuildOpenCL(args[0]); diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc deleted file mode 100644 index e69de29bb..000000000 From 843b6f46b4e9f8a60df131948cf2394045e07355 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 29 Jul 2019 20:55:48 -0400 Subject: [PATCH 009/103] all done --- tvm/src/codegen/opencl/build_opencl.cc | 39 +- tvm/src/codegen/opencl/codeanalys_openclc.cc | 919 +++++++++++++++++++ tvm/src/codegen/opencl/codegen_sdaccel.cc | 142 ++- 3 files changed, 1041 insertions(+), 59 deletions(-) create mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.cc diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 
c26167d33..f65f27604 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -12,6 +12,7 @@ # include # include "./codegen_sdaccel.h" # include "./codegen_aocl.h" +# include "./codeanalys_openclc.h" # include "../build_common.h" @@ -108,23 +109,45 @@ namespace codegen { // codegen for SDACCEL +// std::string BuildSDACCEL(Array funcs) { +// using TVM::runtime::Registry; +// bool output_ssa = false; +// CodeGenSDACCEL cg; +// cg.Init(output_ssa); +// for (LoweredFunc f : funcs) { +// cg.AddFunction(f); +// } +// std::string code = cg.Finish(); + +// // if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { +// // code = (*f)(code).operator std::string(); +// // } +// LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; +// return code; +// } + + +// codegen for SDACCEL_WITH_ANALYSIS std::string BuildSDACCEL(Array funcs) { using TVM::runtime::Registry; - bool output_ssa = false; + CodeAnalysOpenCLC ca; CodeGenSDACCEL cg; - cg.Init(output_ssa); for (LoweredFunc f : funcs) { - cg.AddFunction(f); + ca.AddFunction(f); + str2tupleMap map_arg_type; + map_arg_type = ca.Finish(); + + cg.AddFunction(f, map_arg_type); + } std::string code = cg.Finish(); - // if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { - // code = (*f)(code).operator std::string(); - // } - LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; + if (const auto* f = Registry::Get("tvm_callback_sdaccel_postproc")) { + code = (*f)(code).operator std::string(); + } + LOG(WARNING) << "SDaccel doesn't have runtime, return kernel code"; return code; } - // codegen for OpenCL // std::string BuildOpenCL(Array funcs) { // using TVM::runtime::Registry; diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc new file mode 100644 index 000000000..030453a94 --- /dev/null +++ b/tvm/src/codegen/opencl/codeanalys_openclc.cc @@ -0,0 +1,919 @@ +/*! 
+ * Copyright (c) 2017 by Contributors + * \file tvm/src/codegen/hlsc/codegen_hlsc.cc + */ +#include +#include +#include +#include "./codeanalys_openclc.h" +#include "../codegen_common.h" +#include "../../arithmetic/compute_expr.h" + +namespace TVM { +namespace codegen { + +using namespace ir; + +void CodeAnalysOpenCLC::Init() { + ; +} + +void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { + alloc_storage_scope_.clear(); + handle_data_type_.clear(); + map_arg_type_.clear(); + CodeGenSourceBase::ClearFuncState(); +} +void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { + // Clear previous generated state. + this->InitFuncState(f); + + // Add to alloc buffer type. + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Record the arguments for analyzing the type + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + this->map_arg_type_[vid]; + } + int func_scope = this->BeginScope(); + VisitStmt(f->body); + this->EndScope(func_scope); +} + +str2tupleMap CodeAnalysOpenCLC::Finish() { + return this->map_arg_type_; +} + +void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) + VisitExpr(n, os); +} + +void CodeAnalysOpenCLC::PrintSSAAssign( + const std::string& target, const std::string& src, Type t) { + PrintType(t, stream); + stream << ' ' << target << " = "; + if (src.length() > 3 && + src[0] == '(' && src[src.length() - 1] == ')') { + stream << src.substr(1, src.length() - 2); + } else { + stream << src; + } + stream << ";\n"; +} + +// Print a reference expression to a buffer. 
+std::string CodeAnalysOpenCLC::GetBufferRef( + Type t, const Variable* buffer, Expr index) { + std::ostringstream os; + std::string vid = GetVarID(buffer); + std::string scope; + if (alloc_storage_scope_.count(buffer)) { + scope = alloc_storage_scope_.at(buffer); + } + bool is_vol = volatile_buf_.count(buffer) != 0; + if (t.lanes() == 1) { + if (!HandleTypeMatch(buffer, t) || is_vol) { + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)" << vid << ')'; + } else { + os << vid; + } + os << '['; + PrintExpr(index, os); + os << ']'; + } else { + // Buffer declared as vector type. + // optimize for case where it is in register, + if (HandleTypeMatch(buffer, t) && !is_vol) { + // optimize for constant access + int offset; + if (arith::GetConstInt(index, &offset)) { + CHECK_EQ(offset % t.lanes(), 0) + << "Find unaligned vector load to a vector type"; + os << vid << '[' << (offset / t.lanes()) << ']'; + return os.str(); + } + } + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)("; + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << vid << " + "; + PrintExpr(index, os); + os << "))[0]"; + } + return os.str(); +} + +// Print a reference expression to a buffer. +std::string CodeAnalysOpenCLC::GetStructRef( + Type t, const Expr& buffer, const Expr& index, int kind) { + if (kind < intrinsic::kArrKindBound_) { + std::ostringstream os; + os << "(((TVMArray*)"; + this->PrintExpr(buffer, os); + os << ")"; + if (kind == intrinsic::kArrAddr) { + os << " + "; + this->PrintExpr(index, os); + os << ")"; + return os.str(); + } + os << '['; + this->PrintExpr(index, os); + os << "]."; + // other case: get fields. 
+ switch (kind) { + case intrinsic::kArrData: os << "data"; break; + case intrinsic::kArrShape: os << "shape"; break; + case intrinsic::kArrStrides: os << "strides"; break; + case intrinsic::kArrNDim: os << "ndim"; break; + case intrinsic::kArrTypeCode: os << "dtype.code"; break; + case intrinsic::kArrTypeBits: os << "dtype.bits"; break; + case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; + case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; + case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; + case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; + default: os << "unknown_field_code_" << kind; + } + os << ')'; + return os.str(); + } else { + CHECK_LT(kind, intrinsic::kTVMValueKindBound_); + std::ostringstream os; + os << "(((TVMValue*)"; + this->PrintExpr(buffer, os); + os << ")[" << index << "]."; + if (t.is_handle()) { + os << "v_handle"; + } else if (t.is_float()) { + os << "v_float64"; + } else if (t.is_int()) { + os << "v_int64"; + } else { + os << t; + } + os << ")"; + return os.str(); + } +} + + +bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) return false; + return it->second == t; +} + +void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) { + handle_data_type_[buf_var] = t; + } else { + CHECK(it->second == t) + << "conflicting buf var type"; + } +} + +void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, + Type t, int i, + std::ostream& os) { // NOLINT(*) + os << vec << ".s" << std::hex << i << std::dec; +} + +void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, + Type t, int i, + const std::string& value) { + this->PrintIndent(); + stream << vec << ".s" << std::hex << i + << " = " << value << ";\n" << std::dec; +} + +std::string CodeAnalysOpenCLC::GetVecLoad( + Type t, const 
Variable* buffer, Expr base) { + return GetBufferRef(t, buffer, base); +} + +void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + std::string ref = GetBufferRef(t, buffer, base); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; +} + +std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + return os.str(); +} + +void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { + LOG(FATAL) << "not implemented"; +} + +void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) +} + +void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) + CHECK_EQ(scope, "global"); +} + +std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) + std::ostringstream os; + PrintType(t, os); + return os.str(); +} + +void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os << "void*"; return; + } + if (t.is_float()) { + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "int" << t.bits() << "_t"; return; + } + } + } + os << t; +} + + +inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == Int(32)) { + std::ostringstream temp; + temp << op->value; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == UInt(32)) { + std::ostringstream temp; + temp << op->value << "U"; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + switch (op->type.bits()) { + case 64: case 32: { + std::ostringstream temp; + temp << std::scientific << op->value; + if (op->type.bits() == 32) temp << 'f'; + p->MarkConst(temp.str()); + os << temp.str(); + break; + } + case 16: { + os << '('; + p->PrintType(op->type, os); + os << ')' << std::scientific <value << 'f'; + break; + } + default: os << op << "\n"; + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) + os << "\"" << op->value << "\""; +} + +template +inline void PrintBinaryExpr(const T* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + if (isalpha(opstr[0])) { + os << opstr << '('; + p->PrintExpr(op->a, os); + os << ", "; + p->PrintExpr(op->b, os); + os << ')'; + } else { + os << '('; + p->PrintExpr(op->a, os); + os << ' ' << opstr << ' '; + p->PrintExpr(op->b, os); + os << ')'; + } + } else { + p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); + } +} + +inline void PrintBinaryIntrinsitc(const Call* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + CHECK_EQ(op->args.size(), 2U); + os << '('; + p->PrintExpr(op->args[0], 
os); + os << opstr; + p->PrintExpr(op->args[1], os); + os << ')'; + } else { + p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); + } +} +void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) + std::stringstream value; + this->PrintExpr(op->value, value); + os << CastFromTo(value.str(), op->value.type(), op->type); +} +void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) + os << GetVarID(op); +} +void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "+", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "-", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "*", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "/", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "%", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "min", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "max", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "==", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "!=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">", os, this); +} +void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "&&", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "||", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) + os << '!'; + PrintExpr(op->a, os); +} + +void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) + if (op->call_type == Call::Extern || + op->call_type == Call::PureExtern) { + os << op->name << "("; + for (size_t i = 0; i < op->args.size(); i++) { + this->PrintExpr(op->args[i], os); + if (i < op->args.size() - 1) { + os << ", "; + } + } + os << ")"; + } else if (op->is_intrinsic(Call::bitwise_and)) { + PrintBinaryIntrinsitc(op, " & ", os, this); + } else if (op->is_intrinsic(Call::bitwise_xor)) { + PrintBinaryIntrinsitc(op, " ^ ", os, this); + } else if (op->is_intrinsic(Call::bitwise_or)) { + PrintBinaryIntrinsitc(op, " | ", os, this); + } else if (op->is_intrinsic(Call::bitwise_not)) { + CHECK_EQ(op->args.size(), 1U); + os << "(~"; + this->PrintExpr(op->args[0], os); + os << ')'; + } else if (op->is_intrinsic(Call::shift_left)) { + PrintBinaryIntrinsitc(op, " << ", os, this); + } else if (op->is_intrinsic(Call::shift_right)) { + PrintBinaryIntrinsitc(op, " >> ", os, this); + } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintExpr(op->args[0], os); + os << " ? 
"; + PrintExpr(op->args[1], os); + os << " : "; + PrintExpr(op->args[2], os); + os << ")"; + } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { + const Load *l = op->args[0].as(); + CHECK(op->args.size() == 1 && l); + os << "(("; + this->PrintType(l->type.element_of(), os); + os << " *)" << this->GetVarID(l->buffer_var.get()) + << " + "; + this->PrintExpr(l->index, os); + os << ')'; + } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { + CHECK_EQ(op->args.size(), 3U); + os << GetStructRef( + op->type, op->args[0], op->args[1], + op->args[2].as()->value); + } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { + CHECK_EQ(op->args.size(), 1U); + os << "("; + this->PrintExpr(op->args[0], os); + os << " == NULL)"; + } else + os << op->name << "()"; +} + +void CodeAnalysOpenCLC::PrintVecBinaryOp( + const std::string& op, Type t, + Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) + if (isalpha(op[0])) { + os << op << "("; + this->PrintExpr(lhs, os); + os << ", "; + this->PrintExpr(rhs, os); + os << ")"; + } else { + os <<"("; + this->PrintExpr(lhs, os); + os << ' ' << op << ' '; + this->PrintExpr(rhs, os); + os << ")"; + } +} + +inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { + const Ramp* r = index.as(); + if (!r) return false; + if (!is_one(r->stride)) return false; + CHECK_EQ(r->lanes, lanes); + *base = r->base; + return true; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) + int lanes = op->type.lanes(); + // delcare type. 
+ if (op->type.lanes() == 1) { + std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); + os << ref; + } else { + CHECK(is_one(op->predicate)) + << "predicated load is not supported"; + Expr base; + if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { + std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); + os << ref; + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // load seperately. + std::string svalue = GetUniqueName("_"); + this->PrintIndent(); + this->PrintType(op->type, stream); + stream << ' ' << svalue << ";\n"; + std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string vid = GetVarID(op->buffer_var.get()); + Type elem_type = op->type.element_of(); + for (int i = 0; i < lanes; ++i) { + std::ostringstream value_temp; + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + value_temp << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, value_temp); + value_temp << ' '; + } + } + PrintType(elem_type, value_temp); + value_temp << "*)" << vid << ')'; + } else { + value_temp << vid; + } + value_temp << '['; + PrintVecElemLoad(sindex, op->index.type(), i, value_temp); + value_temp << ']'; + PrintVecElemStore(svalue, op->type, i, value_temp.str()); + } + os << svalue; + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { + Type t = op->value.type(); + if (t.lanes() == 1) { + std::string value = this->PrintExpr(op->value); + std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; + } else { + CHECK(is_one(op->predicate)) + << "Predicated store is not supported"; + Expr base; + if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { + std::string value = this->PrintExpr(op->value); + this->PrintVecStore(op->buffer_var.get(), t, base, value); + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // store elements seperately + std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); + std::string vid = GetVarID(op->buffer_var.get()); + for (int i = 0; i < t.lanes(); ++i) { + this->PrintIndent(); + Type elem_type = t.element_of(); + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + stream << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, stream); + stream << ' '; + } + } + PrintType(elem_type, stream); + stream << "*)" << vid << ')'; + } else { + stream << vid; + } + stream << '['; + PrintVecElemLoad(index, op->index.type(), i, stream); + stream << "] = "; + PrintVecElemLoad(value, op->value.type(), i, stream); + stream << ";\n"; + } + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) + std::string value = PrintExpr(op->value); + CHECK(!var_idmap_.count(op->var.get())); + var_idmap_[op->var.get()] = value; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) + // constraint of current logic + CHECK_EQ(op->base.type(), Int(32)); + os << "((int" << op->lanes << ")("; + for (int i = 0; i < op->lanes; i++) { + os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; + if (i != op->lanes - 1) + os << ", "; + } + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << 
"Broadcast: not supported "; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->condition, os); + os << " ? "; + PrintExpr(op->true_value, os); + os << " : "; + PrintExpr(op->false_value, os); + os << ")"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " & (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. a' = SHR a for Idx_R bits + // 2. mask: 1.(length).1 + // (1 << (L - R + 1)) - 1 + // 3. a' & mask + + os << "(("; + PrintExpr(op->a, os); + os << " >> "; + PrintExpr(op->index_right, os); + os << ") & ((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " | (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. mask: 0.(Idx L).01..10.(Idx R).0 + // ((1 << (L - R + 1)) - 1) << R + // 2. 
a & mask + + os << "("; + PrintExpr(op->a, os); + os << " & (((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1) << "; + PrintExpr(op->index_right, os); + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "Quantize is not yet support"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "KernelExpr is not yet support"; +} + + +void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { + // TODO comaniac + //std::vector vec_var = GetNodesByType(op->value); + + std::string arg_vid = "unknown"; + std::string str = PrintExpr(op->value); + if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { + size_t pos_arg = str.find("arg"); + size_t pos_data = str.find("data"); + arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); + } + else if (std::regex_match(str, std::regex("arg(.+)"))) + arg_vid = str; + + std::string vid = AllocVarID(op->var.get()); + if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { + if ("unknown" != arg_vid) + LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; + } else { + Type type = op->var.type(); + if (op->var.type() == Handle() && + handle_data_type_.count(op->var.get())) + type = handle_data_type_.at(op->var.get()); + this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); + } + VisitStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { + CHECK(!is_zero(op->condition)); + std::string vid = AllocVarID(op->buffer_var.get()); + if (op->new_expr.defined()) { + // Prefer global static allocation for the program + CHECK_EQ(op->free_function, "nop"); + std::string new_data = PrintExpr(op->new_expr); + this->PrintIndent(); + PrintType(op->type, stream); + stream << "* "<< vid << '=' << new_data << ";\n"; + } else { + this->PrintIndent(); + int32_t constant_size = 
op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); + stream << ' '; + PrintType(op->type, stream); + stream << ' '<< vid << '[' + << constant_size << "];\n"; + } + RegisterHandleType(op->buffer_var.get(), op->type); + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { + if (op->attr_key == ir::attr::thread_extent) { + IterVar iv(op->node.node_); + if (iv->thread_tag.length() != 0) { + if (!var_idmap_.count(iv->var.get())) { + BindThreadIndex(iv); + } + } + } else if (op->attr_key == ir::attr::storage_scope) { + const Variable* v = op->node.as(); + CHECK(v); + alloc_storage_scope_[v] = op->value.as()->value; + } else if (op->attr_key == ir::attr::volatile_scope) { + const Variable* v = op->node.as(); + CHECK(v); + volatile_buf_.insert(v); + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (op->message.as()) { + // GLOG style check + stream << "CHECK(" << cond << ") << \"" + << op->message.as()->value << "\";\n"; + } else { + stream << "assert(" << cond << ");\n"; + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const For* op) { + std::string extent = PrintExpr(op->extent); + PrintIndent(); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { + PrintStmt(op->first); + if (op->rest.defined()) PrintStmt(op->rest); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { + if (is_const(op->value)) return; + const Call* call = op->value.as(); + if (call) { + if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { + this->PrintStorageSync(call); return; + } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { + CHECK_EQ(call->args.size(), 4); + std::string value = PrintExpr(call->args[3]); + std::string ref = GetStructRef( + call->args[3].type(), + call->args[0], + call->args[1], + call->args[2].as()->value); + this->PrintIndent(); + this->stream << ref << " = " << value << ";\n"; + return; + } + } + std::string vid = this->PrintExpr(op->value); + this->PrintIndent(); + this->stream << "(void)" << vid << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { + PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { + LOG(FATAL) << "KernelDef is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { + LOG(FATAL) << "KernelStmt is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { + this->stream << "return "; + PrintExpr(op->value); + this->stream << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { + // TODO: Check if the break statement is used correctly + this->stream << "break;\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const While *op) { + std::string condition = PrintExpr(op->condition); 
+ PrintIndent(); + stream << "while (" << condition << ") {\n"; + int while_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(while_scope); + PrintIndent(); + stream << "}\n"; +} + + +void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index b04cbfaec..d546f16ec 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -7,6 +7,7 @@ # include # include # include +# include # include "./codegen_sdaccel.h" # include "../../runtime/thread_storage_scope.h" @@ -32,7 +33,38 @@ void CodeGenSDACCEL::InitFuncState(LoweredFunc f) { // CodeGenC::AddFunction(f); // } -void CodeGenSDACCEL::AddFunction(LoweredFunc f) { +// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { + // this->stream << "# pragma once\n"; + // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n\n"; + // this->stream << "__kernel "; + +// CodeGenC::AddFunction(f); +// } + +void CodeGenSDACCEL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Write head files + this->stream << "# pragma ACCEL kernel\n"; this->stream << "# pragma once\n"; 
this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; @@ -44,58 +76,39 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f) { this->stream << "# include \n"; this->stream << "# include \n"; this->stream << "# include \n\n"; - this->stream << "__kernel "; + + // Write entry function name + this->stream << "__kernel " << f->name << "("; + - CodeGenC::AddFunction(f); -} -// void CodeGenSDACCEL::AddFunction(LoweredFunc f, -// str2tupleMap map_arg_type) { -// // Clear previous generated state -// this->InitFuncState(f); - -// // Skip the first underscore, so SSA variable starts from _1 -// GetUniqueName("_"); - -// // Register alloc buffer type -// for (const auto & kv : f->handle_data_type) { -// RegisterHandleType(kv.first.get(), kv.second.type()); -// } - -// // Write header files -// this->stream << "#include \n"; -// this->stream << "#include \n"; -// this->stream << "#include \n"; - -// // Write entry function name -// this->stream << "#pragma ACCEL kernel\n"; -// this->stream << "void " << f->name << "("; - -// // Write arguments -// for (size_t i = 0; i < f->args.size(); ++i) { -// Var v = f->args[i]; -// std::string vid = AllocVarID(v.get()); -// if (i != 0) this->stream << ", "; -// if (map_arg_type.find(vid) == map_arg_type.end()) { -// LOG(WARNING) << vid << " type not found\n"; -// PrintType(v.type(), this->stream); -// this->stream << ' ' << vid; -// } -// else { -// auto arg = map_arg_type[vid]; -// PrintType(std::get<1>(arg), this->stream); -// if (v.type().is_handle()) -// this->stream << "*"; -// this->stream << ' ' << std::get<0>(arg); -// } -// } -// stream << ") {\n"; -// int func_scope = this->BeginScope(); -// this->PrintStmt(f->body); -// this->EndScope(func_scope); -// this->PrintIndent(); -// this->stream << "}\n\n"; -// } + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream 
<< ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + this->stream << "}\n\n"; +} @@ -372,5 +385,32 @@ void CodeGenSDACCEL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT CodeGenC::VisitExpr_(op, os); } +void CodeGenSDACCEL::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + // Skip the buffer data checking + if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) + return ; + PrintIndent(); + if (cond[0] == '(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + + + } // namespace codegen } // namespace TVM From f0ac7a7d95c762aba43184c76e504c5b9fdb9112 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 29 Jul 2019 21:08:46 -0400 Subject: [PATCH 010/103] Update codegen_sdaccel.cc --- tvm/src/codegen/opencl/codegen_sdaccel.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index d546f16ec..e9e1e4449 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -107,6 +107,7 @@ void 
CodeGenSDACCEL::AddFunction(LoweredFunc f, this->PrintStmt(f->body); this->EndScope(func_scope); this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; this->stream << "}\n\n"; } @@ -346,7 +347,6 @@ void CodeGenSDACCEL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(* CodeGenC::VisitExpr_(op, os); } - void CodeGenSDACCEL::VisitStmt_(const LetStmt* op) { std::string value = PrintExpr(op->value); // Skip the argument retrieving assign statement @@ -364,7 +364,6 @@ void CodeGenSDACCEL::VisitStmt_(const LetStmt* op) { } - void CodeGenSDACCEL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) if (std::isinf(op->value)) { if ( op->value < 0) { @@ -410,7 +409,5 @@ void CodeGenSDACCEL::VisitStmt_(const IfThenElse* op) { stream << "}\n"; } - - } // namespace codegen } // namespace TVM From 8713bda846247c2088aa432934334d993e5ef9bb Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 29 Jul 2019 22:19:57 -0400 Subject: [PATCH 011/103] Update codegen_sdaccel.cc --- tvm/src/codegen/opencl/codegen_sdaccel.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index e9e1e4449..48d9afd4c 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -304,9 +304,9 @@ void CodeGenSDACCEL::PrintStorageSync(const Call* op) { void CodeGenSDACCEL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global") { - os << "__global"; + os << "__global "; } else if (scope == "shared") { - os << "__local"; + os << "__local "; } } From e687bfb1908a54dcc28bf4e3ae3551c1c48d0a54 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Wed, 7 Aug 2019 17:15:42 -0400 Subject: [PATCH 012/103] modified: python/heterocl/tvm/target.py --- python/heterocl/tvm/target.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/heterocl/tvm/target.py 
b/python/heterocl/tvm/target.py index 12235d95d..23c87de89 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -50,7 +50,7 @@ if _LIB_NAME != "libhcl_runtime.so": raise err_msg -FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim'] +FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'rv64_ppac'] def _merge_opts(opts, new_opts): """Helper function to merge options""" @@ -68,7 +68,7 @@ class Target(object): Parameters ---------- - target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "opengl", "ext_dev"} + target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "opengl", "ext_dev", "rv64_ppac"} The major target name. {"merlinc", "soda", "soda_xhls", "vhls"} From b091758bd1bf9d0367a22cce53e481348f134b81 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Wed, 7 Aug 2019 17:20:05 -0400 Subject: [PATCH 013/103] new file: samples/ppac/gemm/csrcPrint.py new file: samples/ppac/gemm/data.py new file: samples/ppac/gemm/gemm_ppac.py new file: samples/ppac/gemm/headcode.txt new file: samples/ppac/gemm/ppac_common.py new file: tvm/src/codegen/build_ppac.cc new file: tvm/src/codegen/codegen_rv64_ppac.cc new file: tvm/src/codegen/codegen_rv64_ppac.h --- samples/ppac/gemm/csrcPrint.py | 62 +++++++++++++++++++++++++ samples/ppac/gemm/data.py | 59 ++++++++++++++++++++++++ samples/ppac/gemm/gemm_ppac.py | 43 ++++++++++++++++++ samples/ppac/gemm/headcode.txt | 6 +++ samples/ppac/gemm/ppac_common.py | 48 ++++++++++++++++++++ tvm/src/codegen/build_ppac.cc | 27 +++++++++++ tvm/src/codegen/codegen_rv64_ppac.cc | 67 ++++++++++++++++++++++++++++ tvm/src/codegen/codegen_rv64_ppac.h | 22 +++++++++ 8 files changed, 334 insertions(+) create mode 100644 samples/ppac/gemm/csrcPrint.py create mode 100644 samples/ppac/gemm/data.py create mode 100644 samples/ppac/gemm/gemm_ppac.py create mode 100644 samples/ppac/gemm/headcode.txt create mode 100644 samples/ppac/gemm/ppac_common.py create mode 100644 
tvm/src/codegen/build_ppac.cc create mode 100644 tvm/src/codegen/codegen_rv64_ppac.cc create mode 100644 tvm/src/codegen/codegen_rv64_ppac.h diff --git a/samples/ppac/gemm/csrcPrint.py b/samples/ppac/gemm/csrcPrint.py new file mode 100644 index 000000000..1eee291fd --- /dev/null +++ b/samples/ppac/gemm/csrcPrint.py @@ -0,0 +1,62 @@ +"""author: Guyue Huang (gh424@cornell.edu) +ppac-gemm c code gen +""" +from ppac_common import * + +def getCSrc(data_fname, golden_fname, head_fname, o_fname, dims, bits=1, xLen=64): + + m, n, k = dims + + def printKernel(of): + + of.write('//save data_A\n') + for i in range(m): + of.write(doLoad(('data_A+%d'%(8*i)), i)) + + of.write('\n//do MVP\n') + for j in range(n): + of.write('\tlw a1, data_B+%d\n'%(8*j)) + #TODO: bit-mask + of.write(doMVP(funct='72')) #'1001000' + for i in range(m): + of.write(doStore(('data_C+%d'%(4*(j*m+i))),i)) + + def printTest(of, golden): + for n, gnum in enumerate(golden): + of.write('\tlw a0, data_C+%d\n'%(4*n)) + of.write(('\tTEST_CASE(%d, a0, '%(n+1+1)) + hex(int(gnum)) + ', )\n') + of.write('\n\tTEST_PASSFAIL\n') + + with open(o_fname, 'w') as of: + + with open(head_fname, 'r') as hf: + of.write(hf.read()) + hf.close() + + of.write('\tRVTEST_WITH_ROCC\n') + of.write('start:\n\tRVTEST_CODE_BEGIN\n') + + printKernel(of) + of.write('\n\n') + goldnum = [] + with open(golden_fname, 'r') as gf: + goldnum += (gf.read()).split() + if not len(goldnum) == m*n: + raise Exception('golden number should have %d but %d'%(m*n, len(goldnum))) + printTest(of, goldnum) + + of.write('\tRVTEST_CODE_END\n\n') + + with open(data_fname, 'r') as df: + s = "\t.data\n\tRVTEST_DATA_BEGIN\n\n" + "\tTEST_DATA\n" + df.read() + df.close() + of.write(s) + + of.write('\tRVTEST_DATA_END\n') + +if __name__== '__main__': + getCSrc('bareMdata.txt', 'goldennumber.txt', 'headcode.txt', 'test.S', + dims=[4, 4, 64]) + + #TODO: headcode.txt ppac_common code definition + diff --git a/samples/ppac/gemm/data.py b/samples/ppac/gemm/data.py new file 
mode 100644 index 000000000..3433d0ec2 --- /dev/null +++ b/samples/ppac/gemm/data.py @@ -0,0 +1,59 @@ +""" author: Guyue Huang (gh424@cornell.edu) +ppac-gemm data generater +""" +import numpy as np +import numpy.random as rd +from ppac_common import compact, bvec2x + +m, n, k = 4, 4, 64 +v_bits, m_bits = 1, 1 +xLen = 64 + +mat_A = rd.randint(2**m_bits, size=(m, k)) +mat_B = rd.randint(2**v_bits, size=(k, n)) +mat_C = np.dot(mat_A, mat_B) +golden = list(mat_C.flatten('F')) # column major + +data_A = np.zeros((m, xLen)) +data_B = np.zeros((n, xLen)) +for i in range(m): + data_A[i,:] = compact(mat_A[i,:], k, m_bits, xLen) +for j in range(n): + data_B[j,:] = compact(mat_B[:,j], k, v_bits, xLen) + +word_A = [bvec2x(vec) for vec in list(data_A)] +word_B = [bvec2x(vec) for vec in list(data_B)] + +with open('csrcmacro.txt', 'w') as of: + of.write('#define M '+str(m)+'\n') + of.write('#define N ' + str(n) + '\n') + of.write('#define K ' + str(k) + '\n') + of.write('#define DATAA '+'0x'+word_A[0]) + for s in word_A[1:]: + of.write(',\\\n'+'\t0x'+s) + of.write('\n\n') + of.write('#define DATAB '+'\t0x'+word_B[0]) + for s in word_B[1:]: + of.write(',\\\n'+'\t0x'+s) + of.write('\n\n') + of.write('#define DATAGOLD ') + for n in golden[:-1]: + of.write(str(n)+', ') + of.write(str(golden[-1])+'\n') +of.close() + +with open('bareMdata.txt','w') as of: + of.write('data_A:\n') + for s in word_A: + of.write('\t.dword '+s+'\n') + of.write('\ndata_B:\n') + for s in word_B: + of.write('\t.dword '+s+'\n') + of.write('\ndata_C:\n') + for s in range(m*n): + of.write('\t.dword 0x0\n') +of.close() + +with open('goldennumber.txt','w') as of: + for n in golden: + of.write(str(n)+'\n') diff --git a/samples/ppac/gemm/gemm_ppac.py b/samples/ppac/gemm/gemm_ppac.py new file mode 100644 index 000000000..729773205 --- /dev/null +++ b/samples/ppac/gemm/gemm_ppac.py @@ -0,0 +1,43 @@ +""" +author: Guyue Huang (gh424@cornell.edu) + +General Matrix Multiplication +target : riscv_ppac + +NOT FINISHED , WILL 
FAIL AT 'BUILD' +""" +""" +modified on Aug 1 +bit_width = 1 +m, n, k = 16, 2, 64 +""" + +import heterocl as hcl +import numpy as np + + +def gemm(m, n, k, dtype=hcl.Int(), target=None): + matrix_1 = hcl.placeholder((m, k), dtype=dtype) + matrix_2 = hcl.placeholder((k, n), dtype=dtype) + + def kernel(matrix_1, matrix_2): + r = hcl.reduce_axis(0, k, 'k') + return hcl.compute((m, n), + lambda x, y: hcl.sum(matrix_1[x, r] * matrix_2[r, y], + axis=r, dtype=dtype), + dtype=dtype, + name="out_matrix") + + s = hcl.create_schedule([matrix_1, matrix_2], kernel) + f = hcl.build(s, target=target) + return f + +dtype = hcl.UInt(1) +hcl.init(dtype) +m, n, k = 4, 4, 64 +f = gemm(m, n, k, dtype, target="rv64_ppac") + +print(f) +with open("csrc.cc", "w") as ofile: + ofile.write(str(f)) +ofile.close() diff --git a/samples/ppac/gemm/headcode.txt b/samples/ppac/gemm/headcode.txt new file mode 100644 index 000000000..2e19284f5 --- /dev/null +++ b/samples/ppac/gemm/headcode.txt @@ -0,0 +1,6 @@ +#include "riscv_test.h" +#include "riscv-tests/isa/macros/scalar/test_macros.h" +#include "include/ppac.h" +#include "rocc-software/src/riscv_test_rocc.h" + +#define CUSTOM_X 1 diff --git a/samples/ppac/gemm/ppac_common.py b/samples/ppac/gemm/ppac_common.py new file mode 100644 index 000000000..5a76843cb --- /dev/null +++ b/samples/ppac/gemm/ppac_common.py @@ -0,0 +1,48 @@ +import numpy as np + +def compact(arr, arrlen, dataBits, wordBits): + if arr.size < arrlen or wordBits / dataBits < arrlen: + raise Exception("error in length") + word = np.zeros(wordBits, dtype=int) + for l in range(arrlen): + for b in range(dataBits): + word[l * dataBits + b] = ((arr[l] & (1 << b))>>b) + return word + +def bvec2x(vec): + n2c = {0:'0', 1:'1', 2:'2', 3:'3', + 4:'4', 5:'5', 6:'6', 7:'7', + 8:'8', 9:'9',10:'a',11:'b', + 12:'c',13:'d',14:'e',15:'f'} + base = np.array([1,2,4,8]) + vec_int = vec % 2 + len = vec.size + if not len % 4 == 0: + vec_int = np.append(vec_int, np.zeros((4-(len%4)))) + len = len + (4 - len 
% 4) + x = '' + ct = len/4 + for c in range(ct, 0, -1): + x = x + n2c[(np.dot( vec[4*c-4:4*c], base))] + return x + +def doWrite(rocc_addr): + s = '\tli a0, '+str(rocc_addr)+'\n' + return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 0, 11, 10, K_DO_WRITE)\n' + +def doRead(rocc_addr): + s = '\tli a0, '+str(rocc_addr)+'\n' + return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 0, 10, K_DO_READ)\n' + +def doLoad(addr, rocc_addr): + s = '\tla a1, '+addr+'\n\tli a0, '+str(rocc_addr)+'\n' + return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 11, 10, K_DO_LOAD)\n' + +def doStore(addr, rocc_res_addr): + s = '\tla a1, ' + addr + '\n\tli a0, ' + str(rocc_res_addr) + '\n' + return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 11, 10, K_DO_STORE)\n' + +def doMVP(funct): + return '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 11, 10, '+str(funct)+')\n' + + diff --git a/tvm/src/codegen/build_ppac.cc b/tvm/src/codegen/build_ppac.cc new file mode 100644 index 000000000..93ed51be8 --- /dev/null +++ b/tvm/src/codegen/build_ppac.cc @@ -0,0 +1,27 @@ +/* + author Guyue Huang (gh424@cornell.edu) + */ + +#include "./codegen_rv64_ppac.h" +#include "./build_common.h" + +namespace TVM{ +namespace codegen{ + +std::string BuildRV64PPAC(Array funcs) { + CodeGenRV64PPAC cg; + for (LoweredFunc f: funcs) { + cg.AddFunction(f); + } + std::string code = cg.Finish(); + LOG(WARNING) << "RV64_PPAC backend doesn't yet have runtime, return kernel code"; + return code; +} + +TVM_REGISTER_API("codegen.build_rv64_ppac") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = BuildRV64PPAC(args[0]); + }); + +} +} \ No newline at end of file diff --git a/tvm/src/codegen/codegen_rv64_ppac.cc b/tvm/src/codegen/codegen_rv64_ppac.cc new file mode 100644 index 000000000..5882a0280 --- /dev/null +++ b/tvm/src/codegen/codegen_rv64_ppac.cc @@ -0,0 +1,67 @@ +/* + author Guyue Huang (gh424@cornell.edu) + */ + +#include +#include +#include +#include +#include +#include "./codegen_rv64_ppac.h" + +namespace TVM { 
+namespace codegen { + +void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os << "void*"; return; + } + if (t.is_float()) { + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + os << "double"; return; + } + } else if (t.is_uint()) { + if (t.bits() <= 8) { + os << "uint8_t"; return; + } + else if (t.bits() <= 16) { + os << "uint16_t"; return; + } + else if (t.bits() <= 32) { + os << "uint32_t"; return; + } + else if (t.bits() <= 64) { + os << "uint64_t"; return; + } + else { + os << "uint64_t"; + LOG(WARNING) << "Casting type " << t << " to uint64_t"; + } + } else if (t.is_int()) { + if (t.bits() <= 8) { + os << "int8_t"; return; + } + else if (t.bits() <= 16) { + os << "int16_t"; return; + } + else if (t.bits() <= 32) { + os << "int32_t"; return; + } + else if (t.bits() <= 64) { + os << "int64_t"; return; + } + else { + os << "int64_t"; + LOG(WARNING) << "Casting type " << t << " to int64_t"; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to C type"; +} + +} +} \ No newline at end of file diff --git a/tvm/src/codegen/codegen_rv64_ppac.h b/tvm/src/codegen/codegen_rv64_ppac.h new file mode 100644 index 000000000..5e2003b7b --- /dev/null +++ b/tvm/src/codegen/codegen_rv64_ppac.h @@ -0,0 +1,22 @@ +/* + author Guyue Huang (gh424@cornell.edu) + */ + +#ifndef TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ +#define TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ + +#include +#include "./codegen_c.h" + +namespace TVM { +namespace codegen { + +class CodeGenRV64PPAC : public CodeGenC { + public: + void PrintType(Type t, std::ostream& os) override; +}; + +} +} + +#endif //TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ \ No newline at end of file From 8e12d35f22eab2442dd69f77d8af0762eddb07ab Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 7 Aug 2019 18:45:38 -0400 Subject: [PATCH 014/103] all --- Makefile.config | 3 + python/heterocl/tvm/target.py | 2 +- tvm/Makefile | 7 + 
tvm/opencl/aocl/codegen_aocl.cc | 0 tvm/opencl/codeanalys_openclc.cc | 921 ------------------ tvm/opencl/codegen_opencl.cc | 368 ------- tvm/opencl/sdaccel/codegen_sdaccel.cc | 0 tvm/src/codegen/opencl/build_opencl.cc | 20 + tvm/src/codegen/opencl/codegen_sdaccel.cc | 21 +- .../codegen/opencl/sdaccel/sdaccel_module.cc | 253 +++++ 10 files changed, 301 insertions(+), 1294 deletions(-) delete mode 100644 tvm/opencl/aocl/codegen_aocl.cc delete mode 100644 tvm/opencl/codeanalys_openclc.cc delete mode 100644 tvm/opencl/codegen_opencl.cc delete mode 100644 tvm/opencl/sdaccel/codegen_sdaccel.cc create mode 100644 tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc diff --git a/Makefile.config b/Makefile.config index 2060d201c..60d1cfd3e 100644 --- a/Makefile.config +++ b/Makefile.config @@ -12,6 +12,9 @@ CMAKE_OK = no # set whether to use vivado hls runtime USE_VIVADO_HLS = 1 +# set whether to use sdaccel opencl runtime +USE_SDACCEL_HLS = 1 + # Specify current directory level with respect to CLAY_ROOT ifndef LEVEL LEVEL := . 
diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 3df1564db..56d219dc8 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -50,7 +50,7 @@ if _LIB_NAME != "libhcl_runtime.so": raise err_msg -FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'opencl', 'sdaccel', 'aocl'] +FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'opencl', 'sdaccel', 'sdaccel_sw_emu', 'aocl'] def _merge_opts(opts, new_opts): """Helper function to merge options""" diff --git a/tvm/Makefile b/tvm/Makefile index 1a78cbe7c..d6f11ca12 100644 --- a/tvm/Makefile +++ b/tvm/Makefile @@ -126,6 +126,13 @@ else CFLAGS += -DTVM_OPENCL_RUNTIME=0 endif +ifeq ($(USE_SDACCEL_HLS), 1) + CFLAGS += -DOPENCL_SDACCEL_RUNTIME=1 +else + CFLAGS += -DOPENCL_SDACCEL_RUNTIME=0 +endif + + ifeq ($(USE_VIVADO_HLS), 1) CFLAGS += -DHCL_VHLS_RUNTIME=1 else diff --git a/tvm/opencl/aocl/codegen_aocl.cc b/tvm/opencl/aocl/codegen_aocl.cc deleted file mode 100644 index e69de29bb..000000000 diff --git a/tvm/opencl/codeanalys_openclc.cc b/tvm/opencl/codeanalys_openclc.cc deleted file mode 100644 index 66869a817..000000000 --- a/tvm/opencl/codeanalys_openclc.cc +++ /dev/null @@ -1,921 +0,0 @@ -#include -#include -#include -#include "./codeanalys_openclc.h" -#include "../codegen_common.h" -#include "../../arithmetic/compute_expr.h" - -namespace TVM { -namespace codegen { - -using namespace ir; - -void CodeAnalysOpenCLC::Init() { - ; -} - -void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { - alloc_storage_scope_.clear(); - handle_data_type_.clear(); - map_arg_type_.clear(); - CodeGenSourceBase::ClearFuncState(); -} -void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { - // Clear previous generated state. - this->InitFuncState(f); - - // Add to alloc buffer type. 
- for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - // Record the arguments for analyzing the type - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - this->map_arg_type_[vid]; - } - int func_scope = this->BeginScope(); - VisitStmt(f->body); - this->EndScope(func_scope); -} - -str2tupleMap CodeAnalysOpenCLC::Finish() { - return this->map_arg_type_; -} - -void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) - VisitExpr(n, os); -} - -void CodeAnalysOpenCLC::PrintSSAAssign( - const std::string& target, const std::string& src, Type t) { - PrintType(t, stream); - stream << ' ' << target << " = "; - if (src.length() > 3 && - src[0] == '(' && src[src.length() - 1] == ')') { - stream << src.substr(1, src.length() - 2); - } else { - stream << src; - } - stream << ";\n"; -} - -// Print a reference expression to a buffer. -std::string CodeAnalysOpenCLC::GetBufferRef( - Type t, const Variable* buffer, Expr index) { - std::ostringstream os; - std::string vid = GetVarID(buffer); - std::string scope; - if (alloc_storage_scope_.count(buffer)) { - scope = alloc_storage_scope_.at(buffer); - } - bool is_vol = volatile_buf_.count(buffer) != 0; - if (t.lanes() == 1) { - if (!HandleTypeMatch(buffer, t) || is_vol) { - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)" << vid << ')'; - } else { - os << vid; - } - os << '['; - PrintExpr(index, os); - os << ']'; - } else { - // Buffer declared as vector type. 
- // optimize for case where it is in register, - if (HandleTypeMatch(buffer, t) && !is_vol) { - // optimize for constant access - int offset; - if (arith::GetConstInt(index, &offset)) { - CHECK_EQ(offset % t.lanes(), 0) - << "Find unaligned vector load to a vector type"; - os << vid << '[' << (offset / t.lanes()) << ']'; - return os.str(); - } - } - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)("; - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << vid << " + "; - PrintExpr(index, os); - os << "))[0]"; - } - return os.str(); -} - -// Print a reference expression to a buffer. -std::string CodeAnalysOpenCLC::GetStructRef( - Type t, const Expr& buffer, const Expr& index, int kind) { - if (kind < intrinsic::kArrKindBound_) { - std::ostringstream os; - os << "(((TVMArray*)"; - this->PrintExpr(buffer, os); - os << ")"; - if (kind == intrinsic::kArrAddr) { - os << " + "; - this->PrintExpr(index, os); - os << ")"; - return os.str(); - } - os << '['; - this->PrintExpr(index, os); - os << "]."; - // other case: get fields. 
- switch (kind) { - case intrinsic::kArrData: os << "data"; break; - case intrinsic::kArrShape: os << "shape"; break; - case intrinsic::kArrStrides: os << "strides"; break; - case intrinsic::kArrNDim: os << "ndim"; break; - case intrinsic::kArrTypeCode: os << "dtype.code"; break; - case intrinsic::kArrTypeBits: os << "dtype.bits"; break; - case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; - case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; - case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; - case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; - default: os << "unknown_field_code_" << kind; - } - os << ')'; - return os.str(); - } else { - CHECK_LT(kind, intrinsic::kTVMValueKindBound_); - std::ostringstream os; - os << "(((TVMValue*)"; - this->PrintExpr(buffer, os); - os << ")[" << index << "]."; - if (t.is_handle()) { - os << "v_handle"; - } else if (t.is_float()) { - os << "v_float64"; - } else if (t.is_int()) { - os << "v_int64"; - } else { - os << t; - } - os << ")"; - return os.str(); - } -} - - -bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) return false; - return it->second == t; -} - -void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) { - handle_data_type_[buf_var] = t; - } else { - CHECK(it->second == t) - << "conflicting buf var type"; - } -} - -void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, - Type t, int i, - std::ostream& os) { // NOLINT(*) - os << vec << ".s" << std::hex << i << std::dec; -} - -void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, - Type t, int i, - const std::string& value) { - this->PrintIndent(); - stream << vec << ".s" << std::hex << i - << " = " << value << ";\n" << std::dec; -} - -std::string CodeAnalysOpenCLC::GetVecLoad( - Type t, const 
Variable* buffer, Expr base) { - return GetBufferRef(t, buffer, base); -} - -void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - std::string ref = GetBufferRef(t, buffer, base); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; -} - -std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - return os.str(); -} - -void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { - LOG(FATAL) << "not implemented"; -} - -void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) -} - -void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) - CHECK_EQ(scope, "global"); -} - -std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) - std::ostringstream os; - PrintType(t, os); - return os.str(); -} - -void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - if (t.is_float()) { - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - os << "double"; return; - } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "int" << t.bits() << "_t"; return; - } - } - } - os << t; -} - - -inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == Int(32)) { - std::ostringstream temp; - temp << op->value; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == UInt(32)) { - std::ostringstream temp; - temp << op->value << "U"; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - switch (op->type.bits()) { - case 64: case 32: { - std::ostringstream temp; - temp << std::scientific << op->value; - if (op->type.bits() == 32) temp << 'f'; - p->MarkConst(temp.str()); - os << temp.str(); - break; - } - case 16: { - os << '('; - p->PrintType(op->type, os); - os << ')' << std::scientific <value << 'f'; - break; - } - default: os << op << "\n"; - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) - os << "\"" << op->value << "\""; -} - -template -inline void PrintBinaryExpr(const T* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - if (isalpha(opstr[0])) { - os << opstr << '('; - p->PrintExpr(op->a, os); - os << ", "; - p->PrintExpr(op->b, os); - os << ')'; - } else { - os << '('; - p->PrintExpr(op->a, os); - os << ' ' << opstr << ' '; - p->PrintExpr(op->b, os); - os << ')'; - } - } else { - p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); - } -} - -inline void PrintBinaryIntrinsitc(const Call* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - CHECK_EQ(op->args.size(), 2U); - os << '('; - p->PrintExpr(op->args[0], 
os); - os << opstr; - p->PrintExpr(op->args[1], os); - os << ')'; - } else { - p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); - } -} -void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) - std::stringstream value; - this->PrintExpr(op->value, value); - os << CastFromTo(value.str(), op->value.type(), op->type); -} -void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) - os << GetVarID(op); -} -void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "+", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "-", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "*", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "/", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "%", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "min", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "max", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "==", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "!=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">", os, this); -} -void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "&&", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "||", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) - os << '!'; - PrintExpr(op->a, os); -} - -void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) - if (op->call_type == Call::Extern || - op->call_type == Call::PureExtern) { - os << op->name << "("; - for (size_t i = 0; i < op->args.size(); i++) { - this->PrintExpr(op->args[i], os); - if (i < op->args.size() - 1) { - os << ", "; - } - } - os << ")"; - } else if (op->is_intrinsic(Call::bitwise_and)) { - PrintBinaryIntrinsitc(op, " & ", os, this); - } else if (op->is_intrinsic(Call::bitwise_xor)) { - PrintBinaryIntrinsitc(op, " ^ ", os, this); - } else if (op->is_intrinsic(Call::bitwise_or)) { - PrintBinaryIntrinsitc(op, " | ", os, this); - } else if (op->is_intrinsic(Call::bitwise_not)) { - CHECK_EQ(op->args.size(), 1U); - os << "(~"; - this->PrintExpr(op->args[0], os); - os << ')'; - } else if (op->is_intrinsic(Call::shift_left)) { - PrintBinaryIntrinsitc(op, " << ", os, this); - } else if (op->is_intrinsic(Call::shift_right)) { - PrintBinaryIntrinsitc(op, " >> ", os, this); - } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintExpr(op->args[0], os); - os << " ? 
"; - PrintExpr(op->args[1], os); - os << " : "; - PrintExpr(op->args[2], os); - os << ")"; - } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { - const Load *l = op->args[0].as(); - CHECK(op->args.size() == 1 && l); - os << "(("; - this->PrintType(l->type.element_of(), os); - os << " *)" << this->GetVarID(l->buffer_var.get()) - << " + "; - this->PrintExpr(l->index, os); - os << ')'; - } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { - CHECK_EQ(op->args.size(), 3U); - os << GetStructRef( - op->type, op->args[0], op->args[1], - op->args[2].as()->value); - } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { - CHECK_EQ(op->args.size(), 1U); - os << "("; - this->PrintExpr(op->args[0], os); - os << " == NULL)"; - } else - os << op->name << "()"; -} - -void CodeAnalysOpenCLC::PrintVecBinaryOp( - const std::string& op, Type t, - Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) - if (isalpha(op[0])) { - os << op << "("; - this->PrintExpr(lhs, os); - os << ", "; - this->PrintExpr(rhs, os); - os << ")"; - } else { - os <<"("; - this->PrintExpr(lhs, os); - os << ' ' << op << ' '; - this->PrintExpr(rhs, os); - os << ")"; - } -} - -inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { - const Ramp* r = index.as(); - if (!r) return false; - if (!is_one(r->stride)) return false; - CHECK_EQ(r->lanes, lanes); - *base = r->base; - return true; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) - int lanes = op->type.lanes(); - // delcare type. 
- if (op->type.lanes() == 1) { - std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); - os << ref; - } else { - CHECK(is_one(op->predicate)) - << "predicated load is not supported"; - Expr base; - if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { - std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); - os << ref; - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // load seperately. - std::string svalue = GetUniqueName("_"); - this->PrintIndent(); - this->PrintType(op->type, stream); - stream << ' ' << svalue << ";\n"; - std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string vid = GetVarID(op->buffer_var.get()); - Type elem_type = op->type.element_of(); - for (int i = 0; i < lanes; ++i) { - std::ostringstream value_temp; - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - value_temp << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, value_temp); - value_temp << ' '; - } - } - PrintType(elem_type, value_temp); - value_temp << "*)" << vid << ')'; - } else { - value_temp << vid; - } - value_temp << '['; - PrintVecElemLoad(sindex, op->index.type(), i, value_temp); - value_temp << ']'; - PrintVecElemStore(svalue, op->type, i, value_temp.str()); - } - os << svalue; - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { - Type t = op->value.type(); - if (t.lanes() == 1) { - std::string value = this->PrintExpr(op->value); - std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; - } else { - CHECK(is_one(op->predicate)) - << "Predicated store is not supported"; - Expr base; - if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { - std::string value = this->PrintExpr(op->value); - this->PrintVecStore(op->buffer_var.get(), t, base, value); - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // store elements seperately - std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); - std::string vid = GetVarID(op->buffer_var.get()); - for (int i = 0; i < t.lanes(); ++i) { - this->PrintIndent(); - Type elem_type = t.element_of(); - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - stream << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, stream); - stream << ' '; - } - } - PrintType(elem_type, stream); - stream << "*)" << vid << ')'; - } else { - stream << vid; - } - stream << '['; - PrintVecElemLoad(index, op->index.type(), i, stream); - stream << "] = "; - PrintVecElemLoad(value, op->value.type(), i, stream); - stream << ";\n"; - } - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) - std::string value = PrintExpr(op->value); - CHECK(!var_idmap_.count(op->var.get())); - var_idmap_[op->var.get()] = value; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) - // constraint of current logic - CHECK_EQ(op->base.type(), Int(32)); - os << "((int" << op->lanes << ")("; - for (int i = 0; i < op->lanes; i++) { - os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; - if (i != op->lanes - 1) - os << ", "; - } - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << 
"Broadcast: not supported "; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->condition, os); - os << " ? "; - PrintExpr(op->true_value, os); - os << " : "; - PrintExpr(op->false_value, os); - os << ")"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " & (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. a' = SHR a for Idx_R bits - // 2. mask: 1.(length).1 - // (1 << (L - R + 1)) - 1 - // 3. a' & mask - - os << "(("; - PrintExpr(op->a, os); - os << " >> "; - PrintExpr(op->index_right, os); - os << ") & ((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " | (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. mask: 0.(Idx L).01..10.(Idx R).0 - // ((1 << (L - R + 1)) - 1) << R - // 2. 
a & mask - - os << "("; - PrintExpr(op->a, os); - os << " & (((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1) << "; - PrintExpr(op->index_right, os); - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "Quantize is not yet support"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "KernelExpr is not yet support"; -} - - -void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { - // TODO comaniac - //std::vector vec_var = GetNodesByType(op->value); - - std::string arg_vid = "unknown"; - std::string str = PrintExpr(op->value); - if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { - size_t pos_arg = str.find("arg"); - size_t pos_data = str.find("data"); - arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); - } - else if (std::regex_match(str, std::regex("arg(.+)"))) - arg_vid = str; - - std::string vid = AllocVarID(op->var.get()); - if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { - if ("unknown" != arg_vid) - LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; - } else { - Type type = op->var.type(); - if (op->var.type() == Handle() && - handle_data_type_.count(op->var.get())) - type = handle_data_type_.at(op->var.get()); - this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); - } - VisitStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { - CHECK(!is_zero(op->condition)); - std::string vid = AllocVarID(op->buffer_var.get()); - if (op->new_expr.defined()) { - // Prefer global static allocation for the program - CHECK_EQ(op->free_function, "nop"); - std::string new_data = PrintExpr(op->new_expr); - this->PrintIndent(); - PrintType(op->type, stream); - stream << "* "<< vid << '=' << new_data << ";\n"; - } else { - this->PrintIndent(); - int32_t constant_size = 
op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); - stream << ' '; - PrintType(op->type, stream); - stream << ' '<< vid << '[' - << constant_size << "];\n"; - } - RegisterHandleType(op->buffer_var.get(), op->type); - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { - if (op->attr_key == ir::attr::thread_extent) { - IterVar iv(op->node.node_); - if (iv->thread_tag.length() != 0) { - if (!var_idmap_.count(iv->var.get())) { - BindThreadIndex(iv); - } - } - } else if (op->attr_key == ir::attr::storage_scope) { - const Variable* v = op->node.as(); - CHECK(v); - alloc_storage_scope_[v] = op->value.as()->value; - } else if (op->attr_key == ir::attr::volatile_scope) { - const Variable* v = op->node.as(); - CHECK(v); - volatile_buf_.insert(v); - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (op->message.as()) { - // GLOG style check - stream << "CHECK(" << cond << ") << \"" - << op->message.as()->value << "\";\n"; - } else { - stream << "assert(" << cond << ");\n"; - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const For* op) { - std::string extent = PrintExpr(op->extent); - PrintIndent(); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { - PrintStmt(op->first); - if (op->rest.defined()) PrintStmt(op->rest); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { - if (is_const(op->value)) return; - const Call* call = op->value.as(); - if (call) { - if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { - this->PrintStorageSync(call); return; - } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { - CHECK_EQ(call->args.size(), 4); - std::string value = PrintExpr(call->args[3]); - std::string ref = GetStructRef( - call->args[3].type(), - call->args[0], - call->args[1], - call->args[2].as()->value); - this->PrintIndent(); - this->stream << ref << " = " << value << ";\n"; - return; - } - } - std::string vid = this->PrintExpr(op->value); - this->PrintIndent(); - this->stream << "(void)" << vid << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { - PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { - LOG(FATAL) << "KernelDef is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { - LOG(FATAL) << "KernelStmt is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { - this->stream << "return "; - PrintExpr(op->value); - this->stream << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { - // TODO: Check if the break statement is used correctly - this->stream << "break;\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const While *op) { - std::string condition = PrintExpr(op->condition); 
- PrintIndent(); - stream << "while (" << condition << ") {\n"; - int while_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(while_scope); - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Reuse *op) { - LOG(FATAL) << "KernelDef is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} - -void CodeAnalysOpenCLC::VisitStmt_(const Stencil *op) { - PrintStmt(op->body); -} - -} // namespace codegen -} // namespace TVM diff --git a/tvm/opencl/codegen_opencl.cc b/tvm/opencl/codegen_opencl.cc deleted file mode 100644 index 57da77896..000000000 --- a/tvm/opencl/codegen_opencl.cc +++ /dev/null @@ -1,368 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include -# include -# include -# include -# include -# include -# include "./codegen_opencl.h" -# include "./codeanalys_openclc.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -CodeGenOpenCL::CodeGenOpenCL() { - restrict_keyword_ = "restrict"; -} - -void CodeGenOpenCL::InitFuncState(LoweredFunc f) { - CodeGenC::InitFuncState(f); - for (Var arg: f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } -} - - -// void CodeGenOpenCL::AddFunction(LoweredFunc f) { -// this->stream << "__kernel "; -// CodeGenC::AddFunction(f); -// } -// void CodeGenOpenCL::AddFunction(LoweredFunc f, -// str2tupleMap map_arg_type) { -// // Write header files -// // TODO: Insert header files here -// // Clear previous generated state -// this->InitFuncState(f); -// // Register alloc buffer type -// for (const auto & kv : f->handle_data_type) { -// RegisterHandleType(kv.first.get(), kv.second.type()); -// } -// // Write entry function name -// this->stream << "void " << f->name << "("; -// // Write arguments -// for (size_t i = 0; i < f->args.size(); ++i) { -// Var v = f->args[i]; -// std::string vid = AllocVarID(v.get()); -// if (i != 0) this->stream << ", "; -// if (map_arg_type.find(vid) == 
map_arg_type.end()) { -// LOG(WARNING) << vid << " type not found\n"; -// PrintType(v.type(), this->stream); -// this->stream << ' ' << vid; -// } -// else { -// auto arg = map_arg_type[vid]; -// PrintType(std::get<1>(arg), this->stream); -// this->stream << ' ' << std::get<0>(arg); -// const BufferNode* buf = f->api_args[i].as(); -// if (v.type().is_handle() && buf) { -// var_shape_map_[buf->data.get()] = buf->shape; -// for (size_t i = 0; i < buf->shape.size(); i++) { -// this->stream << '['; -// this->PrintExpr(buf->shape[i], this->stream); -// this->stream << ']'; -// } -// } -// // this->stream << "*"; TODO: create an option for this -// } -// } -// stream << ") {\n"; -// int func_scope = this->BeginScope(); -// range_ = CollectIterRange(f->body); -// this->PrintStmt(f->body); -// this->EndScope(func_scope); -// this->PrintIndent(); -// this->stream << "}\n\n"; -// } - - -// void CodeGenOpenCL::AddFunction(LoweredFunc f, -// str2tupleMap map_arg_type) { -// // Clear previous generated state -// this->InitFuncState(f); - -// // // Skip the first underscore, so SSA variable starts from _1 -// // GetUniqueName("_"); - -// // // Register alloc buffer type -// // for (const auto & kv : f->handle_data_type) { -// // RegisterHandleType(kv.first.get(), kv.second.type()); -// // } - -// // // Write header files -// // this->stream << "#include \n"; -// // this->stream << "#include \n"; -// // this->stream << "#include \n"; - -// // // Write entry function name -// // this->stream << "#pragma ACCEL kernel\n"; -// this->stream << "void " << f->name << "("; - -// // Write arguments -// for (size_t i = 0; i < f->args.size(); ++i) { -// Var v = f->args[i]; -// std::string vid = AllocVarID(v.get()); -// if (i != 0) this->stream << ", "; -// if (map_arg_type.find(vid) == map_arg_type.end()) { -// LOG(WARNING) << vid << " type not found\n"; -// PrintType(v.type(), this->stream); -// this->stream << ' ' << vid; -// } -// else { -// auto arg = map_arg_type[vid]; -// 
PrintType(std::get<1>(arg), this->stream); -// if (v.type().is_handle()) -// this->stream << "*"; -// this->stream << ' ' << std::get<0>(arg); -// } -// } -// stream << ") {\n"; -// int func_scope = this->BeginScope(); -// this->PrintStmt(f->body); -// this->EndScope(func_scope); -// this->PrintIndent(); -// this->stream << "}\n\n"; -// CodeGenC::AddFunction(f); -// } - - -void CodeGenOpenCL::AddFunction(LoweredFunc f) { - this->stream << "__kernel "; - CodeGenC::AddFunction(f); -} - - - -std::string CodeGenOpenCL::Finish() { - // inject extension enable pragma for fp16 and fp64 - if (enable_fp16_) { - decl_stream - << "#ifdef cl_khr_fp16\n" - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "#elif defined(cl_amd_fp16)\n" - "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" - "#else\n" - "#error \"Half precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - if (enable_fp64_) { - decl_stream - << "#ifdef cl_khr_fp64\n" - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "#elif defined(cl_amd_fp64)\n" - "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" - "#else\n" - "#error \"Double precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - return CodeGenC::Finish(); -} - -void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { - CHECK(!var_idmap_.count(iv->var.get())); - runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); - std::ostringstream os; - if (ts.rank == 1) { - os << "get_local_id(" << ts.dim_index << ")"; - } else { - os << "get_group_id(" << ts.dim_index << ")"; - } - var_idmap_[iv->var.get()] = - CastFromTo(os.str(), UInt(64), iv->var.type()); -} - -void CodeGenOpenCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - int lanes = t.lanes(); - if (t.is_handle()) { - CHECK_EQ(lanes, 1) - << "do not yet support vector types"; - os << "void*"; return; - } - if ( t== Bool() ) { - os << "bool"; return; - } - bool fail = 
false; - if (t.is_float()) { - switch (t.bits()) { - case 16: - os << "half"; - enable_fp16_ = true; - break; - case 32: - os << "float"; - break; - case 64: - os << "double"; - enable_fp64_ = true; - break; - default: - fail = true; - break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } else if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << 'u'; - } - if (t.bits() == 8 && t.lanes() == 4) { - // directly 4 8 bit int in integer. - os << "int"; return; - } - switch (t.bits()) { - case 8: os << "char"; break; - case 16: os << "short"; break; - case 32: os << "int"; break; - case 64: os << "long"; break; - case 1: os << "int"; break; - default: fail = true; break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } - LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; -} - -void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os) { // NOLINT(*) - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - auto it = alloc_storage_scope_.find(buffer); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << GetVarID(buffer) << " + "; - PrintExpr(base, os); -} -std::string CodeGenOpenCL::GetVecLoad( - Type t, const Variable* buffer, Expr base) { - std::ostringstream os; - os << "vload" << t.lanes() << "(0, "; - PrintVecAddr(buffer, t, base, os); - os << ")"; - return os.str(); -} - -void CodeGenOpenCL::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - this->PrintIndent(); - stream << "vstore" << t.lanes() << "(" << value << ", 0, "; - PrintVecAddr(buffer, t, base, stream); - stream << ");\n"; -} - -void CodeGenOpenCL::PrintStorageSync(const Call* op) { - const std::string& sync = op->args[0].as()->value; - if (sync == "warp") { - 
LOG(FATAL) << "warp sync not supported in opencl"; - } else if (sync == "shared") { - this->PrintIndent(); - this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; - } else if (sync == "global") { - LOG(FATAL) << "not supported"; - } -} - -void CodeGenOpenCL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global") { - os << "__global"; - } else if (scope == "shared") { - os << "__local"; - } -} - -std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - if (target.lanes() == 1) { - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - } else { // convert vector type - os << "("; - os << "convert_"; - this->PrintType(target, os); - os << "(" << value << "))"; - } - return os.str(); -} - -void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - std::string v = PrintExpr(op->value); - os << "(("; - PrintType(op->type, os); - os << ")("; - for (int i = 0; i < op->lanes; ++i) { - if (i != 0) os << ", "; - os << v; - } - os << "))"; -} - -void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) - if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintType(op->args[2].type(), os); - os << ")"; - } - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - - -void CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) - if (std::isinf(op->value)) { - if ( op->value < 0) { - os << "-"; - } - os << 
"INFINITY"; - } else if (std::isnan(op->value)) { - os << "NAN"; - } else { - CodeGenC::VisitExpr_(op, os); - } -} - -void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) - os << "("; - PrintType(op->true_value.type(), os); - os << ")"; - CodeGenC::VisitExpr_(op, os); -} - -} // namespace codegen -} // namespace TVM diff --git a/tvm/opencl/sdaccel/codegen_sdaccel.cc b/tvm/opencl/sdaccel/codegen_sdaccel.cc deleted file mode 100644 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index f65f27604..5992a7175 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -14,15 +14,35 @@ # include "./codegen_aocl.h" # include "./codeanalys_openclc.h" # include "../build_common.h" +# include "./sdaccel/sdaccel_module.h" namespace TVM { namespace codegen { // #if OPENCL_SDACCEL_RUNTIME +// runtime::Module BuildSDAccelSwEmu(Array funcs) { +// CodeAnalysOpenCLC ca; +// CodeGenSDACCEL cg; +// for (LoweredFunc f : funcs) { +// ca.AddFunction(f); +// str2tupleMap map_arg_type; +// map_arg_type = ca.Finish(); +// cg.AddFunction(f, map_arg_type); +// } +// std::string code = cg.Finish(); + +// return runtime::CreateSDAccelModule(funcs[0], code); +// } +// TVM_REGISTER_API("codegen.build_sdaccel_sw_emu") +// .set_body([](TVMArgs args, TVMRetValue* rv) { +// *rv = BuildSDAccelSwEmu(args[0]); +// }); // #endif + + // #if OPENCL_AOCL_RUNTIME // #endif diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 48d9afd4c..95e1e9b87 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -64,7 +64,7 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, } // Write head files - this->stream << "# pragma ACCEL kernel\n"; + // stream.open("host.cpp"); this->stream << "# pragma once\n"; this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; this->stream << "# 
define CL_HPP_TARGET_OPENCL_VERSION 120\n"; @@ -78,7 +78,9 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, this->stream << "# include \n\n"; // Write entry function name - this->stream << "__kernel " << f->name << "("; + // this->stream << "__kernel " << f->name << "("; + this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; + this->stream << f->name << "("; @@ -96,6 +98,7 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, else { auto arg = map_arg_type[vid]; this->stream << "__global "; + // this->stream << "global "; PrintType(std::get<1>(arg), this->stream); if (v.type().is_handle()) this->stream << "*"; @@ -301,15 +304,25 @@ void CodeGenSDACCEL::PrintStorageSync(const Call* op) { } } +// void CodeGenSDACCEL::PrintStorageScope( +// const std::string& scope, std::ostream& os) { // NOLINT(*) +// if (scope == "global") { +// os << "__global "; +// } else if (scope == "shared") { +// os << "__local "; +// } +// } + void CodeGenSDACCEL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global") { - os << "__global "; + os << "global "; } else if (scope == "shared") { - os << "__local "; + os << "local "; } } + std::string CodeGenSDACCEL::CastFromTo(std::string value, Type from, Type target) { if (from == target) return value; std::ostringstream os; diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc new file mode 100644 index 000000000..7a89a0dc1 --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc @@ -0,0 +1,253 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include "./sdaccel_module.h" +# include +# include +# include +# include +# include + +namespace TVM { +namespace runtime { + +namespace { + +void PrintIndent(std::ofstream& stream, int indent) { + for (int i = 0;i < indent; i++ ) { + stream << ' '; + } +} + +inline size_t GetTypeSize(TVMType t) { + size_t byte = (t.bits + 7) / 8; + if (byte > 2){ 
+ if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + return byte; +} + +inline size_t GetDataSize(TVMArray* arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= arr->shape[i]; + } + size_t byte = (arr->dtype.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + size *= (byte * 8 * arr->dtype.lanes + 7) / 8; + return size; +} + +inline TVMType Type2TVMType(Type t) { + TVMType tt; + if (t.is_int()) tt.code = kDLInt; + else if (t.is_uint()) tt.code = kDLUInt; + else if (t.is_float()) tt.code = kDLFloat; + else LOG(FATAL) << "Unacceptable type: " << t; + tt.bits = static_cast(t.bits()); + tt.fracs = static_cast(t.fracs()); + return tt; +} + +inline std::string Type2Str(TVMType t) { + +} + +inline std::string Tpye2ExtStr(TVMType t) { + +} + + + + + +inline std::string Type2Byte(TVMType t) { + std::string str = ""; + if (t.code == kDLFloat) { + str += "float"; + } else if (t.code == kDLInt || t.code == kDLUInt) { + if (t.code == kDLUInt) str += "u"; + str += "int"; + if (t.bits <= 8) str += "8"; + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + str += "_t"; + } + return str; +} + +void CollectArgInfo(TVMArgs& args, + LoweredFunc func, + std::vector& arg_sizes, + std::vector& arg_types) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + arg_sizes.push_back(GetDataSize(arr)); + arg_types.push_back(arr->dtype); + } else { + const Variable* var = func->api_args[i].as(); + TVMType t = Type2TVMType(var->type); + arg_sizes.push_back(GetTypeSize(t)); + arg_types.push_back(t); + } + } +} + +void GenSharedMem(TVMArgs& args, + std::vector& shmids, + std::vector& arg_sizes) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + // generate shared memory key and id + // TODO: maybe get the 
current path?? + key_t key = ftok("/", i+1); + int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); + shmids.push_back(shmid); + // copy mem from TVM args to the shared memory + void* mem = shmat(shmid, nullptr, 0); + memcpy(mem, arr->data, arg_sizes[i]); + } else { + shmids.push_back(0); + } + } +} + +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes) { + for (size_t i = 0; i < shmids.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + int shmid = shmids[i]; + void* mem = shmat(shmid, nullptr, 0); + memcpy(arr->data, mem, arg_sizes[i]); + shmdt(mem); + shmctl(shmid, IPC_RMID, nullptr); + } + } +} + +// copy values from the shared mem to local mem +void PrintCopy() + + + + +// copy values from local mem back to shared mem +void PrintCopyBack() + + + +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func, + std::string test_file) { + int indent = 0; + std::ofstream stream; + stream.open("host.cpp"); + stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# pragram once\n"; + stream << "# define LENGTH (1024)\n"; + stream << "# define NUM_WORKGROUPS (1)\n"; + stream << "# define WORKGROUP_SIZE (16)\n"; + stream << test_file; + stream << "int main(void) { \n"; + indent += 2; + + for ( int i = 0;i < args.size(); i++ ) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared 
memory + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << "* "; + stream << Type2Byte(arg_types)[i] << "*"; + PrintIndent(stream, indent); + + + } + } + + // call the function + PrintIndent(stream, indent) + + +} +} // namespace + +class SDAccelModuleNode final : public ModuleNode { + public: + SDAccelModuleNode(LoweredFunc func, std::string test_file) + : func_(func), test_file_(test_file) {} + + const char* type_key() const { + return "sdaccel_sw_emu"; + } + + PackedFunc GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) final { + return PackedFunc([this](TVMArgs args, TVMRetValue* rv){ + if (args.size() != (int)func_->args.size()) + LOG(FATAL) << "The function should take in " << func_->args.size() + << " inputs but get " << args.size(); + std::vector arg_sizes; + std::vector arg_types; + std::vector shmids; + CollectArgInfo(args, func_, arg_sizes, arg_types); + GenSharedMem(args, shmids, arg_sizes); + GenHostCode(args, shmids, arg_types, func_, test_file_); + // TODO: find a better way to do the following + LOG(CLEAN) << "Compiling the generated SDAccel OpenCL code ..."; + LOG(CLEAN) << "Running SDAccel OpenCL simulation ..."; + system("make -f sdaccel.mk run_cpu_em"); + // system("./out"); + LOG(CLEAN) << "Finished SDAccel OpenCL simulation"; + system("make -f sdaccel.mk clean"); + FreeSharedMem(args, shmids, arg_sizes); + }); + } + + private: + LoweredFunc func_; + std::string test_file_; +}; + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code) { + + std::shared_ptr n = + std::make_shared(func, code); + + return Module(n); +} + + +} // namespace runtime +} // namespace TVM \ No newline at end of file From 416ca4314350bfbb7042206db775dd2b6c3bb6e9 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Fri, 9 Aug 2019 15:02:05 -0400 Subject: [PATCH 015/103] remove tvm check code from kernel --- samples/ppac/gemm/gemm_ppac.py | 12 +- samples/ppac/gemm/ppac_common.py | 2 +- tvm/src/codegen/build_ppac.cc | 27 
----- tvm/src/codegen/codegen_rv64_ppac.cc | 158 +++++++++++++++++++++++++-- tvm/src/codegen/codegen_rv64_ppac.h | 10 ++ 5 files changed, 168 insertions(+), 41 deletions(-) delete mode 100644 tvm/src/codegen/build_ppac.cc diff --git a/samples/ppac/gemm/gemm_ppac.py b/samples/ppac/gemm/gemm_ppac.py index 729773205..664ecf01d 100644 --- a/samples/ppac/gemm/gemm_ppac.py +++ b/samples/ppac/gemm/gemm_ppac.py @@ -2,9 +2,7 @@ author: Guyue Huang (gh424@cornell.edu) General Matrix Multiplication -target : riscv_ppac - -NOT FINISHED , WILL FAIL AT 'BUILD' +target : rv64_ppac """ """ modified on Aug 1 @@ -29,15 +27,17 @@ def kernel(matrix_1, matrix_2): name="out_matrix") s = hcl.create_schedule([matrix_1, matrix_2], kernel) - f = hcl.build(s, target=target) + f = hcl.build(s, target=target, name='gemm') return f -dtype = hcl.UInt(1) +dtype = hcl.UInt(8) hcl.init(dtype) m, n, k = 4, 4, 64 f = gemm(m, n, k, dtype, target="rv64_ppac") print(f) +""" with open("csrc.cc", "w") as ofile: - ofile.write(str(f)) + ofile.write('/*CodeGenC backend*/\n'+str(f)) ofile.close() +""" \ No newline at end of file diff --git a/samples/ppac/gemm/ppac_common.py b/samples/ppac/gemm/ppac_common.py index 5a76843cb..c5d17c9ac 100644 --- a/samples/ppac/gemm/ppac_common.py +++ b/samples/ppac/gemm/ppac_common.py @@ -21,7 +21,7 @@ def bvec2x(vec): vec_int = np.append(vec_int, np.zeros((4-(len%4)))) len = len + (4 - len % 4) x = '' - ct = len/4 + ct = int(len/4) for c in range(ct, 0, -1): x = x + n2c[(np.dot( vec[4*c-4:4*c], base))] return x diff --git a/tvm/src/codegen/build_ppac.cc b/tvm/src/codegen/build_ppac.cc deleted file mode 100644 index 93ed51be8..000000000 --- a/tvm/src/codegen/build_ppac.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* - author Guyue Huang (gh424@cornell.edu) - */ - -#include "./codegen_rv64_ppac.h" -#include "./build_common.h" - -namespace TVM{ -namespace codegen{ - -std::string BuildRV64PPAC(Array funcs) { - CodeGenRV64PPAC cg; - for (LoweredFunc f: funcs) { - cg.AddFunction(f); - } - 
std::string code = cg.Finish(); - LOG(WARNING) << "RV64_PPAC backend doesn't yet have runtime, return kernel code"; - return code; -} - -TVM_REGISTER_API("codegen.build_rv64_ppac") -.set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = BuildRV64PPAC(args[0]); - }); - -} -} \ No newline at end of file diff --git a/tvm/src/codegen/codegen_rv64_ppac.cc b/tvm/src/codegen/codegen_rv64_ppac.cc index 5882a0280..697bbbda9 100644 --- a/tvm/src/codegen/codegen_rv64_ppac.cc +++ b/tvm/src/codegen/codegen_rv64_ppac.cc @@ -2,16 +2,129 @@ author Guyue Huang (gh424@cornell.edu) */ +#include +#include #include #include +#include #include #include -#include #include "./codegen_rv64_ppac.h" - +#include "./build_common.h" + namespace TVM { namespace codegen { +void CodeGenRV64PPAC::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + + this->InitFuncState(f); + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + // Write entry function name + this->stream << "void " << f->name << "("; + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + PrintType(std::get<1>(arg), this->stream); + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + //const BufferNode* buf = f->api_args[i].as(); + /* + if (v.type().is_handle() && buf) { + var_shape_map_[buf->data.get()] = buf->shape; + for (size_t i = 0; i < buf->shape.size(); i++) { + this->stream << '['; + this->PrintExpr(buf->shape[i], this->stream); + this->stream << ']'; + } + }*/ + + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + //range_ = CollectIterRange(f->body); + this->PrintStmt(f->body); + 
this->EndScope(func_scope); + this->PrintIndent(); + this->stream << "}\n\n"; +} + +/* +std::string CodeGenRV64PPAC::GetBufferRef(Type t, const Variable* buffer, Expr index) { + std::ostringstream os; + std::string vid = GetVarID(buffer); + if (t.lanes() == 1) { + bool is_scalar = (buf_length_map_.count(buffer) == 1 && + buf_length_map_[buffer] == 1); + if (is_scalar) { + os << vid; + } else { + os << vid; + //std::vector indices = ExtractIndices(index, var_shape_map_[buffer], range_); + + os << '['; + PrintExpr(index, os); + os << ']'; + + } + } + return os.str(); +}*/ + +void CodeGenRV64PPAC::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + +void CodeGenRV64PPAC::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + // Skip the buffer data checking + if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) + return ; + PrintIndent(); + if (cond[0] == '(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { CHECK_EQ(t.lanes(), 1) << "do not yet support vector types"; @@ -26,6 +139,36 @@ void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { os << "double"; return; } } else if (t.is_uint()) { + if 
(t.bits() == 1) { + os << "int"; return; + } else if (t.bits()<=32) { + os << "uint32_t"; return; + } else if (t.bits() <= 64) { + os << "uint64_t"; return; + } + /* + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + }*/ + } else if (t.is_int()) { + if (t.bits() == 1) { + os << "int"; return; + } else if (t.bits()<=32) { + os << "int32_t"; return; + } else if (t.bits() <= 64) { + os << "int64_t"; return; + } + /* + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "int" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + }*/ + } else if (t.is_ufixed() && t.fracs()==0 ) { if (t.bits() <= 8) { os << "uint8_t"; return; } @@ -40,9 +183,9 @@ void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { } else { os << "uint64_t"; - LOG(WARNING) << "Casting type " << t << " to uint64_t"; + LOG(WARNING) << "Casting type " << t << " to int64_t"; } - } else if (t.is_int()) { + } else if (t.is_fixed() && t.fracs()==0 ) { if (t.bits() <= 8) { os << "int8_t"; return; } @@ -60,8 +203,9 @@ void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { LOG(WARNING) << "Casting type " << t << " to int64_t"; } } - LOG(FATAL) << "Cannot convert type " << t << " to C type"; + os << t; + //LOG(FATAL) << "Cannot convert type " << t << " to C type"; } -} -} \ No newline at end of file +} //codegen +} //TVM \ No newline at end of file diff --git a/tvm/src/codegen/codegen_rv64_ppac.h b/tvm/src/codegen/codegen_rv64_ppac.h index 5e2003b7b..55a51e694 100644 --- a/tvm/src/codegen/codegen_rv64_ppac.h +++ b/tvm/src/codegen/codegen_rv64_ppac.h @@ -6,6 +6,8 @@ #define TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ #include +#include +#include "./merlinc/codeanalys_merlinc.h" #include "./codegen_c.h" namespace TVM { @@ -13,7 +15,15 @@ namespace codegen { class CodeGenRV64PPAC : public CodeGenC { public: + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); void PrintType(Type t, 
std::ostream& os) override; + void VisitStmt_(const LetStmt* op) override; + void VisitStmt_(const IfThenElse* op) override; + //void VisitStmt_(const Allocate* op) override; + //std::map > var_shape_map_; + + protected: + //std::string GetBufferRef(Type t, const Variable* buffer, Expr index); }; } From e9a0a1ce9ca5ffe86432d013e321880190c52f17 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 14 Aug 2019 11:26:25 -0400 Subject: [PATCH 016/103] opencl-backend --- python/heterocl/tvm/target.py | 7 +++++ tvm/src/codegen/opencl/build_opencl.cc | 25 ++++++++++++++++++ tvm/src/codegen/opencl/codegen_sdaccel.cc | 31 +++++++++++------------ 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 56d219dc8..6e97a28e6 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -1,3 +1,10 @@ +''' +@Description: In User Settings Edit +@Author: your name +@Date: 2019-07-25 17:49:16 +@LastEditTime: 2019-08-14 11:15:38 +@LastEditors: Please set LastEditors +''' """Target management API of TVM. TVM's target string is in fomat `` [-option=value]...``. 
diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 5992a7175..b05883924 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -1,3 +1,11 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-25 23:25:00 + * @LastEditTime: 2019-08-14 11:10:00 + * @LastEditors: Please set LastEditors + */ + /* Yang.Bai yb269@cornell.edu @@ -165,9 +173,15 @@ std::string BuildSDACCEL(Array funcs) { if (const auto* f = Registry::Get("tvm_callback_sdaccel_postproc")) { code = (*f)(code).operator std::string(); } + LOG(WARNING) << "SDaccel doesn't have runtime, return kernel code"; return code; } + +// runtime::Module BuildSDACCELXCLBIN(Array funcs) + + + // codegen for OpenCL // std::string BuildOpenCL(Array funcs) { // using TVM::runtime::Registry; @@ -215,6 +229,17 @@ TVM_REGISTER_API("codegen.build_sdaccel") * rv = BuildSDACCEL(args[0]); }); +TVM_REGISTER_API("codegen.build_aocl") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildSDACCEL(args[0]); + }); + +// For runtime +// TVM_REGISTER_API("codegen.build_sdaccel_xclbin") +// .set_body([]( TVMArgs args, TVMRetValue * rv ) { +// * rv = BuildSDACCEL(args[0]); +// }); + // TVM_REGISTER_API("codegen.build_opencl") // .set_body([]( TVMArgs args, TVMRetValue * rv ) { diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 95e1e9b87..235b06395 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -1,3 +1,4 @@ + /* Yang.Bai yb269@cornell.edu @@ -65,25 +66,23 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, // Write head files // stream.open("host.cpp"); - this->stream << "# pragma once\n"; - this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - this->stream << "# 
define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - this->stream << "# include \n"; - this->stream << "# include \n"; - this->stream << "# include \n"; - this->stream << "# include \n"; - this->stream << "# include \n"; - this->stream << "# include \n\n"; + // this->stream << "# pragma once\n"; + // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n\n"; // Write entry function name // this->stream << "__kernel " << f->name << "("; - this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; - this->stream << f->name << "("; - - - + // this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; + // this->stream << f->name << "("; + this->stream << "__kernel " << "void " << f->name << "("; // Write arguments for (size_t i = 0; i < f->args.size(); ++i) { From 5695f2c2dbda2b8be18728520237b5edb93f8197 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 14 Aug 2019 11:37:52 -0400 Subject: [PATCH 017/103] all --- tvm/src/codegen/opencl/build_opencl.cc | 2 +- tvm/src/codegen/opencl/codeanalys_openclc.cc | 7 ++ tvm/src/codegen/opencl/codegen_aocl.cc | 7 ++ tvm/src/codegen/opencl/codegen_sdaccel.cc | 7 ++ .../codegen/opencl/sdaccel/sdaccel_module.cc | 89 ++++++++++++++++++- 5 files changed, 108 insertions(+), 4 deletions(-) diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index b05883924..da937addc 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ 
b/tvm/src/codegen/opencl/build_opencl.cc @@ -2,7 +2,7 @@ * @Description: In User Settings Edit * @Author: your name * @Date: 2019-07-25 23:25:00 - * @LastEditTime: 2019-08-14 11:10:00 + * @LastEditTime: 2019-08-14 11:37:38 * @LastEditors: Please set LastEditors */ diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc index 030453a94..8515fde47 100644 --- a/tvm/src/codegen/opencl/codeanalys_openclc.cc +++ b/tvm/src/codegen/opencl/codeanalys_openclc.cc @@ -1,3 +1,10 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-29 17:29:40 + * @LastEditTime: 2019-08-14 11:37:37 + * @LastEditors: Please set LastEditors + */ /*! * Copyright (c) 2017 by Contributors * \file tvm/src/codegen/hlsc/codegen_hlsc.cc diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 6763aa7c3..b3823b4bd 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -1,3 +1,10 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-25 21:24:18 + * @LastEditTime: 2019-07-25 21:24:18 + * @LastEditors: your name + */ /* Yang.Bai yb269@cornell.edu diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 235b06395..abb39c3e0 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -1,3 +1,10 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-25 21:24:18 + * @LastEditTime: 2019-07-25 21:24:18 + * @LastEditors: your name + */ /* Yang.Bai diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc index 7a89a0dc1..71d1a98cc 100644 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc @@ -1,3 +1,10 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * 
@Date: 2019-07-30 15:15:28 + * @LastEditTime: 2019-07-30 15:15:28 + * @LastEditors: your name + */ /* Yang.Bai yb269@cornell.edu @@ -156,11 +163,12 @@ void GenHostCode(TVMArgs& args, int indent = 0; std::ofstream stream; stream.open("host.cpp"); + + // write the header files and macro commmands. stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - stream << "# include \n"; stream << "# include \n"; stream << "# include \n"; @@ -181,6 +189,65 @@ void GenHostCode(TVMArgs& args, stream << "int main(void) { \n"; indent += 2; + + // get the platform and devices + stream << "#if define(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; + PrintIndent(stream, indent); + stream << "# define STR_VALUE(arg) #arg\n"; + PrintIndent(stream, indent); + stream << "# define GET_STRING(name) STR_VALUE(name)\n"; + PrintIndent(stream, indent); + stream << "# define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n" + stream << "#endif"; + + + // get the xclbin filename . 
+ stream << "char * xclbinFilename = argv[1]\n"; + stream << "size_t \n"; + + // source memories + + + // create the test data and goldn data locally + + + + + // OpenCL HOST CODE AREA START + // get First Platform + stream << "std::vector platforms;\n"; + stream << "cl::Platform::get(&platforms)\n;"; + stream << "cl::Platform platform = platform[0];\n"; + stream << "std::cout << "" " + + // get accelerator devices and select 1st such device + + // create context and command queue for selected device + + + // load xcl binary into the buffer + + + // creat program from binary file + + // create kernel + + // create buffers inside device + + // copy input data to device buffer from host memory + + // run the kernel + + // copy device result data to host memory + // OpenCL HOST CODE AREA END + + + + // compare the results of the kernel to the simulation + + + + for ( int i = 0;i < args.size(); i++ ) { if (args[i].type_code() == kArrayHandle) { // read from the shared memory @@ -194,9 +261,25 @@ void GenHostCode(TVMArgs& args, } // call the function - PrintIndent(stream, indent) - + PrintIndent(stream, indent); + stream << func->name << "("; + for (int i = 0;i < args.size();i++) { + if (i != args.size()-1) { + stream << ", "; + } + } + stream << ");\n"; + // copy to shared mem + for (int i = 0;i < args.size();i++ ) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + PrintCopyBack(arr, stream, indent, i); + PrintIndent(stream, indent); + } + } + stream << "}\n"; + stream.close(); } } // namespace From 568555dd97403fefd19238466be4f0af3b492c36 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Thu, 15 Aug 2019 14:06:23 -0400 Subject: [PATCH 018/103] fix ppac module build --- tvm/src/codegen/build_rv64_ppac.cc | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tvm/src/codegen/build_rv64_ppac.cc diff --git a/tvm/src/codegen/build_rv64_ppac.cc b/tvm/src/codegen/build_rv64_ppac.cc new file mode 100644 index 
000000000..eef52c4d7 --- /dev/null +++ b/tvm/src/codegen/build_rv64_ppac.cc @@ -0,0 +1,31 @@ +/* + author Guyue Huang (gh424@cornell.edu) + */ + +#include "./codegen_rv64_ppac.h" +#include "./build_common.h" + +namespace TVM{ +namespace codegen{ + +std::string BuildRV64PPAC(Array funcs) { + CodeAnalysMerlinC ca; + CodeGenRV64PPAC cg; + for (LoweredFunc f: funcs) { + ca.AddFunction(f); + str2tupleMap map_arg_type; + map_arg_type = ca.Finish(); + cg.AddFunction(f, map_arg_type); + } + std::string code = cg.Finish(); + LOG(WARNING) << "RV64_PPAC backend doesn't yet have runtime, return kernel code"; + return code; +} + +TVM_REGISTER_API("codegen.build_rv64_ppac") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = BuildRV64PPAC(args[0]); + }); + +} +} \ No newline at end of file From 2518aeb58014fea0f35003ab20b7812083ee16d3 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Thu, 15 Aug 2019 16:47:09 -0400 Subject: [PATCH 019/103] support ppac MVPb pragma --- python/heterocl/tvm/schedule.py | 14 +- python/heterocl/tvm/stmt.py | 1 + tvm/HalideIR/src/ir/Expr.h | 3 +- tvm/HalideIR/src/ir/IRPrinter.cpp | 3 + tvm/include/tvm/expr.h | 4 +- tvm/include/tvm/schedule.h | 5 +- tvm/src/api/api_lang.cc | 2 +- tvm/src/codegen/build_rv64_ppac.cc | 10 +- tvm/src/codegen/codegen_rv64_ppac.cc | 186 ++++++++++++-------------- tvm/src/codegen/codegen_rv64_ppac.h | 4 +- tvm/src/op/op_util.cc | 1 + tvm/src/schedule/compute_primitive.cc | 1 + tvm/src/schedule/schedule_lang.cc | 11 +- 13 files changed, 130 insertions(+), 115 deletions(-) diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 21905b443..458e8e1b9 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -612,7 +612,7 @@ def pipeline(self, var, initiation_interval=1): def stencil(self, burst_width=512, unroll_factor=1, num_iteration=1): _api_internal._StageStencil(self, burst_width, unroll_factor, num_iteration) - def pragma(self, var, pragma_type): + def pragma(self, 
var, pragma_type, annotate_key=None, annotate_value=None): """Annotate the iteration with pragma This will translate to a pragma_scope surrounding @@ -626,6 +626,13 @@ def pragma(self, var, pragma_type): pragma_type : str The pragma string to be annotated + + annotate_key : str (optional) + The attribute key to be annotated + + annotate_value : Expr (optional) + The attribute value to be annotated + Note ---- @@ -655,9 +662,12 @@ def pragma(self, var, pragma_type): Hint parallel loop to execute in strided pattern. :code:`for (int i = task_id; i < end; i += num_task)` + + - **PPAC_MVPb_func** + PPAC backend binary matrix-vector product kernel function """ - _api_internal._StagePragma(self, var, pragma_type) + _api_internal._StagePragma(self, var, pragma_type, annotate_key, annotate_value) def prefetch(self, tensor, var, offset): """Prefetch the specified variable diff --git a/python/heterocl/tvm/stmt.py b/python/heterocl/tvm/stmt.py index 4db84970f..d0f281797 100644 --- a/python/heterocl/tvm/stmt.py +++ b/python/heterocl/tvm/stmt.py @@ -38,6 +38,7 @@ class For(Stmt): Vectorized = 2 Unrolled = 3 Pipelined = 4 + PPACFuncLoop = 5 @register_node class Store(Stmt): diff --git a/tvm/HalideIR/src/ir/Expr.h b/tvm/HalideIR/src/ir/Expr.h index b78a466ed..850a0800e 100644 --- a/tvm/HalideIR/src/ir/Expr.h +++ b/tvm/HalideIR/src/ir/Expr.h @@ -292,7 +292,8 @@ enum class ForType : int { Parallel = 1, Vectorized = 2, Unrolled = 3, - Pipelined = 4 + Pipelined = 4, + PPACFuncLoop = 5 }; /** An enum describing the partition type */ diff --git a/tvm/HalideIR/src/ir/IRPrinter.cpp b/tvm/HalideIR/src/ir/IRPrinter.cpp index 6a3a5d651..ac5fe6f52 100644 --- a/tvm/HalideIR/src/ir/IRPrinter.cpp +++ b/tvm/HalideIR/src/ir/IRPrinter.cpp @@ -67,6 +67,9 @@ ostream &operator<<(ostream &out, const ForType &type) { case ForType::Pipelined: out << "pipelined"; break; + case ForType::PPACFuncLoop: + out << "PPACFuncLoop"; + break; } return out; } diff --git a/tvm/include/tvm/expr.h 
b/tvm/include/tvm/expr.h index 233e6a172..f7fb31d22 100644 --- a/tvm/include/tvm/expr.h +++ b/tvm/include/tvm/expr.h @@ -195,7 +195,8 @@ enum IterVarType : int { * \brief Marks boundary of tensorization intrinsic. */ kTensorized = 8, - kPipelined = 9 + kPipelined = 9, + kPPACFuncLoop = 10 }; /*! @@ -299,6 +300,7 @@ inline const char* IterVarType2String(IterVarType t) { case kParallelized: return "Parallelized"; case kTensorized: return "Tensorized"; case kPipelined: return "Pipelined"; + case kPPACFuncLoop: return "PPACFuncLoop"; } return "Unknown"; } diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index 9dc1956c8..422c23929 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -219,7 +219,10 @@ class Stage : public NodeRef { * * \return reference to self. */ - EXPORT Stage& pragma(IterVar var, const std::string& pragma_type); // NOLINT(*) + EXPORT Stage& pragma(IterVar var, + const std::string& pragma_type, + const std::string& annotate_key, + const Expr& annotate_value); // NOLINT(*) /*! * \brief Fetch data in advance. 
* \param domain the tensor to be prefetched diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index f07d590a5..048b98426 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -393,7 +393,7 @@ TVM_REGISTER_API("_StageStencil") TVM_REGISTER_API("_StagePragma") .set_body([](TVMArgs args, TVMRetValue* ret) { args[0].operator Stage() - .pragma(args[1], args[2]); + .pragma(args[1], args[2], args[3], args[4]); }); TVM_REGISTER_API("_StagePrefetch") diff --git a/tvm/src/codegen/build_rv64_ppac.cc b/tvm/src/codegen/build_rv64_ppac.cc index eef52c4d7..2ee3713b3 100644 --- a/tvm/src/codegen/build_rv64_ppac.cc +++ b/tvm/src/codegen/build_rv64_ppac.cc @@ -9,13 +9,13 @@ namespace TVM{ namespace codegen{ std::string BuildRV64PPAC(Array funcs) { - CodeAnalysMerlinC ca; + //CodeAnalysMerlinC ca; CodeGenRV64PPAC cg; for (LoweredFunc f: funcs) { - ca.AddFunction(f); - str2tupleMap map_arg_type; - map_arg_type = ca.Finish(); - cg.AddFunction(f, map_arg_type); + //ca.AddFunction(f); + //str2tupleMap map_arg_type; + //map_arg_type = ca.Finish(); + cg.AddFunction(f); } std::string code = cg.Finish(); LOG(WARNING) << "RV64_PPAC backend doesn't yet have runtime, return kernel code"; diff --git a/tvm/src/codegen/codegen_rv64_ppac.cc b/tvm/src/codegen/codegen_rv64_ppac.cc index 697bbbda9..661fe88a5 100644 --- a/tvm/src/codegen/codegen_rv64_ppac.cc +++ b/tvm/src/codegen/codegen_rv64_ppac.cc @@ -15,6 +15,7 @@ namespace TVM { namespace codegen { +/* void CodeGenRV64PPAC::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { @@ -40,17 +41,6 @@ void CodeGenRV64PPAC::AddFunction(LoweredFunc f, PrintType(std::get<1>(arg), this->stream); this->stream << "*"; this->stream << ' ' << std::get<0>(arg); - //const BufferNode* buf = f->api_args[i].as(); - /* - if (v.type().is_handle() && buf) { - var_shape_map_[buf->data.get()] = buf->shape; - for (size_t i = 0; i < buf->shape.size(); i++) { - this->stream << '['; - this->PrintExpr(buf->shape[i], this->stream); - this->stream 
<< ']'; - } - }*/ - } } stream << ") {\n"; @@ -61,28 +51,34 @@ void CodeGenRV64PPAC::AddFunction(LoweredFunc f, this->PrintIndent(); this->stream << "}\n\n"; } +*/ +void CodeGenRV64PPAC::PrintMVPb(const For* op, std::string m, bool compacted) { + PrintIndent(); + stream << "WHERE SUPPOSED TO BE MVPb KERNEL\n" << "We get M! m = " << m << "\n"; +} -/* -std::string CodeGenRV64PPAC::GetBufferRef(Type t, const Variable* buffer, Expr index) { +void CodeGenRV64PPAC::VisitStmt_(const For* op) { std::ostringstream os; - std::string vid = GetVarID(buffer); - if (t.lanes() == 1) { - bool is_scalar = (buf_length_map_.count(buffer) == 1 && - buf_length_map_[buffer] == 1); - if (is_scalar) { - os << vid; - } else { - os << vid; - //std::vector indices = ExtractIndices(index, var_shape_map_[buffer], range_); - - os << '['; - PrintExpr(index, os); - os << ']'; - + if (op->for_type == ForType::PPACFuncLoop) { + int i = 0, matrix_m = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto m = op->annotate_values[i].as(); + if (str->value == "matrix_row_num" && m != nullptr && m->value > 0) { + matrix_m = m->value; + break; + } + } } - } - return os.str(); -}*/ + i++; + if (matrix_m > 0) { + os << matrix_m; + PrintMVPb(op, os.str(), false); + return; + } + } + CodeGenC::VisitStmt_(op); +} void CodeGenRV64PPAC::VisitStmt_(const LetStmt* op) { std::string value = PrintExpr(op->value); @@ -125,87 +121,75 @@ void CodeGenRV64PPAC::VisitStmt_(const IfThenElse* op) { stream << "}\n"; } + + void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { CHECK_EQ(t.lanes(), 1) << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - if (t.is_float()) { - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - os << "double"; return; - } - } else if (t.is_uint()) { - if (t.bits() == 1) { - os << "int"; return; - } else if (t.bits()<=32) { - os << "uint32_t"; return; - } else if (t.bits() <= 64) { - os << "uint64_t"; 
return; - } - /* - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "uint" << t.bits() << "_t"; return; + if (t.is_uint() || t.is_int() || t.is_fixed() || t.is_ufixed()) { + if (t.is_uint()) { + if (t.bits() == 1) { + os << "int"; return; + } else if (t.bits() <= 32) { + os << "uint32_t"; return; + } else if (t.bits() <= 64) { + os << "uint64_t"; return; + } else { + os << "int"; return; } - case 1: os << "int"; return; - }*/ - } else if (t.is_int()) { - if (t.bits() == 1) { - os << "int"; return; - } else if (t.bits()<=32) { - os << "int32_t"; return; - } else if (t.bits() <= 64) { - os << "int64_t"; return; } - /* - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "int" << t.bits() << "_t"; return; + else if (t.is_int()) { + if (t.bits() == 1) { + os << "int"; return; + } else if (t.bits() <= 32) { + os << "int32_t"; return; + } else if (t.bits() <= 64) { + os << "int64_t"; return; + } else { + os << "int"; return; } - case 1: os << "int"; return; - }*/ - } else if (t.is_ufixed() && t.fracs()==0 ) { - if (t.bits() <= 8) { - os << "uint8_t"; return; - } - else if (t.bits() <= 16) { - os << "uint16_t"; return; - } - else if (t.bits() <= 32) { - os << "uint32_t"; return; - } - else if (t.bits() <= 64) { - os << "uint64_t"; return; } - else { - os << "uint64_t"; - LOG(WARNING) << "Casting type " << t << " to int64_t"; - } - } else if (t.is_fixed() && t.fracs()==0 ) { - if (t.bits() <= 8) { - os << "int8_t"; return; - } - else if (t.bits() <= 16) { - os << "int16_t"; return; - } - else if (t.bits() <= 32) { - os << "int32_t"; return; - } - else if (t.bits() <= 64) { - os << "int64_t"; return; - } - else { - os << "int64_t"; - LOG(WARNING) << "Casting type " << t << " to int64_t"; + else if (t.is_ufixed() && t.fracs()==0 ) { + if (t.bits() <= 8) { + os << "uint8_t"; return; + } + else if (t.bits() <= 16) { + os << "uint16_t"; return; + } + else if (t.bits() <= 32) { + os << "uint32_t"; return; + } + else if (t.bits() <= 64) { + os << 
"uint64_t"; return; + } + else { + os << "uint64_t"; + LOG(WARNING) << "Casting type " << t << " to int64_t"; + return; + } + } else if (t.fracs()==0 ) { + if (t.bits() <= 8) { + os << "int8_t"; return; + } + else if (t.bits() <= 16) { + os << "int16_t"; return; + } + else if (t.bits() <= 32) { + os << "int32_t"; return; + } + else if (t.bits() <= 64) { + os << "int64_t"; return; + } + else { + os << "int64_t"; + LOG(WARNING) << "Casting type " << t << " to int64_t"; + return; + } } } os << t; //LOG(FATAL) << "Cannot convert type " << t << " to C type"; } -} //codegen -} //TVM \ No newline at end of file +} //namespace codegen +} //namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/codegen_rv64_ppac.h b/tvm/src/codegen/codegen_rv64_ppac.h index 55a51e694..4f51c2040 100644 --- a/tvm/src/codegen/codegen_rv64_ppac.h +++ b/tvm/src/codegen/codegen_rv64_ppac.h @@ -15,7 +15,9 @@ namespace codegen { class CodeGenRV64PPAC : public CodeGenC { public: - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + //void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + void PrintMVPb(const For* op, std::string m, bool compacted); + void VisitStmt_(const For* op); void PrintType(Type t, std::ostream& os) override; void VisitStmt_(const LetStmt* op) override; void VisitStmt_(const IfThenElse* op) override; diff --git a/tvm/src/op/op_util.cc b/tvm/src/op/op_util.cc index 9cf9e6713..5dac3d853 100644 --- a/tvm/src/op/op_util.cc +++ b/tvm/src/op/op_util.cc @@ -68,6 +68,7 @@ MakeLoopNest(const Stage& stage, case kDataPar: break; case kTensorized: break; case kPipelined: break; + case kPPACFuncLoop: break; default: LOG(FATAL) << "Unknown iter type" << it_attr->iter_type << " in the iter_var_attrs"; diff --git a/tvm/src/schedule/compute_primitive.cc b/tvm/src/schedule/compute_primitive.cc index ae59872b3..709b722bd 100644 --- a/tvm/src/schedule/compute_primitive.cc +++ b/tvm/src/schedule/compute_primitive.cc @@ -205,6 +205,7 @@ class IterVarAttrUpdater 
final : public IRMutator { case kVectorized: for_type = ForType::Vectorized; break; case kParallelized: for_type = ForType::Parallel; break; case kPipelined: for_type = ForType::Pipelined; break; + case kPPACFuncLoop: for_type = ForType::PPACFuncLoop; break; case kDataPar: break; case kTensorized: break; default: LOG(FATAL) << "Unknown iter type" << node_->iter_type; diff --git a/tvm/src/schedule/schedule_lang.cc b/tvm/src/schedule/schedule_lang.cc index 624c159a1..22c47819f 100644 --- a/tvm/src/schedule/schedule_lang.cc +++ b/tvm/src/schedule/schedule_lang.cc @@ -511,17 +511,24 @@ Stage& Stage::stencil(int burst_width, int unroll_factor, int num_iteration) { / return *this; } -Stage& Stage::pragma(IterVar var, const std::string& pragma_type) { // NOLINT(*) +Stage& Stage::pragma(IterVar var, const std::string& pragma_type, + const std::string& annotate_key, const Expr& annotate_value) { // NOLINT(*) if (pragma_type == "unroll") { this->unroll(var); } else if (pragma_type == "vectorize") { this->vectorize(var); - } else { + } else if (pragma_type == "PPAC_MVPb_func"){ /* UpdateIterVarAttr(operator->(), var, [pragma_type](IterVarAttrNode* n) { n->pragmas.push_back(ir::StringImm::make(pragma_type)); }); */ + std::shared_ptr node = std::make_shared(); + node->iter_type = kPPACFuncLoop; + node->for_loop_annotate_keys.push_back(ir::StringImm::make(annotate_key)); + node->for_loop_annotate_values.push_back(annotate_value); + SetIterVarAttr(operator->(), var, node.get()); + return *this; } return *this; } From 314994e5ba6d877685352cc9aa715f30b3b18aae Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Fri, 16 Aug 2019 09:39:32 -0400 Subject: [PATCH 020/103] fix ignoring ppac pragma in cpu backend --- tvm/src/codegen/llvm/codegen_cpu.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tvm/src/codegen/llvm/codegen_cpu.cc b/tvm/src/codegen/llvm/codegen_cpu.cc index 796fa9d4b..fe92530f4 100644 --- a/tvm/src/codegen/llvm/codegen_cpu.cc +++ 
b/tvm/src/codegen/llvm/codegen_cpu.cc @@ -719,7 +719,8 @@ void CodeGenCPU::VisitStmt_(const For* op) { CHECK(is_zero(op->min)); if (op->for_type == ForType::Serial || op->for_type == ForType::Unrolled || - op->for_type == ForType::Pipelined) { + op->for_type == ForType::Pipelined || + op->for_type == ForType::PPACFuncLoop ) { CodeGenLLVM::VisitStmt_(op); } else if (op->for_type == ForType::Parallel) { if (parallel_env_.penv == nullptr) { From a06f97bd021c7e46f46db283012076469da318eb Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Sun, 18 Aug 2019 12:45:04 -0400 Subject: [PATCH 021/103] opencl-backend --- .gitignore | 4 +- python/heterocl/tvm/target.py | 42 +--- tvm/src/codegen/opencl/codeanalys_openclc.cc | 2 +- tvm/src/codegen/opencl/codeanalys_openclc.h | 202 ++++++++++++++++++ tvm/src/codegen/opencl/codegen_aocl.cc | 4 +- tvm/src/codegen/opencl/codegen_aocl.h | 58 +++++ tvm/src/codegen/opencl/codegen_sdaccel.h | 66 ++++++ .../codegen/opencl/sdaccel/sdaccel_module.cc | 4 +- .../codegen/opencl/sdaccel/sdaccel_module.h | 30 +++ 9 files changed, 364 insertions(+), 48 deletions(-) create mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.h create mode 100644 tvm/src/codegen/opencl/codegen_aocl.h create mode 100644 tvm/src/codegen/opencl/codegen_sdaccel.h create mode 100644 tvm/src/codegen/opencl/sdaccel/sdaccel_module.h diff --git a/.gitignore b/.gitignore index a70651d15..ba1cf217b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,8 +16,8 @@ tags docs/source/samples docs/source/tutorials soda_* -*.cpp -*.h +# *.cpp +# *.h out # Downloaded files diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 6e97a28e6..d5056a7ea 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -2,49 +2,9 @@ @Description: In User Settings Edit @Author: your name @Date: 2019-07-25 17:49:16 -@LastEditTime: 2019-08-14 11:15:38 +@LastEditTime: 2019-08-14 16:19:52 @LastEditors: Please set LastEditors ''' -"""Target management API of TVM. 
- -TVM's target string is in fomat `` [-option=value]...``. - -Note ----- -The list of options include: - -- **-device=** - - The device name. - -- **-mtriple=** or **-target** - - Specify the target triple, which is useful for cross - compilation. - -- **-mcpu=** - - Specify a specific chip in the current architecture to - generate code for. By default this is infered from the - target triple and autodetected to the current architecture. - -- **-mattr=a1,+a2,-a3,...** - - Override or control specific attributes of the target, - such as whether SIMD operations are enabled or not. The - default set of attributes is set by the current CPU. - -- **-system-lib** - - Build TVM system library module. System lib is a global module that contains - self registered functions in program startup. User can get the module using - :any:`tvm.module.system_lib`. - It is useful in environments where dynamic loading api like dlopen is banned. - The system lib will be available as long as the result code is linked by the program. - -We can use :any:`tvm.target.create` to create a tvm.target.Target from the target string. -We can also use other specific function in this module to create specific targets. -""" from __future__ import absolute_import import warnings diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc index 8515fde47..5184a90fc 100644 --- a/tvm/src/codegen/opencl/codeanalys_openclc.cc +++ b/tvm/src/codegen/opencl/codeanalys_openclc.cc @@ -2,7 +2,7 @@ * @Description: In User Settings Edit * @Author: your name * @Date: 2019-07-29 17:29:40 - * @LastEditTime: 2019-08-14 11:37:37 + * @LastEditTime: 2019-08-14 12:10:25 * @LastEditors: Please set LastEditors */ /*! 
diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.h b/tvm/src/codegen/opencl/codeanalys_openclc.h new file mode 100644 index 000000000..8aaeedb39 --- /dev/null +++ b/tvm/src/codegen/opencl/codeanalys_openclc.h @@ -0,0 +1,202 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +#ifndef TVM_CODEGEN_CODEANALYS_OPENCLC_H_ +#define TVM_CODEGEN_CODEANALYS_OPENCLC_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../codegen_source_base.h" + +namespace TVM { +namespace codegen { + +using namespace ir; + +template +using str2tupleMap = std::unordered_map>; + +/*! + * \brief A class to analyze the IR AST for MerlinC generation. + * + */ +class CodeAnalysOpenCLC : + public ExprFunctor, + public StmtFunctor, + public CodeGenSourceBase { + public: + /*! + * \brief Initialize the code generator. + * \param output_ssa Whether output SSA. + */ + void Init(); + /*! + * \brief Add the function to the generated module. + * \param f The function to be compiled. + */ + void AddFunction(LoweredFunc f); + /*! + * \brief Finalize the compilation and return the code. + * \return The code. + */ + str2tupleMap Finish(); + /*! + * \brief Print the Stmt n to CodeAnalysMerlinC->stream + * \param n The statement to be printed. + */ + void PrintStmt(const Stmt& n) { + VisitStmt(n); + } + /*! + * \brief Print the expression n(or its ssa id if in ssa mode) into os + * \param n The expression to be printed. + * \param os The output stream + */ + void PrintExpr(const Expr& n, std::ostream& os); + /*! + * \brief Same as PrintExpr, but simply returns result string + * \param n The expression to be printed. + */ + std::string PrintExpr(const Expr& n) { + std::ostringstream os; + PrintExpr(n, os); + return os.str(); + } + // The following parts are overloadable print operations. + /*! + * \brief Initialize codegen state for generating f. + * \param f The function to be compiled. 
+ */ + virtual void InitFuncState(LoweredFunc f); + // expression + void VisitExpr_(const Variable* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Load* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Let* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Call* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Add* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Sub* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Mul* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Div* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Mod* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Min* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Max* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const EQ* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const NE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const LT* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const LE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GT* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const And* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Or* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Cast* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Not* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Select* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Ramp* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Broadcast* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const IntImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const UIntImm* op, std::ostream& os) override; // 
NOLINT(*) + void VisitExpr_(const FloatImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const StringImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GetBit* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GetSlice* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const SetBit* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) + // statment + void VisitStmt_(const LetStmt* op) override; + void VisitStmt_(const Store* op) override; + void VisitStmt_(const For* op) override; + void VisitStmt_(const IfThenElse* op) override; + void VisitStmt_(const Allocate* op) override; + void VisitStmt_(const AttrStmt* op) override; + void VisitStmt_(const AssertStmt* op) override; + void VisitStmt_(const Evaluate* op) override; + void VisitStmt_(const Block* op) override; + void VisitStmt_(const ProducerConsumer* op) override; + void VisitStmt_(const KernelDef* op) override; + void VisitStmt_(const KernelStmt* op) override; + void VisitStmt_(const Return* op) override; + void VisitStmt_(const Break* op) override; + void VisitStmt_(const While* op) override; + void VisitStmt_(const Partition* op) override; + /*! + * Print Type represetnation of type t. + * \param t The type representation. + * \param os The stream to print the ctype into + */ + void PrintType(Type t, std::ostream& os); // NOLINT(*) + std::string GetType(Type t); // NOLINT(*) + /*! + * \brief Print expr representing the thread tag + * \param IterVar iv The thread index to be binded; + */ + void BindThreadIndex(const IterVar& iv); // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os); // NOLINT(*) + void PrintStorageSync(const Call* op); // NOLINT(*) + // Binary vector op. 
+ void PrintVecBinaryOp( + const std::string&op, Type op_type, + Expr lhs, Expr rhs, std::ostream& os); // NOLINT(*) + // print vector load + std::string GetVecLoad(Type t, const Variable* buffer, Expr base); + // print vector store + void PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value); // NOLINT(*) + // print load of single element + void PrintVecElemLoad( + const std::string& vec, Type t, int i, std::ostream& os); // NOLINT(*) + // print store of single element. + void PrintVecElemStore( + const std::string& vec, Type t, int i, const std::string& value); + // Get a cast type from to + std::string CastFromTo(std::string value, Type from, Type target); + + protected: + // Print reference to struct location + std::string GetStructRef( + Type t, const Expr& buffer, const Expr& index, int kind); + // print reference to a buffer as type t in index. + virtual std::string GetBufferRef( + Type t, const Variable* buffer, Expr index); + /*! + * \brief If buffer is allocated as type t. + * \param buf_var The buffer variable. + * \param t The type to be checked. + */ + bool HandleTypeMatch(const Variable* buf_var, Type t) const; + /*! + * \brief Register the data type of buf_var + * \param buf_var The buffer variable. + * \param t The type to be checked. + */ + void RegisterHandleType(const Variable* buf_var, Type t); + // override + void PrintSSAAssign( + const std::string& target, const std::string& src, Type t) final; + /*! \brief restrict keyword */ + std::string restrict_keyword_{""}; + /*! \brief the storage scope of allocation */ + std::unordered_map alloc_storage_scope_; + /*! \brief the data type of allocated buffers */ + std::unordered_map handle_data_type_; + + private: + /*! \brief set of volatile buf access */ + std::unordered_set volatile_buf_; + /*! 
\brief map of function arguments to their types */ + str2tupleMap map_arg_type_; +}; + +} // namespace codegen +} // namespace TVM +#endif // TVM_CODEGEN_CODEGEN_C_H_ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index b3823b4bd..b4ddaae30 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -2,8 +2,8 @@ * @Description: In User Settings Edit * @Author: your name * @Date: 2019-07-25 21:24:18 - * @LastEditTime: 2019-07-25 21:24:18 - * @LastEditors: your name + * @LastEditTime: 2019-08-14 11:44:16 + * @LastEditors: Please set LastEditors */ /* Yang.Bai diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h new file mode 100644 index 000000000..4e3f12a0d --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -0,0 +1,58 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-25 21:24:23 + * @LastEditTime: 2019-08-14 11:37:28 + * @LastEditors: Please set LastEditors + */ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef TVM_CODEGEN_CODEGEN_AOCL_H_ +#define TVM_CODEGEN_CODEGEN_AOCL_H_ + +# include +# include +# include +# include "../codegen_c.h" + +namespace TVM { +namespace codegen { + +class CodeGenAOCL final : public CodeGenC { + public: + CodeGenAOCL(); + void AddFunction(LoweredFunc f); + std::string Finish(); + + void InitFuncState(LoweredFunc f) final; + void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) final; //NOLINT(*) + void PrintStorageSync(const Call* op) final; //NOLINT(*) + void PrintType(Type t, std::ostream& os) final; //NOLINT(*) + std::string GetVecLoad(Type t, const Variable * buffer, + Expr base) final; // NOLINT(*) + void PrintVecStore(const Variable * buffer, Type t, + Expr base, const std::string& value) final; //NOLINT(*) + void PrintVecAddr(const Variable * buffer, Type t, + Expr base, std::ostream& 
os); //NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target); //NOLINT(*) + + //overload visitor + void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) + + private: + bool enable_fp16_{false}; + bool enable_fp64_{false}; + + +}; +} // namespace codegen +} // namespace tvm + +#endif // TVM_CODEGEN_CODEGEN_AOCL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h new file mode 100644 index 000000000..a0c46bf3e --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -0,0 +1,66 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-25 21:24:23 + * @LastEditTime: 2019-08-14 11:37:17 + * @LastEditors: Please set LastEditors + */ + +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ +#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "../codegen_c.h" + +namespace TVM { +namespace codegen { + + +class CodeGenSDACCEL final : public CodeGenC { + public: + CodeGenSDACCEL(); + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + std::string Finish(); + + void InitFuncState(LoweredFunc f) final; + void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) final; //NOLINT(*) + void PrintStorageSync(const Call* op) final; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) final; //NOLINT(*) + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + + std::string GetVecLoad(Type t, const Variable * buffer, + Expr base) final; // NOLINT(*) + void PrintVecStore(const Variable * 
buffer, Type t, + Expr base, const std::string& value) final; //NOLINT(*) + void PrintVecAddr(const Variable * buffer, Type t, + Expr base, std::ostream& os); //NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) + + //overload visitor + void VisitStmt_(const LetStmt* op) final; // NOLINT(*) + void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) + + private: + // whether enable fp16 and fp64 extension + bool enable_fp16_{false}; + bool enable_fp64_{false}; + +}; +} // namespace codegen +} // namespace TVM + +#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc index 71d1a98cc..8a640e556 100644 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc @@ -2,8 +2,8 @@ * @Description: In User Settings Edit * @Author: your name * @Date: 2019-07-30 15:15:28 - * @LastEditTime: 2019-07-30 15:15:28 - * @LastEditors: your name + * @LastEditTime: 2019-08-14 16:16:03 + * @LastEditors: Please set LastEditors */ /* Yang.Bai diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h new file mode 100644 index 000000000..25db653b9 --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h @@ -0,0 +1,30 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-30 15:15:15 + * @LastEditTime: 2019-07-30 15:15:15 + * @LastEditors: your name + */ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef SDACCEL_MODULE_H +#define SDACCEL_MODULE_H + +# include +# include +# include 
"../../build_common.h" + +namespace TVM { +namespace runtime { + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code); + +} // namespace runtime +} // namespace TVM + +#endif \ No newline at end of file From 3f03b415cebecdac763da6148233b55dd70f3a63 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Sun, 18 Aug 2019 13:12:17 -0400 Subject: [PATCH 022/103] aocl-backend --- python/heterocl/tvm/target.py | 2 +- tvm/src/codegen/opencl/aocl/aocl_module.cc | 0 tvm/src/codegen/opencl/aocl/aocl_module.h | 0 tvm/src/codegen/opencl/build_opencl.cc | 35 +++- tvm/src/codegen/opencl/codeanalys_openclc.cc | 7 - tvm/src/codegen/opencl/codegen_aocl.cc | 201 +++++++++++++++++-- tvm/src/codegen/opencl/codegen_aocl.h | 26 +-- tvm/src/codegen/opencl/codegen_sdaccel.cc | 8 - tvm/src/codegen/opencl/codegen_sdaccel.h | 8 - 9 files changed, 226 insertions(+), 61 deletions(-) create mode 100644 tvm/src/codegen/opencl/aocl/aocl_module.cc create mode 100644 tvm/src/codegen/opencl/aocl/aocl_module.h diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index d5056a7ea..17b483858 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -17,7 +17,7 @@ if _LIB_NAME != "libhcl_runtime.so": raise err_msg -FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'opencl', 'sdaccel', 'sdaccel_sw_emu', 'aocl'] +FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'opencl', 'sdaccel', 'sdaccel_sw_emu', 'aocl', 'aocl_sw_emu'] def _merge_opts(opts, new_opts): """Helper function to merge options""" diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.cc b/tvm/src/codegen/opencl/aocl/aocl_module.cc new file mode 100644 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.h b/tvm/src/codegen/opencl/aocl/aocl_module.h new file mode 100644 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 
da937addc..bba56d818 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -1,11 +1,3 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-25 23:25:00 - * @LastEditTime: 2019-08-14 11:37:38 - * @LastEditors: Please set LastEditors - */ - /* Yang.Bai yb269@cornell.edu @@ -182,6 +174,31 @@ std::string BuildSDACCEL(Array funcs) { +// codegen for AOCL_WITH_ANALYSIS +std::string BuildAOCL(Array funcs) { + using TVM::runtime::Registry; + CodeAnalysOpenCLC ca; + CodeGenAOCL cg; + for (LoweredFunc f : funcs) { + ca.AddFunction(f); + str2tupleMap map_arg_type; + map_arg_type = ca.Finish(); + + cg.AddFunction(f, map_arg_type); + + } + std::string code = cg.Finish(); + + if (const auto* f = Registry::Get("tvm_callback_aocl_postproc")) { + code = (*f)(code).operator std::string(); + } + + LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; + return code; +} + + + // codegen for OpenCL // std::string BuildOpenCL(Array funcs) { // using TVM::runtime::Registry; @@ -231,7 +248,7 @@ TVM_REGISTER_API("codegen.build_sdaccel") TVM_REGISTER_API("codegen.build_aocl") .set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildSDACCEL(args[0]); + * rv = BuildAOCL(args[0]); }); // For runtime diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc index 5184a90fc..030453a94 100644 --- a/tvm/src/codegen/opencl/codeanalys_openclc.cc +++ b/tvm/src/codegen/opencl/codeanalys_openclc.cc @@ -1,10 +1,3 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-29 17:29:40 - * @LastEditTime: 2019-08-14 12:10:25 - * @LastEditors: Please set LastEditors - */ /*! 
* Copyright (c) 2017 by Contributors * \file tvm/src/codegen/hlsc/codegen_hlsc.cc diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index b4ddaae30..5bbb71051 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -1,20 +1,13 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-25 21:24:18 - * @LastEditTime: 2019-08-14 11:44:16 - * @LastEditors: Please set LastEditors - */ /* Yang.Bai yb269@cornell.edu */ - # include # include # include # include # include +# include # include "./codegen_aocl.h" # include "../../runtime/thread_storage_scope.h" @@ -34,11 +27,138 @@ void CodeGenAOCL::InitFuncState(LoweredFunc f) { } } -void CodeGenAOCL::AddFunction(LoweredFunc f) { - this->stream << "__kernel "; - CodeGenC::AddFunction(f); + +// void CodeGenAOCL::AddFunction(LoweredFunc f) { +// this->stream << "__kernel "; +// CodeGenC::AddFunction(f); +// } + +// void CodeGenAOCL::AddFunction(LoweredFunc f) { + // this->stream << "# pragma once\n"; + // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n\n"; + // this->stream << "__kernel "; + +// CodeGenC::AddFunction(f); +// } + +void CodeGenAOCL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + 
RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Write head files + // stream.open("host.cpp"); + // this->stream << "# pragma once\n"; + // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n\n"; + + // Write entry function name + // this->stream << "__kernel " << f->name << "("; + // this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; + // this->stream << f->name << "("; + this->stream << "__kernel " << "void " << f->name << "("; + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + // this->stream << "global "; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; + this->stream << "}\n\n"; } + + + +// void CodeGenAOCL::AddFunction(LoweredFunc f, +// str2tupleMap map_arg_type) { +// // Don't Write header flies +// // Clear previous generated state +// this->InitFuncState(f); +// // Register alloc buffer type +// for ( 
const auto & kv : f->handle_data_type ) { +// this->stream << kv.first.get(); +// this->stream << kv.second.type(); +// RegisterHandleType(kv.first.get(), kv.second.type()); +// } +// // Write entry function name +// this->stream << "__kernel "; +// // Write arguments +// for ( size_t i = 0; i < f->args.size(); i++ ) { +// Var v = f->args[i]; +// std::string vid = AllocVarID(v.get()); +// if ( i!= 0 ) { +// this->stream << ", "; +// } +// if ( map_arg_type.find(vid) == map_arg_type.end()) { +// LOG(WARNING) << vid << " type not found\n"; +// PrintType(v.type(), this->stream); +// this->stream << ' ' << vid; +// } +// else { +// auto arg = map_arg_type[vid]; +// PrintType(std::get<1>(arg), this->stream); +// if (v.type().is_handle()) { +// this->stream << "*"; +// } +// this->stream << ' ' << std::get<0>(arg); + +// } +// stream << ") {\n"; +// int func_scope = this->BeginScope(); +// this->PrintStmt(f->body); +// this->EndScope(func_scope); +// this->PrintIndent(); +// this->stream << "}\n\n"; +// } +// CodeGenAOCL::AddFunction(f, map_arg_type); +// } + std::string CodeGenAOCL::Finish() { // inject extension enable pragma for fp16 and fp64 if (enable_fp16_) { @@ -83,7 +203,6 @@ void CodeGenAOCL::BindThreadIndex(const IterVar& iv) { void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) int lanes = t.lanes(); - os << t.bits(); if (t.is_handle()) { CHECK_EQ(lanes, 1) << "do not yet support vector types"; @@ -183,15 +302,25 @@ void CodeGenAOCL::PrintStorageSync(const Call* op) { } } +// void CodeGenAOCL::PrintStorageScope( +// const std::string& scope, std::ostream& os) { // NOLINT(*) +// if (scope == "global") { +// os << "__global "; +// } else if (scope == "shared") { +// os << "__local "; +// } +// } + void CodeGenAOCL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global") { - os << "__global"; + os << "global "; } else if (scope == "shared") { - os << "__local"; + os << "local "; } } + std::string 
CodeGenAOCL::CastFromTo(std::string value, Type from, Type target) { if (from == target) return value; std::ostringstream os; @@ -229,6 +358,23 @@ void CodeGenAOCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) CodeGenC::VisitExpr_(op, os); } +void CodeGenAOCL::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + + void CodeGenAOCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) if (std::isinf(op->value)) { if ( op->value < 0) { @@ -249,5 +395,30 @@ void CodeGenAOCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) CodeGenC::VisitExpr_(op, os); } +void CodeGenAOCL::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + // Skip the buffer data checking + if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) + return ; + PrintIndent(); + if (cond[0] == '(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + } // namespace codegen -} // namespace tvm \ No newline at end of file +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 4e3f12a0d..30797e36c 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -1,10 +1,3 @@ -/* 
- * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-25 21:24:23 - * @LastEditTime: 2019-08-14 11:37:28 - * @LastEditors: Please set LastEditors - */ /* Yang.Bai yb269@cornell.edu @@ -16,43 +9,50 @@ # include # include # include +# include "./codeanalys_openclc.h" # include "../codegen_c.h" namespace TVM { namespace codegen { + class CodeGenAOCL final : public CodeGenC { public: CodeGenAOCL(); - void AddFunction(LoweredFunc f); + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); std::string Finish(); void InitFuncState(LoweredFunc f) final; void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) void PrintStorageScope(const std::string& scope, std::ostream& os) final; //NOLINT(*) void PrintStorageSync(const Call* op) final; //NOLINT(*) - void PrintType(Type t, std::ostream& os) final; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) final; //NOLINT(*) + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + std::string GetVecLoad(Type t, const Variable * buffer, Expr base) final; // NOLINT(*) void PrintVecStore(const Variable * buffer, Type t, Expr base, const std::string& value) final; //NOLINT(*) void PrintVecAddr(const Variable * buffer, Type t, Expr base, std::ostream& os); //NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target); //NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) //overload visitor + void VisitStmt_(const LetStmt* op) final; // NOLINT(*) void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) void VisitExpr_(const Select * op, std::ostream& os) final; //NOLINT(*) void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) private: + // whether enable fp16 and fp64 extension bool enable_fp16_{false}; bool 
enable_fp64_{false}; - - + }; } // namespace codegen -} // namespace tvm +} // namespace TVM #endif // TVM_CODEGEN_CODEGEN_AOCL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index abb39c3e0..47d66f68f 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -1,11 +1,3 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-25 21:24:18 - * @LastEditTime: 2019-07-25 21:24:18 - * @LastEditors: your name - */ - /* Yang.Bai yb269@cornell.edu diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h index a0c46bf3e..4e7f0e746 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -1,11 +1,3 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-25 21:24:23 - * @LastEditTime: 2019-08-14 11:37:17 - * @LastEditors: Please set LastEditors - */ - /* Yang.Bai yb269@cornell.edu From ffb86739599e79efc68eadce9e9432b77850ba44 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Mon, 2 Sep 2019 10:54:38 -0400 Subject: [PATCH 023/103] move ppac codegen to ppac folder; fix argument name with merlinc analyser --- hlib/python/hlib/ppac.py | 198 +++++++++++++++++++++ samples/ppac/gemm/csrcPrint.py | 62 ------- samples/ppac/gemm/data.py | 59 ------- samples/ppac/gemm/gemm_ppac.py | 43 ----- samples/ppac/gemm/headcode.txt | 6 - samples/ppac/gemm/ppac_common.py | 48 ----- tests/test_codegen_ppac.py | 99 +++++++++++ tvm/src/codegen/ppac/build_rv64_ppac.cc | 32 ++++ tvm/src/codegen/ppac/codegen_rv64_ppac.cc | 202 ++++++++++++++++++++++ tvm/src/codegen/ppac/codegen_rv64_ppac.h | 28 +++ 10 files changed, 559 insertions(+), 218 deletions(-) create mode 100644 hlib/python/hlib/ppac.py delete mode 100644 samples/ppac/gemm/csrcPrint.py delete mode 100644 samples/ppac/gemm/data.py delete mode 100644 
samples/ppac/gemm/gemm_ppac.py delete mode 100644 samples/ppac/gemm/headcode.txt delete mode 100644 samples/ppac/gemm/ppac_common.py create mode 100644 tests/test_codegen_ppac.py create mode 100644 tvm/src/codegen/ppac/build_rv64_ppac.cc create mode 100644 tvm/src/codegen/ppac/codegen_rv64_ppac.cc create mode 100644 tvm/src/codegen/ppac/codegen_rv64_ppac.h diff --git a/hlib/python/hlib/ppac.py b/hlib/python/hlib/ppac.py new file mode 100644 index 000000000..3754b3cf2 --- /dev/null +++ b/hlib/python/hlib/ppac.py @@ -0,0 +1,198 @@ +from collections import OrderedDict +import heterocl as hcl +import heterocl.tvm as tvm + +class PPAC_config: + """Wrap PPAC parameters and function names.""" + def __init__(self, multi_bit=False, word_bits=None, elem_bits=None): + """Initialize PPAC configurations + + Parameters + ---------- + multi_bit : Whether to use specialized ppac accelerator + or generalized ppac module. + See hardware implementation for more. + word_bits : Number of bits in a row in ppac. 
+ elem_bits : Number of bits in a number in matrix (datatype) + + """ + self.word_bits = (word_bits if word_bits else 256) if multi_bit else 64 + self.elem_bits = (elem_bits if elem_bits else 8) if multi_bit else 1 + self.elem_num = self.word_bits // self.elem_bits + self.depth = self.elem_num + assert self.elem_bits in [1, 2, 4, 8, 16, 32], "elem_bits must be in {1, 2, 4, 8, 16, 32}" + assert (self.word_bits % 64 == 0) and (self.elem_num*self.elem_bits == self.word_bits), \ + "word_bits must be times of 64 and times of elem_bits" + if multi_bit: + self.func_call = ['PPACFunc_GeMMUInt', 'PPACFunc_GeMMSInt'] + else: + self.func_call = ['PPACFunc_HmmSim', 'PPACFunc_GeMMBin'] + + +class PPAC_func_params: + """ + names of PPAC function call parameters + used as annotation key on the stage + """ + + def __init__(self): + self.func_name = '_ppac_func_name' + self.ret = '_ret' + self.arg0 = '_arg0' + self.arg1 = '_arg1' + self.b_n = '_batch_num' + self.i_b_n = '_in_block_num' + self.o_c_n = '_out_channel_num' + +ppac_params = PPAC_func_params() + +def hmm_sim(x, y, name=None): + """Compute hamming-similarity between each element in x and y + Parameters + ---------- + x : 1-d tensor of datatype uint64 + y : 1-d tensor of datatype uint64 + + Returns + ------- + res: 2-d tensor of shape (x.shape[0], y.shape[0]) and datatype uint64 + """ + assert x.dtype == 'uint64' and y.dtype == 'uint64', "only support datatype uint64" + assert len(x.shape) == 1 and len(y.shape) == 1, "only support 1-dim hamming-similarity operation" + + ppac_config = PPAC_config() + + try: + res_shape = x.shape + y.shape + batch_num = x.shape[0] + except: + # x is scalar + res_shape = y.shape + batch_num = 1 + res_name = name if name else 'res' + in_block_num = 1 + out_channel_num = y.shape[0] + + def _assign_val(*args): + temp = hcl.local(0, name='sim_acc', dtype=hcl.UInt(64)) + temp[0] = tvm.popcount(~(x[args[0]] ^ y[args[1]])) + return temp[0] + return hcl.compute( res_shape, _assign_val, res_name, 
dtype=hcl.UInt(64), + attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(ppac_config.func_call[0])), + (ppac_params.ret, tvm.make.StringImm(res_name)), + (ppac_params.arg0, tvm.make.StringImm(x.name)), + (ppac_params.arg1, tvm.make.StringImm(y.name)), + (ppac_params.b_n, batch_num), + (ppac_params.i_b_n, in_block_num), + (ppac_params.o_c_n, out_channel_num)]) ) + +def gemm_binary(d, w, name=None): + """Compute general matrix multiplication of datatype {1, -1} + Parameters + ---------- + d : 2-d tensor of datatype uint1 + w : 2-d tensor of datatype uint1 + + Returns + ------- + res: 2-d tensor of shape (d.shape[0], w.shape[0]) and datatype uint64 + res = dot(d, w.T) (with datatype {1, -1}) + """ + assert d.dtype == 'uint1' and w.dtype == 'uint1', 'only support binary data' + assert len(w.shape) == 2 and len(d.shape) == 2, "only support 2-dim binary gemm" + assert d.shape[1] == w.shape[1] + + ppac_config = PPAC_config() + assert d.shape[1] % ppac_config.elem_num == 0, \ + "input channel should be times of " + str(ppac_config.elem_num) + + res_name = name if name else 'res' + batch_num = d.shape[0] + in_channel_num = w.shape[1] + in_block_num = in_channel_num // ppac_config.elem_num + out_channel_num = w.shape[0] + res_shape = (batch_num, out_channel_num) + block_size = ppac_config.elem_num // 8 + + def _bin_pack_uint8(tensor): + """Pack uint1 to uint8. + uint1 is cast to uint8 in c backend. + This operation squeezes memory 8 times. 
+ """ + assert tensor.dtype == 'uint1' + + ishape = tensor.shape + n = len(ishape) + oshape = ishape[:-1] + (ishape[n-1] // 8, ) + + def _assign_val(*args): + temp = hcl.local(0, name='pack_acc', dtype=hcl.UInt(8)) + with hcl.for_(0, 8) as i: + temp[0] = temp[0] | (tensor[args[0], i + args[1]*8] << i) + return temp[0] + + return hcl.compute(oshape, _assign_val, + name=tensor.name+'_packed', dtype=hcl.UInt(8)) + + def _mvpodd_reduce(*args): + """compute {1, -1} dot product on packed data.""" + temp = hcl.local(0, name='mvpodd_acc', dtype=hcl.UInt(64)) + with hcl.for_(0, in_block_num) as o: + with hcl.for_(0, block_size) as i: + temp[0] += tvm.popcount(d_packed[args[0], i+block_size*o] ^ w_packed[args[1], i+block_size*o]) + temp[0] = ppac_config.elem_num - temp[0]*2 + return temp[0] + + d_packed = _bin_pack_uint8(d) + w_packed = _bin_pack_uint8(w) + return hcl.compute(res_shape, _mvpodd_reduce, name=res_name, dtype=hcl.UInt(64), + attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(ppac_config.func_call[1])), + (ppac_params.ret, tvm.make.StringImm(res_name)), + (ppac_params.arg0, tvm.make.StringImm(d_packed.name)), + (ppac_params.arg1, tvm.make.StringImm(w_packed.name)), + (ppac_params.b_n, batch_num), + (ppac_params.i_b_n, in_block_num), + (ppac_params.o_c_n, out_channel_num)]) ) + + +def gemm_multi_bit(d, w, name=None): + """Compute general matrix multiplication of multi-bit data + Parameters + ---------- + d : 2-d tensor + w : 2-d tensor + + Returns + ------- + res: 2-d tensor of shape (d.shape[0], w.shape[0]) and datatype uint64 + res = dot(d, w.T) + """ + assert w.dtype == d.dtype + assert w.dtype in ['uint8', 'int8', 'uint16', 'int16', 'uint32', 'int32'] + + assert len(w.shape) == 2 and len(d.shape) == 2, "only support 2-dim gemm" + assert d.shape[1] == w.shape[1] + + ppac_config = PPAC_config(multi_bit=True) + assert d.shape[1] % ppac_config.elem_num == 0, \ + "only support data with size of times of " + str(ppac_config.elem_num) + + res_name = name 
if name else 'res' + batch_num = d.shape[0] + in_channel_num = d.shape[1] + in_block_num = in_channel_num // ppac_config.elem_num + out_channel_num = w.shape[0] + res_shape = (batch_num, out_channel_num) + func_name = ppac_config.func_call[0] if ('u' in d.dtype) else ppac_config.func_call[1] + + r = hcl.reduce_axis(0, in_channel_num, name='k') + return hcl.compute(res_shape, + lambda i, j: hcl.sum(d[i, r] * w[j, r], axis=r), + name=res_name, dtype=hcl.UInt(64), + attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(func_name)), + (ppac_params.ret, tvm.make.StringImm(res_name)), + (ppac_params.arg0, tvm.make.StringImm(d.name)), + (ppac_params.arg1, tvm.make.StringImm(w.name)), + (ppac_params.b_n, batch_num), + (ppac_params.i_b_n, in_block_num), + (ppac_params.o_c_n, out_channel_num)])) \ No newline at end of file diff --git a/samples/ppac/gemm/csrcPrint.py b/samples/ppac/gemm/csrcPrint.py deleted file mode 100644 index 1eee291fd..000000000 --- a/samples/ppac/gemm/csrcPrint.py +++ /dev/null @@ -1,62 +0,0 @@ -"""author: Guyue Huang (gh424@cornell.edu) -ppac-gemm c code gen -""" -from ppac_common import * - -def getCSrc(data_fname, golden_fname, head_fname, o_fname, dims, bits=1, xLen=64): - - m, n, k = dims - - def printKernel(of): - - of.write('//save data_A\n') - for i in range(m): - of.write(doLoad(('data_A+%d'%(8*i)), i)) - - of.write('\n//do MVP\n') - for j in range(n): - of.write('\tlw a1, data_B+%d\n'%(8*j)) - #TODO: bit-mask - of.write(doMVP(funct='72')) #'1001000' - for i in range(m): - of.write(doStore(('data_C+%d'%(4*(j*m+i))),i)) - - def printTest(of, golden): - for n, gnum in enumerate(golden): - of.write('\tlw a0, data_C+%d\n'%(4*n)) - of.write(('\tTEST_CASE(%d, a0, '%(n+1+1)) + hex(int(gnum)) + ', )\n') - of.write('\n\tTEST_PASSFAIL\n') - - with open(o_fname, 'w') as of: - - with open(head_fname, 'r') as hf: - of.write(hf.read()) - hf.close() - - of.write('\tRVTEST_WITH_ROCC\n') - of.write('start:\n\tRVTEST_CODE_BEGIN\n') - - printKernel(of) - 
of.write('\n\n') - goldnum = [] - with open(golden_fname, 'r') as gf: - goldnum += (gf.read()).split() - if not len(goldnum) == m*n: - raise Exception('golden number should have %d but %d'%(m*n, len(goldnum))) - printTest(of, goldnum) - - of.write('\tRVTEST_CODE_END\n\n') - - with open(data_fname, 'r') as df: - s = "\t.data\n\tRVTEST_DATA_BEGIN\n\n" + "\tTEST_DATA\n" + df.read() - df.close() - of.write(s) - - of.write('\tRVTEST_DATA_END\n') - -if __name__== '__main__': - getCSrc('bareMdata.txt', 'goldennumber.txt', 'headcode.txt', 'test.S', - dims=[4, 4, 64]) - - #TODO: headcode.txt ppac_common code definition - diff --git a/samples/ppac/gemm/data.py b/samples/ppac/gemm/data.py deleted file mode 100644 index 3433d0ec2..000000000 --- a/samples/ppac/gemm/data.py +++ /dev/null @@ -1,59 +0,0 @@ -""" author: Guyue Huang (gh424@cornell.edu) -ppac-gemm data generater -""" -import numpy as np -import numpy.random as rd -from ppac_common import compact, bvec2x - -m, n, k = 4, 4, 64 -v_bits, m_bits = 1, 1 -xLen = 64 - -mat_A = rd.randint(2**m_bits, size=(m, k)) -mat_B = rd.randint(2**v_bits, size=(k, n)) -mat_C = np.dot(mat_A, mat_B) -golden = list(mat_C.flatten('F')) # column major - -data_A = np.zeros((m, xLen)) -data_B = np.zeros((n, xLen)) -for i in range(m): - data_A[i,:] = compact(mat_A[i,:], k, m_bits, xLen) -for j in range(n): - data_B[j,:] = compact(mat_B[:,j], k, v_bits, xLen) - -word_A = [bvec2x(vec) for vec in list(data_A)] -word_B = [bvec2x(vec) for vec in list(data_B)] - -with open('csrcmacro.txt', 'w') as of: - of.write('#define M '+str(m)+'\n') - of.write('#define N ' + str(n) + '\n') - of.write('#define K ' + str(k) + '\n') - of.write('#define DATAA '+'0x'+word_A[0]) - for s in word_A[1:]: - of.write(',\\\n'+'\t0x'+s) - of.write('\n\n') - of.write('#define DATAB '+'\t0x'+word_B[0]) - for s in word_B[1:]: - of.write(',\\\n'+'\t0x'+s) - of.write('\n\n') - of.write('#define DATAGOLD ') - for n in golden[:-1]: - of.write(str(n)+', ') - 
of.write(str(golden[-1])+'\n') -of.close() - -with open('bareMdata.txt','w') as of: - of.write('data_A:\n') - for s in word_A: - of.write('\t.dword '+s+'\n') - of.write('\ndata_B:\n') - for s in word_B: - of.write('\t.dword '+s+'\n') - of.write('\ndata_C:\n') - for s in range(m*n): - of.write('\t.dword 0x0\n') -of.close() - -with open('goldennumber.txt','w') as of: - for n in golden: - of.write(str(n)+'\n') diff --git a/samples/ppac/gemm/gemm_ppac.py b/samples/ppac/gemm/gemm_ppac.py deleted file mode 100644 index 664ecf01d..000000000 --- a/samples/ppac/gemm/gemm_ppac.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -author: Guyue Huang (gh424@cornell.edu) - -General Matrix Multiplication -target : rv64_ppac -""" -""" -modified on Aug 1 -bit_width = 1 -m, n, k = 16, 2, 64 -""" - -import heterocl as hcl -import numpy as np - - -def gemm(m, n, k, dtype=hcl.Int(), target=None): - matrix_1 = hcl.placeholder((m, k), dtype=dtype) - matrix_2 = hcl.placeholder((k, n), dtype=dtype) - - def kernel(matrix_1, matrix_2): - r = hcl.reduce_axis(0, k, 'k') - return hcl.compute((m, n), - lambda x, y: hcl.sum(matrix_1[x, r] * matrix_2[r, y], - axis=r, dtype=dtype), - dtype=dtype, - name="out_matrix") - - s = hcl.create_schedule([matrix_1, matrix_2], kernel) - f = hcl.build(s, target=target, name='gemm') - return f - -dtype = hcl.UInt(8) -hcl.init(dtype) -m, n, k = 4, 4, 64 -f = gemm(m, n, k, dtype, target="rv64_ppac") - -print(f) -""" -with open("csrc.cc", "w") as ofile: - ofile.write('/*CodeGenC backend*/\n'+str(f)) -ofile.close() -""" \ No newline at end of file diff --git a/samples/ppac/gemm/headcode.txt b/samples/ppac/gemm/headcode.txt deleted file mode 100644 index 2e19284f5..000000000 --- a/samples/ppac/gemm/headcode.txt +++ /dev/null @@ -1,6 +0,0 @@ -#include "riscv_test.h" -#include "riscv-tests/isa/macros/scalar/test_macros.h" -#include "include/ppac.h" -#include "rocc-software/src/riscv_test_rocc.h" - -#define CUSTOM_X 1 diff --git a/samples/ppac/gemm/ppac_common.py 
b/samples/ppac/gemm/ppac_common.py deleted file mode 100644 index c5d17c9ac..000000000 --- a/samples/ppac/gemm/ppac_common.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy as np - -def compact(arr, arrlen, dataBits, wordBits): - if arr.size < arrlen or wordBits / dataBits < arrlen: - raise Exception("error in length") - word = np.zeros(wordBits, dtype=int) - for l in range(arrlen): - for b in range(dataBits): - word[l * dataBits + b] = ((arr[l] & (1 << b))>>b) - return word - -def bvec2x(vec): - n2c = {0:'0', 1:'1', 2:'2', 3:'3', - 4:'4', 5:'5', 6:'6', 7:'7', - 8:'8', 9:'9',10:'a',11:'b', - 12:'c',13:'d',14:'e',15:'f'} - base = np.array([1,2,4,8]) - vec_int = vec % 2 - len = vec.size - if not len % 4 == 0: - vec_int = np.append(vec_int, np.zeros((4-(len%4)))) - len = len + (4 - len % 4) - x = '' - ct = int(len/4) - for c in range(ct, 0, -1): - x = x + n2c[(np.dot( vec[4*c-4:4*c], base))] - return x - -def doWrite(rocc_addr): - s = '\tli a0, '+str(rocc_addr)+'\n' - return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 0, 11, 10, K_DO_WRITE)\n' - -def doRead(rocc_addr): - s = '\tli a0, '+str(rocc_addr)+'\n' - return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 0, 10, K_DO_READ)\n' - -def doLoad(addr, rocc_addr): - s = '\tla a1, '+addr+'\n\tli a0, '+str(rocc_addr)+'\n' - return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 11, 10, K_DO_LOAD)\n' - -def doStore(addr, rocc_res_addr): - s = '\tla a1, ' + addr + '\n\tli a0, ' + str(rocc_res_addr) + '\n' - return s + '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 11, 10, K_DO_STORE)\n' - -def doMVP(funct): - return '\tROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, 10, 11, 10, '+str(funct)+')\n' - - diff --git a/tests/test_codegen_ppac.py b/tests/test_codegen_ppac.py new file mode 100644 index 000000000..43b7e04cc --- /dev/null +++ b/tests/test_codegen_ppac.py @@ -0,0 +1,99 @@ +import heterocl as hcl +import hlib + +def test_func_print(): + def test_hmm_sim(): + hcl.init() + x = hcl.placeholder((1,), 'x', dtype=hcl.UInt(64)) + y = 
hcl.placeholder((64,), 'y', dtype=hcl.UInt(64)) + def kernel(X, Y): + return hlib.ppac.hmm_sim(X, Y, name='Z') + s = hcl.create_schedule([x, y], kernel) + f = hcl.build(s, target='rv64_ppac') + code = str(f) + assert 'PPACFunc_HmmSim' in code + + def test_gemm_binary(): + hcl.init() + data = hcl.placeholder((64, 64), 'd', dtype=hcl.UInt(1)) + weight = hcl.placeholder((64, 64), 'w', dtype=hcl.UInt(1)) + def kernel(d, w): + return hlib.ppac.gemm_binary(d, w, 'res') + s = hcl.create_schedule([data, weight], kernel) + f = hcl.build(s, target='rv64_ppac') + code = str(f) + assert 'PPACFunc_GeMMBin' in code + + def test_gemm_multi_bit_unsigned(): + hcl.init() + data = hcl.placeholder((32, 32), 'd', dtype=hcl.UInt(8)) + weight = hcl.placeholder((32, 32), 'w', dtype=hcl.UInt(8)) + def kernel(d, w): + return hlib.ppac.gemm_multi_bit(d, w, 'res') + s = hcl.create_schedule([data, weight], kernel) + f = hcl.build(s, target='rv64_ppac') + code = str(f) + assert 'PPACFunc_GeMMUInt' in code + + def test_gemm_multi_bit_signed(): + hcl.init() + data = hcl.placeholder((32, 32), 'd', dtype=hcl.Int(8)) + weight = hcl.placeholder((32, 32), 'w', dtype=hcl.Int(8)) + def kernel(d, w): + return hlib.ppac.gemm_multi_bit(d, w, 'res') + s = hcl.create_schedule([data, weight], kernel) + f = hcl.build(s, target='rv64_ppac') + code = str(f) + assert 'PPACFunc_GeMMSInt' in code + + test_hmm_sim() + test_gemm_binary() + test_gemm_multi_bit_unsigned() + test_gemm_multi_bit_signed() + +def test_tile(): + def test_hmm_sim(): + hcl.init() + b_n = 10 + d_n = 256 + X = hcl.placeholder((b_n,), 'X', dtype=hcl.UInt(64)) + Y = hcl.placeholder((d_n,), 'Y', dtype=hcl.UInt(64)) + def kernel(X, Y): + return hlib.ppac.hmm_sim(X, Y, name='Z') + s = hcl.create_schedule([X, Y], kernel) + ir = str(hcl.lower(s)) + assert ('\"_batch_num\"=' + str(b_n)) in ir + assert ('\"_in_block_num\"=' + str(1)) in ir + assert ('\"_out_channel_num\"=' + str(d_n)) in ir + + def test_gemm_binary(): + hcl.init() + b_n, i_c, o_c = 64, 
256, 256 + ppac_config = hlib.ppac.PPAC_config(multi_bit=False) + data = hcl.placeholder((b_n, i_c), 'd', dtype=hcl.UInt(1)) + weight = hcl.placeholder((o_c, i_c), 'w', dtype=hcl.UInt(1)) + def kernel(d, w): + return hlib.ppac.gemm_binary(d, w, 'res') + s = hcl.create_schedule([data, weight], kernel) + ir = str(hcl.lower(s)) + assert ('\"_batch_num\"=' + str(b_n)) in ir + assert ('\"_in_block_num\"=' + str(i_c // ppac_config.elem_num)) in ir + assert ('\"_out_channel_num\"=' + str(o_c)) in ir + + def test_gemm_multi_bit(): + hcl.init() + b_n, i_c, o_c = 64, 256, 256 + ppac_config = hlib.ppac.PPAC_config(multi_bit=True) + data = hcl.placeholder((b_n, i_c), 'd', dtype=hcl.Int(8)) + weight = hcl.placeholder((o_c, i_c), 'w', dtype=hcl.Int(8)) + def kernel(d, w): + return hlib.ppac.gemm_multi_bit(d, w, 'res') + s = hcl.create_schedule([data, weight], kernel) + ir = str(hcl.lower(s)) + assert ('\"_batch_num\"=' + str(b_n)) in ir + assert ('\"_in_block_num\"=' + str(i_c // ppac_config.elem_num)) in ir + assert ('\"_out_channel_num\"=' + str(o_c)) in ir + + test_hmm_sim() + test_gemm_binary() + test_gemm_multi_bit() \ No newline at end of file diff --git a/tvm/src/codegen/ppac/build_rv64_ppac.cc b/tvm/src/codegen/ppac/build_rv64_ppac.cc new file mode 100644 index 000000000..c14a1cdf3 --- /dev/null +++ b/tvm/src/codegen/ppac/build_rv64_ppac.cc @@ -0,0 +1,32 @@ +/* + * \file build_rv64_ppac.cc + */ + +#include "./codegen_rv64_ppac.h" +#include "../build_common.h" + +namespace TVM{ +namespace codegen{ + +std::string BuildRV64PPAC(Array funcs) { + CodeAnalysMerlinC ca; + CodeGenRV64PPAC cg; + for (LoweredFunc f: funcs) { + ca.AddFunction(f); + str2tupleMap map_arg_type; + map_arg_type = ca.Finish(); + cg.AddFunction(f, map_arg_type); + } + std::string code = cg.Finish(); + + LOG(WARNING) << "RV64_PPAC backend doesn't have runtime, return kernel code"; + return code; +} + +TVM_REGISTER_API("codegen.build_rv64_ppac") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = 
BuildRV64PPAC(args[0]); + }); + +} // namespace codegen +} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/ppac/codegen_rv64_ppac.cc b/tvm/src/codegen/ppac/codegen_rv64_ppac.cc new file mode 100644 index 000000000..1fd5e2b6e --- /dev/null +++ b/tvm/src/codegen/ppac/codegen_rv64_ppac.cc @@ -0,0 +1,202 @@ +/* + * \file codegen_rv64_ppac.cc + */ + +#include +#include +#include +#include +#include +#include +#include +#include "./codegen_rv64_ppac.h" +#include "../build_common.h" + +namespace TVM { +namespace codegen { + +void CodeGenRV64PPAC::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + // Write entry function name + this->stream << "void " << f->name << "("; + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + PrintType(std::get<1>(arg), this->stream); + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + this->stream << "}\n\n"; +} + +void CodeGenRV64PPAC::VisitStmt_(const For* op) { + std::string func_name; + bool is_ppac_func = false; + uint8_t i = 0; + for (auto key: op->annotate_keys) { + if (auto str = key.as()) { + if (str->value == "_ppac_func_name") { + auto name = op->annotate_values[i].as(); + func_name = name->value; + is_ppac_func = true; + break; + } + } + ++i; + } + if (is_ppac_func) { + // scan along the annotate list to find 
parameters + std::string ret, arg0, arg1; + int batch_num, in_block_num, out_channel_num; + i = 0; + uint8_t param_num = 0; + for (auto key: op->annotate_keys) { + if (auto str = key.as()) { + if (str->value == "_ret") { + auto v = op->annotate_values[i].as(); + ret = v->value; + ++param_num; + } else if (str->value == "_arg0") { + auto v = op->annotate_values[i].as(); + arg0 = v->value; + ++param_num; + } else if (str->value == "_arg1") { + auto v = op->annotate_values[i].as(); + arg1 = v->value; + ++param_num; + } else if (str->value == "_batch_num") { + auto v = op->annotate_values[i].as(); + batch_num = v->value; + ++param_num; + } else if (str->value == "_in_block_num") { + auto v = op->annotate_values[i].as(); + in_block_num = v->value; + ++param_num; + } else if (str->value == "_out_channel_num") { + auto v = op->annotate_values[i].as(); + out_channel_num = v->value; + ++param_num; + } + } + ++i; + } + if (param_num != 6) { + LOG(FATAL) << "PPAC function call need exactly 6 parameters but found " << param_num; + } + // print ppac function call + PrintIndent(); + stream << func_name << "(" + << ret << ", " + << arg0 << ", " + << arg1 << ", " + << batch_num << ", " + << in_block_num << ", " + << out_channel_num + << ");\n"; + return; + } + CodeGenC::VisitStmt_(op); +} + +void CodeGenRV64PPAC::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + +void CodeGenRV64PPAC::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + // Skip the buffer data checking + if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) + return ; + PrintIndent(); + 
if (cond[0] == '(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { + CHECK_EQ(t.lanes(), 1) + << "do not support vector types"; + if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + if (t.bits() <= 8) { + os << "uint8_t"; return; + } else if (t.bits() <= 16) { + os << "uint16_t"; return; + } else if (t.bits() <= 32) { + os << "uint32_t"; return; + } else if (t.bits() <= 64) { + os << "uint64_t"; return; + } else { + LOG(WARNING) << "Casting type " << t << " to uint64_t"; + os << "uint64_t"; + return; + } + } + else if (t.is_int()) { + if (t.bits() <= 8) { + os << "int8_t"; return; + } else if (t.bits() <= 16) { + os << "int16_t"; return; + } else if (t.bits() <= 32) { + os << "int32_t"; return; + } else if (t.bits() <= 64) { + os << "int64_t"; return; + } else { + LOG(WARNING) << "Casting type " << t << " to int64_t"; + os << "int64_t"; + return; + } + } + } + os << t; +} + +} //namespace codegen +} //namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/ppac/codegen_rv64_ppac.h b/tvm/src/codegen/ppac/codegen_rv64_ppac.h new file mode 100644 index 000000000..881bdea05 --- /dev/null +++ b/tvm/src/codegen/ppac/codegen_rv64_ppac.h @@ -0,0 +1,28 @@ +/* + * \file codegen_rv64_ppac.h + */ + +#ifndef TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ +#define TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ + +#include +#include +#include "../codegen_c.h" +#include "../merlinc/codeanalys_merlinc.h" + +namespace TVM { +namespace codegen { + +class CodeGenRV64PPAC : public CodeGenC { + public: + void AddFunction(LoweredFunc f, str2tupleMap 
map_arg_type); + void PrintType(Type t, std::ostream& os) override; + void VisitStmt_(const LetStmt* op) override; + void VisitStmt_(const IfThenElse* op) override; + void VisitStmt_(const For* op) override; +}; + +} // namespace codegen +} // namespace TVM + +#endif //TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ \ No newline at end of file From 8afdea8d945b598c03fe93be62631a67bcb9d96f Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Mon, 2 Sep 2019 10:57:01 -0400 Subject: [PATCH 024/103] discard the new for-loop type; include ppac in hlib --- hlib/python/hlib/__init__.py | 2 +- python/heterocl/tvm/schedule.py | 14 +- python/heterocl/tvm/stmt.py | 1 - tvm/HalideIR/src/ir/Expr.h | 3 +- tvm/HalideIR/src/ir/IRPrinter.cpp | 3 - tvm/include/tvm/expr.h | 4 +- tvm/include/tvm/schedule.h | 5 +- tvm/src/api/api_lang.cc | 2 +- tvm/src/codegen/build_rv64_ppac.cc | 31 ---- tvm/src/codegen/codegen_rv64_ppac.cc | 195 -------------------------- tvm/src/codegen/codegen_rv64_ppac.h | 34 ----- tvm/src/codegen/llvm/codegen_cpu.cc | 3 +- tvm/src/op/op_util.cc | 1 - tvm/src/schedule/compute_primitive.cc | 1 - tvm/src/schedule/schedule_lang.cc | 11 +- 15 files changed, 11 insertions(+), 299 deletions(-) delete mode 100644 tvm/src/codegen/build_rv64_ppac.cc delete mode 100644 tvm/src/codegen/codegen_rv64_ppac.cc delete mode 100644 tvm/src/codegen/codegen_rv64_ppac.h diff --git a/hlib/python/hlib/__init__.py b/hlib/python/hlib/__init__.py index 416239f3a..3d0ef336b 100644 --- a/hlib/python/hlib/__init__.py +++ b/hlib/python/hlib/__init__.py @@ -1 +1 @@ -from . import nn +from . 
import nn, ppac diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 458e8e1b9..7bbbe0923 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -612,7 +612,7 @@ def pipeline(self, var, initiation_interval=1): def stencil(self, burst_width=512, unroll_factor=1, num_iteration=1): _api_internal._StageStencil(self, burst_width, unroll_factor, num_iteration) - def pragma(self, var, pragma_type, annotate_key=None, annotate_value=None): + def pragma(self, var, pragma_type): """Annotate the iteration with pragma This will translate to a pragma_scope surrounding @@ -627,12 +627,6 @@ def pragma(self, var, pragma_type, annotate_key=None, annotate_value=None): pragma_type : str The pragma string to be annotated - annotate_key : str (optional) - The attribute key to be annotated - - annotate_value : Expr (optional) - The attribute value to be annotated - Note ---- @@ -662,12 +656,10 @@ def pragma(self, var, pragma_type, annotate_key=None, annotate_value=None): Hint parallel loop to execute in strided pattern. 
:code:`for (int i = task_id; i < end; i += num_task)` - - - **PPAC_MVPb_func** - PPAC backend binary matrix-vector product kernel function + """ - _api_internal._StagePragma(self, var, pragma_type, annotate_key, annotate_value) + _api_internal._StagePragma(self, var, pragma_type) def prefetch(self, tensor, var, offset): """Prefetch the specified variable diff --git a/python/heterocl/tvm/stmt.py b/python/heterocl/tvm/stmt.py index d0f281797..4db84970f 100644 --- a/python/heterocl/tvm/stmt.py +++ b/python/heterocl/tvm/stmt.py @@ -38,7 +38,6 @@ class For(Stmt): Vectorized = 2 Unrolled = 3 Pipelined = 4 - PPACFuncLoop = 5 @register_node class Store(Stmt): diff --git a/tvm/HalideIR/src/ir/Expr.h b/tvm/HalideIR/src/ir/Expr.h index 850a0800e..b78a466ed 100644 --- a/tvm/HalideIR/src/ir/Expr.h +++ b/tvm/HalideIR/src/ir/Expr.h @@ -292,8 +292,7 @@ enum class ForType : int { Parallel = 1, Vectorized = 2, Unrolled = 3, - Pipelined = 4, - PPACFuncLoop = 5 + Pipelined = 4 }; /** An enum describing the partition type */ diff --git a/tvm/HalideIR/src/ir/IRPrinter.cpp b/tvm/HalideIR/src/ir/IRPrinter.cpp index ac5fe6f52..6a3a5d651 100644 --- a/tvm/HalideIR/src/ir/IRPrinter.cpp +++ b/tvm/HalideIR/src/ir/IRPrinter.cpp @@ -67,9 +67,6 @@ ostream &operator<<(ostream &out, const ForType &type) { case ForType::Pipelined: out << "pipelined"; break; - case ForType::PPACFuncLoop: - out << "PPACFuncLoop"; - break; } return out; } diff --git a/tvm/include/tvm/expr.h b/tvm/include/tvm/expr.h index f7fb31d22..233e6a172 100644 --- a/tvm/include/tvm/expr.h +++ b/tvm/include/tvm/expr.h @@ -195,8 +195,7 @@ enum IterVarType : int { * \brief Marks boundary of tensorization intrinsic. */ kTensorized = 8, - kPipelined = 9, - kPPACFuncLoop = 10 + kPipelined = 9 }; /*! 
@@ -300,7 +299,6 @@ inline const char* IterVarType2String(IterVarType t) { case kParallelized: return "Parallelized"; case kTensorized: return "Tensorized"; case kPipelined: return "Pipelined"; - case kPPACFuncLoop: return "PPACFuncLoop"; } return "Unknown"; } diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index 422c23929..9dc1956c8 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -219,10 +219,7 @@ class Stage : public NodeRef { * * \return reference to self. */ - EXPORT Stage& pragma(IterVar var, - const std::string& pragma_type, - const std::string& annotate_key, - const Expr& annotate_value); // NOLINT(*) + EXPORT Stage& pragma(IterVar var, const std::string& pragma_type); // NOLINT(*) /*! * \brief Fetch data in advance. * \param domain the tensor to be prefetched diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index 048b98426..f07d590a5 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -393,7 +393,7 @@ TVM_REGISTER_API("_StageStencil") TVM_REGISTER_API("_StagePragma") .set_body([](TVMArgs args, TVMRetValue* ret) { args[0].operator Stage() - .pragma(args[1], args[2], args[3], args[4]); + .pragma(args[1], args[2]); }); TVM_REGISTER_API("_StagePrefetch") diff --git a/tvm/src/codegen/build_rv64_ppac.cc b/tvm/src/codegen/build_rv64_ppac.cc deleted file mode 100644 index 2ee3713b3..000000000 --- a/tvm/src/codegen/build_rv64_ppac.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* - author Guyue Huang (gh424@cornell.edu) - */ - -#include "./codegen_rv64_ppac.h" -#include "./build_common.h" - -namespace TVM{ -namespace codegen{ - -std::string BuildRV64PPAC(Array funcs) { - //CodeAnalysMerlinC ca; - CodeGenRV64PPAC cg; - for (LoweredFunc f: funcs) { - //ca.AddFunction(f); - //str2tupleMap map_arg_type; - //map_arg_type = ca.Finish(); - cg.AddFunction(f); - } - std::string code = cg.Finish(); - LOG(WARNING) << "RV64_PPAC backend doesn't yet have runtime, return kernel code"; - return code; -} - 
-TVM_REGISTER_API("codegen.build_rv64_ppac") -.set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = BuildRV64PPAC(args[0]); - }); - -} -} \ No newline at end of file diff --git a/tvm/src/codegen/codegen_rv64_ppac.cc b/tvm/src/codegen/codegen_rv64_ppac.cc deleted file mode 100644 index 661fe88a5..000000000 --- a/tvm/src/codegen/codegen_rv64_ppac.cc +++ /dev/null @@ -1,195 +0,0 @@ -/* - author Guyue Huang (gh424@cornell.edu) - */ - -#include -#include -#include -#include -#include -#include -#include -#include "./codegen_rv64_ppac.h" -#include "./build_common.h" - -namespace TVM { -namespace codegen { - -/* -void CodeGenRV64PPAC::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - - this->InitFuncState(f); - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - // Write entry function name - this->stream << "void " << f->name << "("; - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - PrintType(std::get<1>(arg), this->stream); - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - //range_ = CollectIterRange(f->body); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - this->stream << "}\n\n"; -} -*/ -void CodeGenRV64PPAC::PrintMVPb(const For* op, std::string m, bool compacted) { - PrintIndent(); - stream << "WHERE SUPPOSED TO BE MVPb KERNEL\n" << "We get M! 
m = " << m << "\n"; -} - -void CodeGenRV64PPAC::VisitStmt_(const For* op) { - std::ostringstream os; - if (op->for_type == ForType::PPACFuncLoop) { - int i = 0, matrix_m = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto m = op->annotate_values[i].as(); - if (str->value == "matrix_row_num" && m != nullptr && m->value > 0) { - matrix_m = m->value; - break; - } - } - } - i++; - if (matrix_m > 0) { - os << matrix_m; - PrintMVPb(op, os.str(), false); - return; - } - } - CodeGenC::VisitStmt_(op); -} - -void CodeGenRV64PPAC::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - -void CodeGenRV64PPAC::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - // Skip the buffer data checking - if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) - return ; - PrintIndent(); - if (cond[0] == '(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - - - -void CodeGenRV64PPAC::PrintType(Type t, std::ostream& os) { - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_uint() || t.is_int() || t.is_fixed() || t.is_ufixed()) { - if (t.is_uint()) { - if (t.bits() == 1) { - os << "int"; return; - } else if (t.bits() <= 32) { - os << 
"uint32_t"; return; - } else if (t.bits() <= 64) { - os << "uint64_t"; return; - } else { - os << "int"; return; - } - } - else if (t.is_int()) { - if (t.bits() == 1) { - os << "int"; return; - } else if (t.bits() <= 32) { - os << "int32_t"; return; - } else if (t.bits() <= 64) { - os << "int64_t"; return; - } else { - os << "int"; return; - } - } - else if (t.is_ufixed() && t.fracs()==0 ) { - if (t.bits() <= 8) { - os << "uint8_t"; return; - } - else if (t.bits() <= 16) { - os << "uint16_t"; return; - } - else if (t.bits() <= 32) { - os << "uint32_t"; return; - } - else if (t.bits() <= 64) { - os << "uint64_t"; return; - } - else { - os << "uint64_t"; - LOG(WARNING) << "Casting type " << t << " to int64_t"; - return; - } - } else if (t.fracs()==0 ) { - if (t.bits() <= 8) { - os << "int8_t"; return; - } - else if (t.bits() <= 16) { - os << "int16_t"; return; - } - else if (t.bits() <= 32) { - os << "int32_t"; return; - } - else if (t.bits() <= 64) { - os << "int64_t"; return; - } - else { - os << "int64_t"; - LOG(WARNING) << "Casting type " << t << " to int64_t"; - return; - } - } - } - os << t; - //LOG(FATAL) << "Cannot convert type " << t << " to C type"; -} - -} //namespace codegen -} //namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/codegen_rv64_ppac.h b/tvm/src/codegen/codegen_rv64_ppac.h deleted file mode 100644 index 4f51c2040..000000000 --- a/tvm/src/codegen/codegen_rv64_ppac.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - author Guyue Huang (gh424@cornell.edu) - */ - -#ifndef TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ -#define TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ - -#include -#include -#include "./merlinc/codeanalys_merlinc.h" -#include "./codegen_c.h" - -namespace TVM { -namespace codegen { - -class CodeGenRV64PPAC : public CodeGenC { - public: - //void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - void PrintMVPb(const For* op, std::string m, bool compacted); - void VisitStmt_(const For* op); - void PrintType(Type t, std::ostream& os) override; 
- void VisitStmt_(const LetStmt* op) override; - void VisitStmt_(const IfThenElse* op) override; - //void VisitStmt_(const Allocate* op) override; - //std::map > var_shape_map_; - - protected: - //std::string GetBufferRef(Type t, const Variable* buffer, Expr index); -}; - -} -} - -#endif //TVM_CODEGEN_CODEGEN_RV64_PPAC_H_ \ No newline at end of file diff --git a/tvm/src/codegen/llvm/codegen_cpu.cc b/tvm/src/codegen/llvm/codegen_cpu.cc index fe92530f4..796fa9d4b 100644 --- a/tvm/src/codegen/llvm/codegen_cpu.cc +++ b/tvm/src/codegen/llvm/codegen_cpu.cc @@ -719,8 +719,7 @@ void CodeGenCPU::VisitStmt_(const For* op) { CHECK(is_zero(op->min)); if (op->for_type == ForType::Serial || op->for_type == ForType::Unrolled || - op->for_type == ForType::Pipelined || - op->for_type == ForType::PPACFuncLoop ) { + op->for_type == ForType::Pipelined) { CodeGenLLVM::VisitStmt_(op); } else if (op->for_type == ForType::Parallel) { if (parallel_env_.penv == nullptr) { diff --git a/tvm/src/op/op_util.cc b/tvm/src/op/op_util.cc index 5dac3d853..9cf9e6713 100644 --- a/tvm/src/op/op_util.cc +++ b/tvm/src/op/op_util.cc @@ -68,7 +68,6 @@ MakeLoopNest(const Stage& stage, case kDataPar: break; case kTensorized: break; case kPipelined: break; - case kPPACFuncLoop: break; default: LOG(FATAL) << "Unknown iter type" << it_attr->iter_type << " in the iter_var_attrs"; diff --git a/tvm/src/schedule/compute_primitive.cc b/tvm/src/schedule/compute_primitive.cc index 709b722bd..ae59872b3 100644 --- a/tvm/src/schedule/compute_primitive.cc +++ b/tvm/src/schedule/compute_primitive.cc @@ -205,7 +205,6 @@ class IterVarAttrUpdater final : public IRMutator { case kVectorized: for_type = ForType::Vectorized; break; case kParallelized: for_type = ForType::Parallel; break; case kPipelined: for_type = ForType::Pipelined; break; - case kPPACFuncLoop: for_type = ForType::PPACFuncLoop; break; case kDataPar: break; case kTensorized: break; default: LOG(FATAL) << "Unknown iter type" << node_->iter_type; diff --git 
a/tvm/src/schedule/schedule_lang.cc b/tvm/src/schedule/schedule_lang.cc index 22c47819f..624c159a1 100644 --- a/tvm/src/schedule/schedule_lang.cc +++ b/tvm/src/schedule/schedule_lang.cc @@ -511,24 +511,17 @@ Stage& Stage::stencil(int burst_width, int unroll_factor, int num_iteration) { / return *this; } -Stage& Stage::pragma(IterVar var, const std::string& pragma_type, - const std::string& annotate_key, const Expr& annotate_value) { // NOLINT(*) +Stage& Stage::pragma(IterVar var, const std::string& pragma_type) { // NOLINT(*) if (pragma_type == "unroll") { this->unroll(var); } else if (pragma_type == "vectorize") { this->vectorize(var); - } else if (pragma_type == "PPAC_MVPb_func"){ + } else { /* UpdateIterVarAttr(operator->(), var, [pragma_type](IterVarAttrNode* n) { n->pragmas.push_back(ir::StringImm::make(pragma_type)); }); */ - std::shared_ptr node = std::make_shared(); - node->iter_type = kPPACFuncLoop; - node->for_loop_annotate_keys.push_back(ir::StringImm::make(annotate_key)); - node->for_loop_annotate_values.push_back(annotate_value); - SetIterVarAttr(operator->(), var, node.get()); - return *this; } return *this; } From 819eae8a8009cc96e6b2a84588975598c3924ca8 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Mon, 2 Sep 2019 11:01:08 -0400 Subject: [PATCH 025/103] discard some previous changes --- python/heterocl/tvm/schedule.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 7bbbe0923..06a49cc78 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -626,7 +626,6 @@ def pragma(self, var, pragma_type): pragma_type : str The pragma string to be annotated - Note ---- @@ -655,8 +654,7 @@ def pragma(self, var, pragma_type): - **parallel_stride_pattern** Hint parallel loop to execute in strided pattern. 
- :code:`for (int i = task_id; i < end; i += num_task)` - + :code:`for (int i = task_id; i < end; i += num_task)` """ _api_internal._StagePragma(self, var, pragma_type) From 4ee0a9345404d808ce939f9c2cb5143392457042 Mon Sep 17 00:00:00 2001 From: hgyhungry Date: Tue, 3 Sep 2019 09:02:27 -0400 Subject: [PATCH 026/103] Use int64_t as return type of GeMM on ppac --- hlib/python/hlib/ppac.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hlib/python/hlib/ppac.py b/hlib/python/hlib/ppac.py index 3754b3cf2..cbfd473e0 100644 --- a/hlib/python/hlib/ppac.py +++ b/hlib/python/hlib/ppac.py @@ -136,7 +136,7 @@ def _assign_val(*args): def _mvpodd_reduce(*args): """compute {1, -1} dot product on packed data.""" - temp = hcl.local(0, name='mvpodd_acc', dtype=hcl.UInt(64)) + temp = hcl.local(0, name='mvpodd_acc', dtype=hcl.Int(64)) with hcl.for_(0, in_block_num) as o: with hcl.for_(0, block_size) as i: temp[0] += tvm.popcount(d_packed[args[0], i+block_size*o] ^ w_packed[args[1], i+block_size*o]) @@ -145,7 +145,7 @@ def _mvpodd_reduce(*args): d_packed = _bin_pack_uint8(d) w_packed = _bin_pack_uint8(w) - return hcl.compute(res_shape, _mvpodd_reduce, name=res_name, dtype=hcl.UInt(64), + return hcl.compute(res_shape, _mvpodd_reduce, name=res_name, dtype=hcl.Int(64), attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(ppac_config.func_call[1])), (ppac_params.ret, tvm.make.StringImm(res_name)), (ppac_params.arg0, tvm.make.StringImm(d_packed.name)), @@ -178,6 +178,7 @@ def gemm_multi_bit(d, w, name=None): "only support data with size of times of " + str(ppac_config.elem_num) res_name = name if name else 'res' + res_dtype = hcl.UInt(64) if ('u' in d.dtype) else hcl.Int(64) batch_num = d.shape[0] in_channel_num = d.shape[1] in_block_num = in_channel_num // ppac_config.elem_num @@ -188,7 +189,7 @@ def gemm_multi_bit(d, w, name=None): r = hcl.reduce_axis(0, in_channel_num, name='k') return hcl.compute(res_shape, lambda i, j: hcl.sum(d[i, r] * w[j, r], 
axis=r), - name=res_name, dtype=hcl.UInt(64), + name=res_name, dtype=res_dtype, attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(func_name)), (ppac_params.ret, tvm.make.StringImm(res_name)), (ppac_params.arg0, tvm.make.StringImm(d.name)), From 66851f0868ec02bf5e0db2faa18dbabae77545a5 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 3 Sep 2019 17:19:49 -0400 Subject: [PATCH 027/103] [add] codegenc kernedef + stream init --- Makefile | 8 +- python/heterocl/schedule.py | 25 ++++++ python/heterocl/tvm/device.py | 16 ++++ python/heterocl/tvm/schedule.py | 16 ++++ python/heterocl/tvm/stmt.py | 6 ++ samples/stream/primitive.py | 27 +++++++ tvm/src/api/api_lang.cc | 7 ++ tvm/src/codegen/codegen_c.cc | 81 +++++++++++++++++-- tvm/src/codegen/codegen_c.h | 12 +++ tvm/src/codegen/codegen_source_base.cc | 20 +++++ tvm/src/codegen/codegen_source_base.h | 13 +++ tvm/src/codegen/merlinc/codeanalys_merlinc.cc | 4 - tvm/src/schedule/schedule_dataflow_rewrite.cc | 13 +++ 13 files changed, 235 insertions(+), 13 deletions(-) create mode 100644 python/heterocl/tvm/device.py create mode 100644 samples/stream/primitive.py diff --git a/Makefile b/Makefile index 88c653d77..c972857c7 100644 --- a/Makefile +++ b/Makefile @@ -12,15 +12,15 @@ build-tvm: build-pkgs build-hcl: build-tvm cd python; \ - python setup.py install --user; \ + python setup.py develop; \ cd ../hlib/python; \ - python setup.py install --user; + python setup.py develop; build-python: cd python; \ - python setup.py install --user; \ + python setup.py develop; \ cd ../hlib/python; \ - python setup.py install --user; + python setup.py develop; clean: rm -rf build diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index abd74acdc..419262540 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -134,6 +134,31 @@ def reuse_at(self, target, parent, axis, name=None): name = target.name + ".reuse" return self.sch.reuse_at(target, parent, axis, name) + def to(self, tensors, 
place=_stmt.Stream.FPGA): + """Stream a list of Tensors to dst devices + + Parameters + ---------- + tensors : list of Tensor + The tensors to be moved + + stream_type : {FIFO, Channel, Burst}, optional + The stream type + """ + if place > 2: + raise APIError("Invalid device type") + rets = [] + for tensor in tensors: + try: + target = target.tensor + except (AttributeError, ValueError): + try: + target = target._op + except AttributeError: + pass + rets.append(self.sch.stream(tensor, place)) + return rets + def partition(self, target, partition_type=_stmt.Partition.Complete, dim=0, factor=0): """Partition a Tensor into smaller Tensors or even registers diff --git a/python/heterocl/tvm/device.py b/python/heterocl/tvm/device.py new file mode 100644 index 000000000..194d71850 --- /dev/null +++ b/python/heterocl/tvm/device.py @@ -0,0 +1,16 @@ + +class device(object): + def __init__(self, name): + self.name = name + def __str__(self): + return self.name + def __repr__(self): + return self.__str__() + +class cpu(device): + def __init__(self): + super(cpu, self).__init__("cpu") + +class fpga(device) + def __init__(self): + super(cpu, self).__init__("fpga") diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 21905b443..76724d978 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -332,6 +332,9 @@ def reuse_at(self, target, parent, axis, name): def partition(self, target, partition_type, dim, factor): return _api_internal._SchedulePartition(self, target, dim, factor, partition_type) + def stream(self, tensor, stream_type): + return _api_internal._ScheduleStream(self, tensor,stream_type) + @register_node("Stage") class _Stage(NodeBase): """A Stage represents schedule for one operation. 
@@ -612,6 +615,19 @@ def pipeline(self, var, initiation_interval=1): def stencil(self, burst_width=512, unroll_factor=1, num_iteration=1): _api_internal._StageStencil(self, burst_width, unroll_factor, num_iteration) + def stream_to(self, var, place, depth=10): + """Stream var to devices. + + Parameters + ---------- + var : IterVar + The iteration to be streamed. + + place : str + The device to be + """ + _api_internal._StageStreamTo(self, place, channel, depth) + def pragma(self, var, pragma_type): """Annotate the iteration with pragma diff --git a/python/heterocl/tvm/stmt.py b/python/heterocl/tvm/stmt.py index 4db84970f..9dfd24b74 100644 --- a/python/heterocl/tvm/stmt.py +++ b/python/heterocl/tvm/stmt.py @@ -112,3 +112,9 @@ class Partition(Stmt): @register_node class Stencil(Stmt): pass + +@register_node +class Stream(Stmt): + CPU = 0 + FPGA = 1 + diff --git a/samples/stream/primitive.py b/samples/stream/primitive.py new file mode 100644 index 000000000..6a72740d9 --- /dev/null +++ b/samples/stream/primitive.py @@ -0,0 +1,27 @@ +import heterocl as hcl + +hcl.init() +initiation_interval = 4 +a = hcl.placeholder((10, 20)) +b = hcl.placeholder((10, 20)) + +@hcl.def_([a.shape, b.shape, (), ()]) +def ret_add(A, B, x, y): + hcl.return_(A[x, y] + B[x, y]) + +@hcl.def_([a.shape, b.shape, (), ()]) +def ret_mul(A, B, x, y): + hcl.return_(A[x, y] * B[x, y]) + +c = hcl.compute(a.shape, lambda i, j: ret_add(a, b, i, j)) +d = hcl.compute(b.shape, lambda i, j: ret_mul(a, b, i, j)) +s = hcl.create_schedule([a, b, c, d]) + +s[c].pipeline(c.axis[0], initiation_interval) +# s[c].stream_to(hcl.FPGA) + +print(hcl.lower(s)) +code = hcl.build(s, target="vhls") +print(code) + + diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index f07d590a5..85f383826 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -461,6 +461,13 @@ TVM_REGISTER_API("_SchedulePartition") static_cast(args[4].operator int())); }); +// TVM_REGISTER_API("_ScheduleStream") +// 
.set_body([](TVMArgs args, TVMRetValue *ret) { +// *ret = args[0].operator Schedule() +// .stream(args[1], +// static_cast(args[2].operator int())); +// }); + TVM_REGISTER_API("_ScheduleReshape") .set_body([](TVMArgs args, TVMRetValue *ret) { args[0].operator Schedule().reshape(args[1], args[2]); diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 7373711f4..0edb13d8e 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -21,6 +21,7 @@ void CodeGenC::InitFuncState(LoweredFunc f) { handle_data_type_.clear(); CodeGenSourceBase::ClearFuncState(); } + void CodeGenC::AddFunction(LoweredFunc f) { // clear previous generated state. this->InitFuncState(f); @@ -31,6 +32,7 @@ void CodeGenC::AddFunction(LoweredFunc f) { RegisterHandleType(kv.first.get(), kv.second.type()); } + // second move to generate this->stream << "void " << f->name << "("; for (size_t i = 0; i < f->args.size(); ++i) { Var v = f->args[i]; @@ -66,7 +68,7 @@ void CodeGenC::AddFunction(LoweredFunc f) { } std::string CodeGenC::Finish() { - return decl_stream.str() + stream.str(); + return decl_stream.str() + module_stream.str() + stream.str(); } void CodeGenC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) @@ -722,11 +724,16 @@ void CodeGenC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) } void CodeGenC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "Quantize is not yet support"; + LOG(FATAL) << "Quantize is not yet support"; } void CodeGenC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "KernelExpr is not yet support"; + os << op->name << "("; + for (size_t i = 0; i < op->args.size(); ++i) { + PrintExpr(op->args[i], os); + if (i != op->args.size() - 1) os << ", "; + } + os << ")"; } void CodeGenC::VisitStmt_(const LetStmt* op) { @@ -889,8 +896,55 @@ void CodeGenC::VisitStmt_(const ProducerConsumer *op) { PrintStmt(op->body); } -void 
CodeGenC::VisitStmt_(const KernelDef *op) { - LOG(FATAL) << "KernelDef is not yet support"; +void CodeGenC::VisitStmt_(const KernelDef* op) { + CodeGenC* cg; + LoweredFunc f; + cg->InitFuncState(f); + // skip the first underscore + cg->GetUniqueName("_"); + // add to alloc buffer : type. + for (const auto & k : op->args) { + cg->RegisterHandleType(k.get(), k.get()->type); + } + + // print function signature + PrintType(op->ret_type, cg->stream); + cg->stream << " " << op->name << "("; + cg->stream << " " << op->name << "("; + for (size_t i = 0; i < op->args.size(); ++i) { + VarExpr v = op->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) cg->stream << ", "; + if (v.type().is_handle()) { + auto it = alloc_storage_scope_.find(v.get()); + if (it != alloc_storage_scope_.end()) + PrintStorageScope(it->second, cg->stream); + cg->stream << ' '; + + if (handle_data_type_.count(v.get())) { + PrintType(handle_data_type_.at(v.get()), cg->stream); + } else { + cg->stream << "void"; + } + cg->stream << "*"; + // if (f->is_restricted && restrict_keyword_.length() != 0) { + // stream << ' ' << restrict_keyword_; + // } + } else { + PrintType(v.type(), cg->stream); + } + cg->stream << ' ' << vid; + } + cg->stream << ") {\n"; + int func_scope = this->BeginScope(); + cg->PrintStmt(op->body); + cg->EndScope(func_scope); + cg->PrintIndent(); + cg->stream << "}\n\n"; + + // write code into cpp files + std::string code = cg->Finish(); + module_stream << code; } void CodeGenC::VisitStmt_(const KernelStmt *op) { @@ -922,5 +976,22 @@ void CodeGenC::VisitStmt_(const While *op) { void CodeGenC::VisitStmt_(const Partition* op) { } +void CodeGenC::SaveFuncState(LoweredFunc f) { + // clear save info copy + alloc_storage_scope_save.clear(); + handle_data_type_save.clear(); + // backup func info and clear + alloc_storage_scope_save = alloc_storage_scope_; + handle_data_type_save = handle_data_type_; + CodeGenSourceBase::SaveFuncState(); +} + +void 
CodeGenC::RestoreFuncState(LoweredFunc f) { + this->InitFuncState(f); + alloc_storage_scope_ = alloc_storage_scope_save; + handle_data_type_ = handle_data_type_save; + CodeGenSourceBase::RestoreFuncState(); +} + } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index f579ca579..7e0a94e13 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -15,6 +15,7 @@ #include #include #include "./codegen_source_base.h" +#include "../runtime/thread_storage_scope.h" namespace TVM { namespace codegen { @@ -163,6 +164,9 @@ class CodeGenC : virtual std::string CastFromTo(std::string value, Type from, Type target); protected: + void SaveFuncState(LoweredFunc f); + void RestoreFuncState(LoweredFunc f); + // Print reference to struct location std::string GetStructRef( Type t, const Expr& buffer, const Expr& index, int kind); @@ -192,6 +196,14 @@ class CodeGenC : std::unordered_map handle_data_type_; std::unordered_map buf_length_map_; + // save for kernel gen + std::unordered_map alloc_storage_scope_save; + std::unordered_map handle_data_type_save; + std::unordered_map var_idmap_save; + std::unordered_map name_alloc_map_save; + std::unordered_map ssa_assign_map_save; + std::vector scope_mark_save; + private: /*! 
\brief whether to print in SSA form */ bool print_ssa_form_{false}; diff --git a/tvm/src/codegen/codegen_source_base.cc b/tvm/src/codegen/codegen_source_base.cc index 0df1ad276..28c5b99f0 100644 --- a/tvm/src/codegen/codegen_source_base.cc +++ b/tvm/src/codegen/codegen_source_base.cc @@ -14,6 +14,26 @@ void CodeGenSourceBase::ClearFuncState() { scope_mark_.clear(); } +void CodeGenSourceBase::SaveFuncState() { + name_alloc_map_save.clear(); + ssa_assign_map_save.clear(); + var_idmap_save.clear(); + scope_mark_save.clear(); + // save state into private member + name_alloc_map_save = name_alloc_map_; + ssa_assign_map_save = ssa_assign_map_; + var_idmap_save = var_idmap_; + scope_mark_save = scope_mark_; +} + +void CodeGenSourceBase::RestoreFuncState() { + this->ClearFuncState(); + name_alloc_map_ = name_alloc_map_save; + ssa_assign_map_ = ssa_assign_map_save; + var_idmap_ = var_idmap_save; + scope_mark_ = scope_mark_save; +} + std::string CodeGenSourceBase::GetUniqueName(std::string prefix) { for (size_t i = 0; i < prefix.size(); ++i) { if (prefix[i] == '.') prefix[i] = '_'; diff --git a/tvm/src/codegen/codegen_source_base.h b/tvm/src/codegen/codegen_source_base.h index e140662c1..6700550d3 100644 --- a/tvm/src/codegen/codegen_source_base.h +++ b/tvm/src/codegen/codegen_source_base.h @@ -39,6 +39,10 @@ class CodeGenSourceBase { }; /*! \brief Clear the states that might relates to function generation */ void ClearFuncState(); + /*! \brief Save the states that might relates to function generation */ + void SaveFuncState(); + /*! \brief Restore the states that might relates to function generation */ + void RestoreFuncState(); /*! \brief print the current indented value */ void PrintIndent(); /*! @@ -89,8 +93,12 @@ class CodeGenSourceBase { std::ostringstream decl_stream; /*! \brief the stream to be printed */ std::ostringstream stream; + /*! \brief the stream for mocule */ + std::ostringstream module_stream; /*! 
\brief name of each variable */ std::unordered_map var_idmap_; + /*! \brief Save states as copy */ + std::unordered_map var_idmap_save; private: /*! \brief assignment map of ssa */ @@ -101,6 +109,11 @@ class CodeGenSourceBase { std::vector scope_mark_; /*! \brief The current indentation value */ int indent_{0}; + /*! \brief Save states as copy */ + std::unordered_map ssa_assign_map_save; + std::unordered_map name_alloc_map_save; + std::vector scope_mark_save; + }; /*! diff --git a/tvm/src/codegen/merlinc/codeanalys_merlinc.cc b/tvm/src/codegen/merlinc/codeanalys_merlinc.cc index 56b4e1d97..3bd835783 100644 --- a/tvm/src/codegen/merlinc/codeanalys_merlinc.cc +++ b/tvm/src/codegen/merlinc/codeanalys_merlinc.cc @@ -716,10 +716,8 @@ void CodeAnalysMerlinC::VisitExpr_(const Quantize *op, std::ostream& os) { // NO } void CodeAnalysMerlinC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "KernelExpr is not yet support"; } - void CodeAnalysMerlinC::VisitStmt_(const LetStmt* op) { // TODO comaniac //std::vector vec_var = GetNodesByType(op->value); @@ -882,11 +880,9 @@ void CodeAnalysMerlinC::VisitStmt_(const ProducerConsumer *op) { } void CodeAnalysMerlinC::VisitStmt_(const KernelDef *op) { - LOG(FATAL) << "KernelDef is not yet support"; } void CodeAnalysMerlinC::VisitStmt_(const KernelStmt *op) { - LOG(FATAL) << "KernelStmt is not yet support"; } void CodeAnalysMerlinC::VisitStmt_(const Return *op) { diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index b2bd520e7..d5d136814 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -181,6 +181,19 @@ Tensor Schedule::reuse_at(const Tensor& target, return reuse; } +// Tensor Schedule::stream(const Tensor& target, +// Type partition_type) { +// Stage target_stage = (*this)[target]; +// std::vector consumers; +// size_t num_stage = (*this)->stages.size(); +// size_t min_pos = 
num_stage; +// ArrayNode* stages = (*this)->stages.CopyOnWrite(); +// Buffer target_buffer; +// const PlaceholderOpNode* op = target_stage->op.as(); +// bool is_placeholder = op ? true : false; +// // check if it is a placeholder or not +// } + Tensor Schedule::partition(const Tensor& target, int dim, int factor, PartitionType partition_type) { Stage target_stage = (*this)[target]; From 63833c2d2072dfb3c43ed32f850ad58ead1b9096 Mon Sep 17 00:00:00 2001 From: Shawn Xiang Date: Wed, 4 Sep 2019 13:30:33 -0400 Subject: [PATCH 028/103] [add] var_shape_map --- Makefile | 8 ++-- samples/stream/primitive.py | 8 ++-- tvm/src/codegen/codegen_c.cc | 73 +++++++++++++++++------------------- tvm/src/codegen/codegen_c.h | 7 ++++ 4 files changed, 50 insertions(+), 46 deletions(-) diff --git a/Makefile b/Makefile index c972857c7..9508b9171 100644 --- a/Makefile +++ b/Makefile @@ -12,15 +12,15 @@ build-tvm: build-pkgs build-hcl: build-tvm cd python; \ - python setup.py develop; \ + python setup.py develop --user; \ cd ../hlib/python; \ - python setup.py develop; + python setup.py develop --user; build-python: cd python; \ - python setup.py develop; \ + python setup.py develop --user; \ cd ../hlib/python; \ - python setup.py develop; + python setup.py develop --user; clean: rm -rf build diff --git a/samples/stream/primitive.py b/samples/stream/primitive.py index 6a72740d9..7a39068bb 100644 --- a/samples/stream/primitive.py +++ b/samples/stream/primitive.py @@ -6,12 +6,12 @@ b = hcl.placeholder((10, 20)) @hcl.def_([a.shape, b.shape, (), ()]) -def ret_add(A, B, x, y): - hcl.return_(A[x, y] + B[x, y]) +def ret_add(a, b, x, y): + hcl.return_(a[x, y] + b[x, y]) @hcl.def_([a.shape, b.shape, (), ()]) -def ret_mul(A, B, x, y): - hcl.return_(A[x, y] * B[x, y]) +def ret_mul(a, b, x, y): + hcl.return_(a[x, y] * b[x, y]) c = hcl.compute(a.shape, lambda i, j: ret_add(a, b, i, j)) d = hcl.compute(b.shape, lambda i, j: ret_mul(a, b, i, j)) diff --git a/tvm/src/codegen/codegen_c.cc 
b/tvm/src/codegen/codegen_c.cc index 0edb13d8e..6196f30e2 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -2,6 +2,7 @@ * Copyright (c) 2017 by Contributors * \file codegen_c.cc */ +#include #include #include #include "./codegen_c.h" @@ -25,8 +26,6 @@ void CodeGenC::InitFuncState(LoweredFunc f) { void CodeGenC::AddFunction(LoweredFunc f) { // clear previous generated state. this->InitFuncState(f); - // skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); // add to alloc buffer type. for (const auto & kv : f->handle_data_type) { RegisterHandleType(kv.first.get(), kv.second.type()); @@ -897,54 +896,52 @@ void CodeGenC::VisitStmt_(const ProducerConsumer *op) { } void CodeGenC::VisitStmt_(const KernelDef* op) { - CodeGenC* cg; LoweredFunc f; - cg->InitFuncState(f); + // save func states + SaveFuncState(f); + InitFuncState(f); + std::ostringstream save; + save << stream.rdbuf(); + stream.clear(); + // skip the first underscore - cg->GetUniqueName("_"); + GetUniqueName("_"); // add to alloc buffer : type. 
for (const auto & k : op->args) { - cg->RegisterHandleType(k.get(), k.get()->type); + RegisterHandleType(k.get(), k.get()->type); } // print function signature - PrintType(op->ret_type, cg->stream); - cg->stream << " " << op->name << "("; - cg->stream << " " << op->name << "("; + PrintType(op->ret_type, stream); + stream << " " << op->name << "("; for (size_t i = 0; i < op->args.size(); ++i) { VarExpr v = op->args[i]; std::string vid = AllocVarID(v.get()); - if (i != 0) cg->stream << ", "; - if (v.type().is_handle()) { - auto it = alloc_storage_scope_.find(v.get()); - if (it != alloc_storage_scope_.end()) - PrintStorageScope(it->second, cg->stream); - cg->stream << ' '; - - if (handle_data_type_.count(v.get())) { - PrintType(handle_data_type_.at(v.get()), cg->stream); - } else { - cg->stream << "void"; + if (i != 0) stream << ", "; + auto arg = map_arg_type_[vid]; + PrintType(std::get<1>(arg), this->stream); + this->stream << ' ' << std::get<0>(arg); + const BufferNode* buf = f->api_args[i].as(); + if (v.type().is_handle() && buf) { + var_shape_map_[buf->data.get()] = buf->shape; + for (size_t i = 0; i < buf->shape.size(); i++) { + this->stream << '['; + this->PrintExpr(buf->shape[i], this->stream); + this->stream << ']'; } - cg->stream << "*"; - // if (f->is_restricted && restrict_keyword_.length() != 0) { - // stream << ' ' << restrict_keyword_; - // } - } else { - PrintType(v.type(), cg->stream); } - cg->stream << ' ' << vid; - } - cg->stream << ") {\n"; - int func_scope = this->BeginScope(); - cg->PrintStmt(op->body); - cg->EndScope(func_scope); - cg->PrintIndent(); - cg->stream << "}\n\n"; - - // write code into cpp files - std::string code = cg->Finish(); - module_stream << code; + } + stream << ") {\n"; + int func_scope = BeginScope(); + PrintStmt(op->body); + EndScope(func_scope); + PrintIndent(); + stream << "}\n\n"; + + // restore default stream + module_stream << stream.str(); + stream.clear(); + stream << save.rdbuf(); } void CodeGenC::VisitStmt_(const 
KernelStmt *op) { diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index 7e0a94e13..13e737106 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -21,6 +21,9 @@ namespace TVM { namespace codegen { using namespace ir; +template +using str2tupleMap = std::unordered_map>; + /*! * \brief A base class to generate C code. * @@ -162,6 +165,10 @@ class CodeGenC : const std::string& vec, Type t, int i, const std::string& value); // Get a cast type from to virtual std::string CastFromTo(std::string value, Type from, Type target); + // map from var to shape, range and type + std::map > var_shape_map_; + std::unordered_map range_; + str2tupleMap map_arg_type_; protected: void SaveFuncState(LoweredFunc f); From 69fd36f94645fb891f168190fe43db0ab2ab0840 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Thu, 5 Sep 2019 01:33:58 -0400 Subject: [PATCH 029/103] [update] kerneldef struct shape --- python/heterocl/dsl.py | 9 ++++++--- tvm/HalideIR/src/ir/IR.cpp | 6 +++++- tvm/HalideIR/src/ir/IR.h | 4 +++- tvm/HalideIR/src/ir/IRMutator.cpp | 2 +- tvm/HalideIR/src/ir/IRPrinter.cpp | 8 ++++++++ tvm/src/api/api_ir.cc | 2 +- tvm/src/codegen/codegen_c.cc | 14 ++++++-------- tvm/src/pass/ir_mutator.cc | 2 +- 8 files changed, 31 insertions(+), 16 deletions(-) diff --git a/python/heterocl/dsl.py b/python/heterocl/dsl.py index 6d42031f1..a7bc98f95 100644 --- a/python/heterocl/dsl.py +++ b/python/heterocl/dsl.py @@ -414,15 +414,18 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n # prepare inputs for IR generation inputs = [] inputs_tvm = [] + arg_shapes = [] for shape, name_, dtype in zip(shapes, new_names, dtypes): - if shape == (): + if shape == (): var_ = placeholder((), name_, dtype) inputs.append(var_) inputs_tvm.append(var_.var) - else: + arg_shapes.append([1]) + else: # tensor inputs placeholder_ = placeholder(shape, name_, dtype) inputs.append(placeholder_) inputs_tvm.append(placeholder_.buf.data) + 
arg_shapes.append(list(shape)) s.ret_dtype = ret_dtype fmodule(*inputs) @@ -435,7 +438,7 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n ret_void = _make.UIntImm("uint1", 0) if s.has_return else _make.UIntImm("uint1", 1) body = s.pop_stmt() s.stmt_stack.append([]) - s.emit(_make.KernelDef(inputs_tvm, body, ret_void, ret_dtype, name)) + s.emit(_make.KernelDef(inputs_tvm, arg_shapes, body, ret_void, ret_dtype, name)) for name_, i in zip(names, inputs): s.var_dict[name_] = i s.input_stages.clear() diff --git a/tvm/HalideIR/src/ir/IR.cpp b/tvm/HalideIR/src/ir/IR.cpp index a9718b40e..ddb790e01 100644 --- a/tvm/HalideIR/src/ir/IR.cpp +++ b/tvm/HalideIR/src/ir/IR.cpp @@ -692,14 +692,18 @@ Expr Quantize::make(Expr body, Expr bitwidth) { return Expr(node); } -Stmt KernelDef::make(Array args, Stmt body, Expr ret_void, Type ret_type, std::string name) { +Stmt KernelDef::make(Array args, Array> api_args, Stmt body, Expr ret_void, Type ret_type, std::string name) { for (size_t i = 0; i < args.size(); i++) { internal_assert(args[i].defined()) << "KernelDef of undefined arg\n"; + for (size_t j = 0; j < api_args[i].size(); j++) { + internal_assert(api_args[i][j].defined()) << "KernelDef of undefined shape\n"; + } } internal_assert(body.defined()) << "KernelDef of undefined body\n"; internal_assert(ret_void.defined()) << "KernelDef of undefined return type\n"; std::shared_ptr node = std::make_shared(); node->args = std::move(args); + node->api_args = std::move(api_args); node->body = std::move(body); node->ret_void = std::move(ret_void); node->ret_type = ret_type; diff --git a/tvm/HalideIR/src/ir/IR.h b/tvm/HalideIR/src/ir/IR.h index fae48da29..eba381218 100644 --- a/tvm/HalideIR/src/ir/IR.h +++ b/tvm/HalideIR/src/ir/IR.h @@ -1049,15 +1049,17 @@ struct Quantize : public ExprNode { /** The imperative function definition */ struct KernelDef : public StmtNode { Array args; + Array> api_args; Stmt body; Expr ret_void; Type ret_type; std::string name; 
- EXPORT static Stmt make(Array args, Stmt body, Expr ret_void, Type ret_type, std::string name); + EXPORT static Stmt make(Array args, Array> api_args, Stmt body, Expr ret_void, Type ret_type, std::string name); void VisitAttrs(IR::AttrVisitor* v) final { v -> Visit("args", &args); + v -> Visit("api_args", &api_args); v -> Visit("body", &body); v -> Visit("ret_void", &ret_void); v -> Visit("ret_type", &ret_type); diff --git a/tvm/HalideIR/src/ir/IRMutator.cpp b/tvm/HalideIR/src/ir/IRMutator.cpp index 13b346e93..1fa29ce0a 100644 --- a/tvm/HalideIR/src/ir/IRMutator.cpp +++ b/tvm/HalideIR/src/ir/IRMutator.cpp @@ -480,7 +480,7 @@ void IRMutator::visit(const KernelDef *op, const Stmt &s) { stmt = s; } else { - stmt = KernelDef::make(op->args, body, ret_void, op->ret_type, op->name); + stmt = KernelDef::make(op->args, op->api_args, body, ret_void, op->ret_type, op->name); } } diff --git a/tvm/HalideIR/src/ir/IRPrinter.cpp b/tvm/HalideIR/src/ir/IRPrinter.cpp index 6a3a5d651..4c5463d73 100644 --- a/tvm/HalideIR/src/ir/IRPrinter.cpp +++ b/tvm/HalideIR/src/ir/IRPrinter.cpp @@ -724,6 +724,14 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) p->stream << "def " << op->name << "("; for (size_t i = 0; i < op->args.size(); i++) { p->print(op->args[i]); + if (op->api_args[i].size() > 1) { + p->stream << "["; + for (size_t j = 0; j < op->api_args[i].size(); j++) { + p->print(op->api_args[i][j]); + if (j < op->api_args[i].size() - 1) p->stream << "*"; + } + p->stream << "]"; + } if (i < op->args.size() - 1) { p->stream << ", "; } diff --git a/tvm/src/api/api_ir.cc b/tvm/src/api/api_ir.cc index 825f7580d..0411575cc 100644 --- a/tvm/src/api/api_ir.cc +++ b/tvm/src/api/api_ir.cc @@ -222,7 +222,7 @@ REGISTER_MAKE3(GetSlice); REGISTER_MAKE3(SetBit); REGISTER_MAKE4(SetSlice); REGISTER_MAKE2(Quantize); -REGISTER_MAKE5(KernelDef); +REGISTER_MAKE6(KernelDef); REGISTER_MAKE3(KernelExpr); REGISTER_MAKE2(KernelStmt); REGISTER_MAKE1(Return); diff --git a/tvm/src/codegen/codegen_c.cc 
b/tvm/src/codegen/codegen_c.cc index 6196f30e2..9f8d0a531 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -286,8 +286,8 @@ void CodeGenC::PrintStorageScope(const std::string& scope, std::ostream& os) { / } void CodeGenC::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; + // CHECK_EQ(t.lanes(), 1) + // << "do not yet support vector types"; if (t.is_handle()) { os << "void*"; return; } @@ -910,7 +910,6 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { for (const auto & k : op->args) { RegisterHandleType(k.get(), k.get()->type); } - // print function signature PrintType(op->ret_type, stream); stream << " " << op->name << "("; @@ -921,12 +920,11 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { auto arg = map_arg_type_[vid]; PrintType(std::get<1>(arg), this->stream); this->stream << ' ' << std::get<0>(arg); - const BufferNode* buf = f->api_args[i].as(); - if (v.type().is_handle() && buf) { - var_shape_map_[buf->data.get()] = buf->shape; - for (size_t i = 0; i < buf->shape.size(); i++) { + if (v.type().is_handle()) { + var_shape_map_[op->args[i].get()] = op->api_args[i]; + for (size_t j = 0; j < op->api_args[i].size(); j++) { this->stream << '['; - this->PrintExpr(buf->shape[i], this->stream); + this->PrintExpr(op->api_args[i][j], this->stream); this->stream << ']'; } } diff --git a/tvm/src/pass/ir_mutator.cc b/tvm/src/pass/ir_mutator.cc index ec67aa314..61a09c75d 100644 --- a/tvm/src/pass/ir_mutator.cc +++ b/tvm/src/pass/ir_mutator.cc @@ -321,7 +321,7 @@ Stmt IRMutator::Mutate_(const KernelDef *op, const Stmt &s) { if (body.same_as(op->body) && ret_void.same_as(op->ret_void)) { return s; } else { - return KernelDef::make(op->args, body, ret_void, op->ret_type, op->name); + return KernelDef::make(op->args, op->api_args, body, ret_void, op->ret_type, op->name); } } From 32a522d61c9d10a8e10081f28e3d5fa068ef02f8 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Thu, 5 Sep 2019 
14:05:23 -0400 Subject: [PATCH 030/103] [update] use noderef and restore --- tvm/src/codegen/codegen_c.cc | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 9f8d0a531..810308566 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -286,8 +286,8 @@ void CodeGenC::PrintStorageScope(const std::string& scope, std::ostream& os) { / } void CodeGenC::PrintType(Type t, std::ostream& os) { // NOLINT(*) - // CHECK_EQ(t.lanes(), 1) - // << "do not yet support vector types"; + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; if (t.is_handle()) { os << "void*"; return; } @@ -901,8 +901,9 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { SaveFuncState(f); InitFuncState(f); std::ostringstream save; - save << stream.rdbuf(); - stream.clear(); + save << this->stream.str(); + this->stream.str(""); + this->stream.clear(); // skip the first underscore GetUniqueName("_"); @@ -920,11 +921,13 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { auto arg = map_arg_type_[vid]; PrintType(std::get<1>(arg), this->stream); this->stream << ' ' << std::get<0>(arg); - if (v.type().is_handle()) { - var_shape_map_[op->args[i].get()] = op->api_args[i]; - for (size_t j = 0; j < op->api_args[i].size(); j++) { + + const BufferNode* buf = v.as(); + if (v.type().is_handle() && buf) { + var_shape_map_[buf->data.get()] = buf->shape; + for (size_t j = 0; j < buf->shape.size(); j++) { this->stream << '['; - this->PrintExpr(op->api_args[i][j], this->stream); + this->PrintExpr(buf->shape[], this->stream); this->stream << ']'; } } @@ -937,9 +940,11 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { stream << "}\n\n"; // restore default stream - module_stream << stream.str(); - stream.clear(); - stream << save.rdbuf(); + module_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + this->stream << save.str(); + RestoreFuncState(f); } 
void CodeGenC::VisitStmt_(const KernelStmt *op) { From 171699cc3268b77458c8de4982be4d4da8c2708b Mon Sep 17 00:00:00 2001 From: Shawn Xiang Date: Fri, 6 Sep 2019 00:26:45 -0400 Subject: [PATCH 031/103] [fix] return op --- samples/stream/primitive.py | 3 ++- tvm/src/codegen/codegen_c.cc | 28 +++++++++++++++++----------- tvm/src/codegen/codegen_c.h | 7 ++++++- tvm/src/codegen/hlsc/codegen_hlsc.cc | 2 +- tvm/src/codegen/hlsc/codegen_hlsc.h | 4 ++-- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/samples/stream/primitive.py b/samples/stream/primitive.py index 7a39068bb..a6e72049e 100644 --- a/samples/stream/primitive.py +++ b/samples/stream/primitive.py @@ -18,7 +18,8 @@ def ret_mul(a, b, x, y): s = hcl.create_schedule([a, b, c, d]) s[c].pipeline(c.axis[0], initiation_interval) -# s[c].stream_to(hcl.FPGA) +# s[c].stream_to(ret_mul) +# s[d].stream_to(hcl.FPGA) print(hcl.lower(s)) code = hcl.build(s, target="vhls") diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 810308566..2fa1f1936 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -3,6 +3,7 @@ * \file codegen_c.cc */ #include +#include #include #include #include "./codegen_c.h" @@ -20,6 +21,8 @@ void CodeGenC::Init(bool output_ssa) { void CodeGenC::InitFuncState(LoweredFunc f) { alloc_storage_scope_.clear(); handle_data_type_.clear(); + var_shape_map_.clear(); + range_.clear(); CodeGenSourceBase::ClearFuncState(); } @@ -916,27 +919,24 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { stream << " " << op->name << "("; for (size_t i = 0; i < op->args.size(); ++i) { VarExpr v = op->args[i]; + var_shape_map_[v.get()] = op->api_args[i]; std::string vid = AllocVarID(v.get()); if (i != 0) stream << ", "; - auto arg = map_arg_type_[vid]; - PrintType(std::get<1>(arg), this->stream); - this->stream << ' ' << std::get<0>(arg); - - const BufferNode* buf = v.as(); - if (v.type().is_handle() && buf) { - var_shape_map_[buf->data.get()] = buf->shape; - for 
(size_t j = 0; j < buf->shape.size(); j++) { + this->stream << vid; + if (v.type().is_handle()) { + for (size_t j = 0; j < op->api_args[i].size(); j++) { this->stream << '['; - this->PrintExpr(buf->shape[], this->stream); + this->PrintExpr(op->api_args[i][j], this->stream); this->stream << ']'; } } } stream << ") {\n"; int func_scope = BeginScope(); + range_ = CollectIterRange(op->body); + PrintIndent(); PrintStmt(op->body); EndScope(func_scope); - PrintIndent(); stream << "}\n\n"; // restore default stream @@ -953,7 +953,7 @@ void CodeGenC::VisitStmt_(const KernelStmt *op) { void CodeGenC::VisitStmt_(const Return *op) { this->stream << "return "; - PrintExpr(op->value); + PrintExpr(op->value, stream); this->stream << ";\n"; } @@ -980,9 +980,13 @@ void CodeGenC::SaveFuncState(LoweredFunc f) { // clear save info copy alloc_storage_scope_save.clear(); handle_data_type_save.clear(); + var_shape_map_save.clear(); + range_save.clear(); // backup func info and clear alloc_storage_scope_save = alloc_storage_scope_; handle_data_type_save = handle_data_type_; + var_shape_map_save = var_shape_map_; + range_save = range_; CodeGenSourceBase::SaveFuncState(); } @@ -990,6 +994,8 @@ void CodeGenC::RestoreFuncState(LoweredFunc f) { this->InitFuncState(f); alloc_storage_scope_ = alloc_storage_scope_save; handle_data_type_ = handle_data_type_save; + var_shape_map_ = var_shape_map_save; + range_ = range_save; CodeGenSourceBase::RestoreFuncState(); } diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index 13e737106..ae6093df0 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -163,13 +163,18 @@ class CodeGenC : // print store of single element. 
virtual void PrintVecElemStore( const std::string& vec, Type t, int i, const std::string& value); - // Get a cast type from to + // get a cast type from to virtual std::string CastFromTo(std::string value, Type from, Type target); + // map from var to shape, range and type std::map > var_shape_map_; std::unordered_map range_; str2tupleMap map_arg_type_; + // save for kernel + std::map > var_shape_map_save; + std::unordered_map range_save; + protected: void SaveFuncState(LoweredFunc f); void RestoreFuncState(LoweredFunc f); diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index 3e8696fba..f6437dcae 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -68,7 +68,7 @@ std::string CodeGenHLSC::GetBufferRef(Type t, const Variable* buffer, Expr index buf_length_map_[buffer] == 1); if (is_scalar) { os << vid; - } else { + } else { os << vid; std::vector indices = ExtractIndices(index, var_shape_map_[buffer], range_); for (size_t i = 0; i < indices.size(); i++) { diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.h b/tvm/src/codegen/hlsc/codegen_hlsc.h index c85cbc699..9403d9cff 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.h +++ b/tvm/src/codegen/hlsc/codegen_hlsc.h @@ -28,8 +28,8 @@ class CodeGenHLSC : public CodeGenC { void GenForStmt(const For* op, std::string pragma, bool before); - std::map > var_shape_map_; - std::unordered_map range_; + // std::map > var_shape_map_; + // std::unordered_map range_; protected: std::string GetBufferRef(Type t, const Variable* buffer, Expr index); }; From 2cd15d2ba4e718707ef014ce6293eb11b366c63f Mon Sep 17 00:00:00 2001 From: Shawn Xiang Date: Sat, 7 Sep 2019 11:26:24 -0400 Subject: [PATCH 032/103] [add] hcl device & kernelstmt printer --- python/heterocl/__init__.py | 1 + python/heterocl/api.py | 10 ++- python/heterocl/config.py | 1 + python/heterocl/debug.py | 5 ++ python/heterocl/devices.py | 112 ++++++++++++++++++++++++ python/heterocl/tvm/device.py | 16 
---- python/heterocl/util.py | 27 ++++++ samples/stream/{primitive.py => mod.py} | 4 + samples/stream/stream.py | 40 +++++++++ tvm/src/codegen/codegen_c.cc | 9 +- 10 files changed, 204 insertions(+), 21 deletions(-) create mode 100644 python/heterocl/devices.py rename samples/stream/{primitive.py => mod.py} (87%) create mode 100644 samples/stream/stream.py diff --git a/python/heterocl/__init__.py b/python/heterocl/__init__.py index 588196177..4b90160f0 100644 --- a/python/heterocl/__init__.py +++ b/python/heterocl/__init__.py @@ -3,6 +3,7 @@ from .compute_api import * from .dsl import * from .types import * +from .devices import * from .nparray import * from .debug import hcl_excepthook from .tvm.intrin import * diff --git a/python/heterocl/api.py b/python/heterocl/api.py index 62e28227a..7843b2698 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -12,7 +12,7 @@ from . import types from . import config -def init(init_dtype="int32"): +def init(init_dtype="int32", place="intel_fpga"): """Initialize a HeteroCL environment with configurations. This API must be called each time the users write an application. @@ -51,13 +51,14 @@ def app2(A, B, C): # execute f2 """ # set the configurations - config.init_dtype = init_dtype + config.init_dtype = init_dtype + config.init_device = place # initialize global variables - Schedule.stage_ops = [] + Schedule.stage_ops = [] Schedule.last_stages = OrderedSet([]) Scheme.current = None -def placeholder(shape, name=None, dtype=None): +def placeholder(shape, name=None, dtype=None, place=None): """Construct a HeteroCL placeholder for inputs/outputs. If the shape is an empty tuple, the returned value is a scalar. 
@@ -88,6 +89,7 @@ def placeholder(shape, name=None, dtype=None): """ name = util.get_name("placeholder", name) dtype = util.get_dtype(dtype) + place = util.get_device(place) if shape == (): return Scalar(tvm_api._Var(name, dtype)) diff --git a/python/heterocl/config.py b/python/heterocl/config.py index 5ea94483b..16ffd96b0 100644 --- a/python/heterocl/config.py +++ b/python/heterocl/config.py @@ -1,2 +1,3 @@ init_dtype = "int32" +init_device = "fpga_intel" diff --git a/python/heterocl/debug.py b/python/heterocl/debug.py index cba313e23..a885d2e0b 100644 --- a/python/heterocl/debug.py +++ b/python/heterocl/debug.py @@ -45,6 +45,11 @@ class TensorError(HCLError): def __init__(self, msg): HCLError.__init__(self, msg, "\33[1;31m[Tensor]\33[0m ") +class DeviceError(HCLError): + """A subclass for specifying device related exception""" + def __init__(self, msg): + HCLError.__init__(self, msg, "\33[1;31m[Device]\33[0m ") + def hcl_excepthook(etype, value, tb): """Customized excepthook diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py new file mode 100644 index 000000000..ad200e6e6 --- /dev/null +++ b/python/heterocl/devices.py @@ -0,0 +1,112 @@ +"""Define HeteroCL device types""" +#pylint: disable=too-few-public-methods, too-many-return-statements +from .debug import DeviceError + +class Device(object): + """The base class for all device types + + The default data placement is on CPU. 
+ + Parameters + ---------- + types: str + Device of device to place data + model: str + Model of device to place date + """ + def __init__(self, types="CPU", model="x86"): + self.types = types + self.model = model + +class CPU(Device): + """cpu device with different models""" + def __init__(self, model): + if model not in ["riscv", "arm", "x86", "sparc", "powerpc"]: + raise DeviceError(model + " not supported yet") + super(CPU, self).__init__("CPU", model) + def __repr__(self): + return "CPU (" + str(self.model) + ")" + +class FPGA(Device): + """fpga device with different models""" + def __init__(self, model): + if model not in ["xilinx", "intel"]: + raise DeviceError(model + " not supported yet") + super(FPGA, self).__init__("FPGA", model) + def __repr__(self): + return "FPGA (" + str(self.model) + ")" + +class GPU(Device): + """gpu device with different models""" + def __init__(self, model): + if model not in ["cuda", "rocm"]: + raise DeviceError(model + " not supported yet") + super(GPU, self).__init__("GPU", model) + def __repr__(self): + return "GPU (" + str(self.model) + ")" + +def device_to_str(dtype): + """Convert a device type to string format. + + Parameters + ---------- + dtype : Device or str + The device type to be converted + + Returns + ------- + str + The converted device type in string format. + """ + if isinstance(dtype, Device): + if isinstance(dtype, CPU): + return "cpu_" + str(dtype.model) + elif isinstance(dtype, FPGA): + return "fpga_" + str(dtype.model) + else: + if not isinstance(dtype, str): + raise DeviceError("Unsupported device type format") + return dtype + +def device_to_hcl(dtype): + """Convert a device type to Heterocl type. 
+ + Parameters + ---------- + dtype : Device or str + The device type to be converted + + Returns + ------- + Device + """ + if isinstance(dtype, Device): + return dtype + elif isinstance(dtype, str): + device, model = dtype.split("_") + if device == "cpu": + return CPU(model) + elif device == "gpu": + return GPU(model) + elif device == "fpga": + return FPGA(model) + else: + raise DeviceError("Unrecognized device type") + else: + raise DeviceError("Unrecognized device type format") + +def get_model(dtype): + """Get the model of a given device type. + + Parameters + ---------- + dtype : Device or str + The given device type + + Returns + ------- + str + """ + dtype = dtype_to_hcl(dtype) + return dtype.types, dtype.model + diff --git a/python/heterocl/tvm/device.py b/python/heterocl/tvm/device.py index 194d71850..e69de29bb 100644 --- a/python/heterocl/tvm/device.py +++ b/python/heterocl/tvm/device.py @@ -1,16 +0,0 @@ - -class device(object): - def __init__(self, name): - self.name = name - def __str__(self): - return self.name - def __repr__(self): - return self.__str__() - -class cpu(device): - def __init__(self): - super(cpu, self).__init__("cpu") - -class fpga(device) - def __init__(self): - super(cpu, self).__init__("fpga") diff --git a/python/heterocl/util.py b/python/heterocl/util.py index 996201105..fac15fed1 100644 --- a/python/heterocl/util.py +++ b/python/heterocl/util.py @@ -4,6 +4,7 @@ from .tvm.expr import Var, Call from .tvm.api import _IterVar, decl_buffer from . import types +from . import devices from . import config from .scheme import Scheme from .debug import DTypeError @@ -50,6 +51,32 @@ def get_name(var_type, name=None): VarName.name_dict[var_type] = counter return var_type + str(counter) +def get_device(device, name=None): + """Get the data type by default or from a value. + + Device type of a variable needs to be specified before + the scheduling. + + Parameters + ---------- + dtype: Type or str or None + The specified data type. 
+ + name: str, optional + The name of the variable that will be given a data type. + + Returns + ------- + dtype: str + A data type represented in str. + """ + if Scheme.current is not None: + device_ = Scheme.current.device_dict.get(name) + device = device if device_ is None else device_ + device = config.init_device if device is None else device + return devices.device_to_str(device) + + def get_dtype(dtype, name=None): """Get the data type by default or from a value. diff --git a/samples/stream/primitive.py b/samples/stream/mod.py similarity index 87% rename from samples/stream/primitive.py rename to samples/stream/mod.py index a6e72049e..8c12ad722 100644 --- a/samples/stream/primitive.py +++ b/samples/stream/mod.py @@ -17,7 +17,11 @@ def ret_mul(a, b, x, y): d = hcl.compute(b.shape, lambda i, j: ret_mul(a, b, i, j)) s = hcl.create_schedule([a, b, c, d]) +# compute customization s[c].pipeline(c.axis[0], initiation_interval) +s.partition(b, dim=2, factor=2) + +# stream into modules / device # s[c].stream_to(ret_mul) # s[d].stream_to(hcl.FPGA) diff --git a/samples/stream/stream.py b/samples/stream/stream.py new file mode 100644 index 000000000..58b163326 --- /dev/null +++ b/samples/stream/stream.py @@ -0,0 +1,40 @@ +import heterocl as hcl + +hcl.init(place=hcl.CPU("riscv")) +initiation_interval = 4 +a = hcl.placeholder((10, 20), name="a") +b = hcl.placeholder((10, 20), name="b") +c = hcl.placeholder((10, 20), name="c", + place=hcl.FPGA("intel")) +d = hcl.placeholder((10, 20), "d") +e = hcl.placeholder((10, 20), "e") + +@hcl.def_([a.shape, b.shape, c.shape]) +def ret_add(a, b, c): + c = hcl.update(c, lambda x, y: a[x, y] + b[x, y], 'c_add') + +@hcl.def_([a.shape, b.shape, c.shape]) +def ret_mul(a, b, c): + c = hcl.update(c, lambda x, y: a[x, y] * b[x, y], 'c_mul') + +def add_mul(a, b, c, d, e): + ret_add(a, b, c) + ret_mul(c, d, e) + +# compute customization +s = hcl.create_schedule([a, b, c, d, e], add_mul) +# op1 = add_mul.c_add +# op2 = add_mul.c_mul +# 
s[op1].pipeline(op1.axis[0], initiation_interval) +s.partition(b, dim=2, factor=2) + +# stream into modules / device +print(s[ret_mul]) +# s.stream([a, b, d], hcl.FPGA) +# s[c].stream_to(ret_mul) +# s[d].stream_to(hcl.FPGA) + +print(hcl.lower(s)) +code = hcl.build(s, target="vhls") +print(code) + diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 2fa1f1936..20b42b05e 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -948,7 +948,14 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { } void CodeGenC::VisitStmt_(const KernelStmt *op) { - LOG(FATAL) << "KernelStmt is not yet support"; + // kernel stmt (call module func) + PrintIndent(); + stream << op->name << "("; + for (size_t i = 0; i < op->args.size(); i++) { + PrintExpr(op->args[i], stream); + if (i < op->args.size() -1) stream << ", "; + } + stream << ");\n"; } void CodeGenC::VisitStmt_(const Return *op) { From adb5af1f15c18d3807e51423c6d5cc81a85d95da Mon Sep 17 00:00:00 2001 From: Shawn Xiang Date: Mon, 9 Sep 2019 11:52:11 -0400 Subject: [PATCH 033/103] [fix] def workaround --- python/heterocl/tvm/schedule.py | 9 ++-- samples/stream/stream.py | 42 ++++++++++++------- tvm/src/schedule/schedule_dataflow_rewrite.cc | 2 +- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 76724d978..75ec2a31f 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -615,16 +615,13 @@ def pipeline(self, var, initiation_interval=1): def stencil(self, burst_width=512, unroll_factor=1, num_iteration=1): _api_internal._StageStencil(self, burst_width, unroll_factor, num_iteration) - def stream_to(self, var, place, depth=10): + def stream_to(self, place, depth=10): """Stream var to devices. Parameters ---------- - var : IterVar - The iteration to be streamed. 
- - place : str - The device to be + place : hcl device or stage + The device or module for streaming """ _api_internal._StageStreamTo(self, place, channel, depth) diff --git a/samples/stream/stream.py b/samples/stream/stream.py index 58b163326..834fcc794 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -1,6 +1,7 @@ import heterocl as hcl -hcl.init(place=hcl.CPU("riscv")) +# hcl.init(place=hcl.CPU("riscv")) +hcl.init(place=hcl.FPGA("intel")) initiation_interval = 4 a = hcl.placeholder((10, 20), name="a") b = hcl.placeholder((10, 20), name="b") @@ -9,29 +10,38 @@ d = hcl.placeholder((10, 20), "d") e = hcl.placeholder((10, 20), "e") -@hcl.def_([a.shape, b.shape, c.shape]) -def ret_add(a, b, c): - c = hcl.update(c, lambda x, y: a[x, y] + b[x, y], 'c_add') - -@hcl.def_([a.shape, b.shape, c.shape]) -def ret_mul(a, b, c): - c = hcl.update(c, lambda x, y: a[x, y] * b[x, y], 'c_mul') - def add_mul(a, b, c, d, e): - ret_add(a, b, c) - ret_mul(c, d, e) + @hcl.def_([a.shape, b.shape, c.shape]) + def ret_add(a, b, c): + with hcl.for_(0, a.shape[0]) as i: + with hcl.for_(0, a.shape[1]) as j: + c[i, j] = a[i, j] + b[i, j] + + @hcl.def_([a.shape, b.shape, c.shape]) + def ret_mul(a, b, c): + # hcl.update(c, lambda x, y: a[x, y] * b[x, y], 'c_mul') + with hcl.for_(0, a.shape[0]) as i: + with hcl.for_(0, a.shape[1]) as j: + c[i, j] = a[i, j] * b[i, j] + + ret_add(a, b, c) + ret_mul(c, d, e) # compute customization s = hcl.create_schedule([a, b, c, d, e], add_mul) -# op1 = add_mul.c_add -# op2 = add_mul.c_mul + +# op1 = add_mul.ret_add.c +# op2 = add_mul.ret_mul.c # s[op1].pipeline(op1.axis[0], initiation_interval) +# s[op2].split(op2.axis[0]) s.partition(b, dim=2, factor=2) +print type(add_mul.ret_mul), add_mul.ret_mul.c +print(s[a], s[c]) + # stream into modules / device -print(s[ret_mul]) -# s.stream([a, b, d], hcl.FPGA) -# s[c].stream_to(ret_mul) +# s.stream([a, b, d], hcl.FPGA("intel")) +# s[c].stream_to(add_mul.ret_mul) # s[d].stream_to(hcl.FPGA) 
print(hcl.lower(s)) diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index d5d136814..69d701cbd 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -182,7 +182,7 @@ Tensor Schedule::reuse_at(const Tensor& target, } // Tensor Schedule::stream(const Tensor& target, -// Type partition_type) { +// Type stream_type) { // Stage target_stage = (*this)[target]; // std::vector consumers; // size_t num_stage = (*this)->stages.size(); From 34577738abf868db1503765bda5cd5d4ea3453b9 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Thu, 12 Sep 2019 15:03:55 -0400 Subject: [PATCH 034/103] [update] stream example --- samples/stream/stream.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/samples/stream/stream.py b/samples/stream/stream.py index 834fcc794..cec662343 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -29,19 +29,14 @@ def ret_mul(a, b, c): # compute customization s = hcl.create_schedule([a, b, c, d, e], add_mul) - # op1 = add_mul.ret_add.c # op2 = add_mul.ret_mul.c # s[op1].pipeline(op1.axis[0], initiation_interval) -# s[op2].split(op2.axis[0]) s.partition(b, dim=2, factor=2) -print type(add_mul.ret_mul), add_mul.ret_mul.c -print(s[a], s[c]) - # stream into modules / device -# s.stream([a, b, d], hcl.FPGA("intel")) -# s[c].stream_to(add_mul.ret_mul) +s.stream([a, b], add_mul.ret_add) +s[c].stream_to(s[add_mul.ret_mul]) # s[d].stream_to(hcl.FPGA) print(hcl.lower(s)) From 2330ea38aa9a31f0c8252ce10891888cf3e8ffcb Mon Sep 17 00:00:00 2001 From: Hecmay Date: Thu, 12 Sep 2019 22:57:52 -0400 Subject: [PATCH 035/103] [add] stream expr & stmt ir --- python/heterocl/mutator.py | 12 ++++++ python/heterocl/schedule.py | 3 +- python/heterocl/tvm/expr.py | 6 +++ python/heterocl/tvm/schedule.py | 9 +++- python/heterocl/tvm/stmt.py | 6 +-- samples/stream/stream.py | 2 +- tvm/HalideIR/src/ir/Expr.h | 10 +++++ 
tvm/HalideIR/src/ir/IR.cpp | 25 +++++++++++ tvm/HalideIR/src/ir/IR.h | 42 ++++++++++++++++++ tvm/HalideIR/src/ir/IRMutator.cpp | 14 ++++++ tvm/HalideIR/src/ir/IRMutator.h | 2 + tvm/HalideIR/src/ir/IRVisitor.cpp | 13 ++++++ tvm/HalideIR/src/ir/IRVisitor.h | 4 ++ tvm/include/tvm/ir.h | 3 ++ tvm/include/tvm/ir_mutator.h | 2 + tvm/include/tvm/schedule.h | 8 ++++ tvm/src/api/api_lang.cc | 7 +++ tvm/src/codegen/codegen_c.cc | 1 - tvm/src/codegen/hlsc/codegen_hlsc.cc | 20 +++++---- tvm/src/pass/ir_mutator.cc | 15 +++++++ tvm/src/schedule/compute_primitive.cc | 61 +++++++++++++++++++++++++++ tvm/src/schedule/compute_primitive.h | 8 ++++ tvm/src/schedule/schedule_lang.cc | 34 +++++++++++++++ 23 files changed, 290 insertions(+), 17 deletions(-) diff --git a/python/heterocl/mutator.py b/python/heterocl/mutator.py index a38c90906..3b6226820 100644 --- a/python/heterocl/mutator.py +++ b/python/heterocl/mutator.py @@ -76,6 +76,8 @@ def mutate(self, node): return self.mutate_SetSlice(node) elif isinstance(node, _expr.KernelExpr): return self.mutate_KernelExpr(node) + elif isinstance(node, _expr.StreamExpr): + return self.mutate_StreamExpr(node) else: return node elif isinstance(node, _stmt.Stmt): @@ -111,6 +113,8 @@ def mutate(self, node): return self.mutate_Break(node) elif isinstance(node, _stmt.While): return self.mutate_While(node) + elif isinstance(node, _stmt.StreamStmt): + return self.mutate_StreamStmt(node) else: return node elif isinstance(node, tuple): @@ -247,6 +251,10 @@ def mutate_KernelExpr(self, node): args = self.mutate(node.args) return _make.KernelExpr(node.dtype, args, node.name) + def mutate_StreamExpr(self, node): + args = self.mutate(node.args) + return _make.StreamExpr(node.dtype, args, node.name) + # statements def mutate_LetStmt(self, node): var = self.mutate(node.var) @@ -319,6 +327,10 @@ def mutate_KernelStmt(self, node): args = self.mutate(node.args) return _make.KernelStmt(args, node.name) + def mutate_StreamStmt(self, node): + args = 
self.mutate(node.args) + return _make.StreamStmt(node.dtype, args, node.name) + def mutate_Return(self, node): value = self.mutate(node.value) return _make.Return(value) diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index 419262540..ad8d42371 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -5,6 +5,7 @@ from ordered_set import OrderedSet from .tvm import make as _make from .tvm import stmt as _stmt +from .tvm import expr as _expr from .tvm import api as tvm_api from .tvm import _api_internal from .tvm._api_internal import _ExternOp @@ -134,7 +135,7 @@ def reuse_at(self, target, parent, axis, name=None): name = target.name + ".reuse" return self.sch.reuse_at(target, parent, axis, name) - def to(self, tensors, place=_stmt.Stream.FPGA): + def to(self, tensors, place=_expr.StreamExpr.FIFO): """Stream a list of Tensors to dst devices Parameters diff --git a/python/heterocl/tvm/expr.py b/python/heterocl/tvm/expr.py index d71307e8f..d1ea4ae75 100644 --- a/python/heterocl/tvm/expr.py +++ b/python/heterocl/tvm/expr.py @@ -382,3 +382,9 @@ class Quantize(Expr): @register_node class KernelExpr(Expr): pass + +@register_node +class StreamExpr(Expr): + Channel = 0 + Pipe = 1 + FIFO = 2 diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 75ec2a31f..9463c54fc 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -615,7 +615,7 @@ def pipeline(self, var, initiation_interval=1): def stencil(self, burst_width=512, unroll_factor=1, num_iteration=1): _api_internal._StageStencil(self, burst_width, unroll_factor, num_iteration) - def stream_to(self, place, depth=10): + def stream_to(self, place, types=_expr.StreamExpr.Channel, depth=10): """Stream var to devices. 
Parameters @@ -623,7 +623,12 @@ def stream_to(self, place, depth=10): place : hcl device or stage The device or module for streaming """ - _api_internal._StageStreamTo(self, place, channel, depth) + from ..devices import Device + if isinstance(place, Device): + place = str(place) + else: # stream to modulei(stage) + assert isinstance(place, _Stage), "only support device / stage" + _api_internal._StageStream(self, place, types, depth) def pragma(self, var, pragma_type): """Annotate the iteration with pragma diff --git a/python/heterocl/tvm/stmt.py b/python/heterocl/tvm/stmt.py index 9dfd24b74..d5c2d0a18 100644 --- a/python/heterocl/tvm/stmt.py +++ b/python/heterocl/tvm/stmt.py @@ -114,7 +114,5 @@ class Stencil(Stmt): pass @register_node -class Stream(Stmt): - CPU = 0 - FPGA = 1 - +class StreamStmt(Stmt): + pass diff --git a/samples/stream/stream.py b/samples/stream/stream.py index cec662343..75a9b9111 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -35,7 +35,7 @@ def ret_mul(a, b, c): s.partition(b, dim=2, factor=2) # stream into modules / device -s.stream([a, b], add_mul.ret_add) +# s.stream([a, b], add_mul.ret_add) s[c].stream_to(s[add_mul.ret_mul]) # s[d].stream_to(hcl.FPGA) diff --git a/tvm/HalideIR/src/ir/Expr.h b/tvm/HalideIR/src/ir/Expr.h index b78a466ed..769dc8472 100644 --- a/tvm/HalideIR/src/ir/Expr.h +++ b/tvm/HalideIR/src/ir/Expr.h @@ -91,6 +91,9 @@ enum class IRNodeType : int { /** for memory customization **/ Reuse, Partition, + /** for data stream **/ + StreamExpr, + StreamStmt, /** for stencil analysis **/ Stencil }; @@ -302,6 +305,13 @@ enum class PartitionType : int { Cyclic = 2 }; +/** An enum describing the stream type */ +enum class StreamType : int { + Channel = 0, + Pipe = 1, + FIFO = 2 +}; + /** A reference-counted handle to a statement node. 
*/ struct Stmt : public IRHandle { Stmt() : IRHandle() {} diff --git a/tvm/HalideIR/src/ir/IR.cpp b/tvm/HalideIR/src/ir/IR.cpp index ddb790e01..9935d54a5 100644 --- a/tvm/HalideIR/src/ir/IR.cpp +++ b/tvm/HalideIR/src/ir/IR.cpp @@ -776,6 +776,29 @@ Stmt Partition::make(VarExpr buffer_var, int dim, int factor, PartitionType part return Stmt(node); } +Expr StreamExpr::make(Type type, VarExpr buffer_var, StreamType stream_type, int depth) { + internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; + + std::shared_ptr node = std::make_shared(); + node->type = type; + node->buffer_var = std::move(buffer_var); + node->depth = depth; + node->stream_type = stream_type; + return Expr(node); +} + +Stmt StreamStmt::make(VarExpr buffer_var, Expr value, StreamType stream_type, int depth) { + internal_assert(value.defined()) << "The stream-in value not defined\n"; + internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; + + std::shared_ptr node = std::make_shared(); + node->buffer_var = std::move(buffer_var); + node->value = std::move(value); + node->depth = depth; + node->stream_type = stream_type; + return Stmt(node); +} + Stmt Stencil::make(Array inputs, Array outputs, Stmt body, int burst_width, int unroll_factor, int num_iteration) { internal_assert(body.defined()) << "Stencil of undefined body\n"; @@ -888,6 +911,8 @@ template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v-> template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const Reuse *)this, s); } template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const Partition *)this, s); } template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const Stencil *)this, s); } +template<> void StmtNode::accept(IRVisitor *v, const Stmt &s) const { v->visit((const StreamStmt *)this, s); } +template<> void ExprNode::accept(IRVisitor *v, const Expr &e) const { v->visit((const StreamExpr *)this, 
e); } Call::ConstString Call::debug_to_file = "debug_to_file"; Call::ConstString Call::reinterpret = "reinterpret"; diff --git a/tvm/HalideIR/src/ir/IR.h b/tvm/HalideIR/src/ir/IR.h index eba381218..327f32ca5 100644 --- a/tvm/HalideIR/src/ir/IR.h +++ b/tvm/HalideIR/src/ir/IR.h @@ -1172,6 +1172,48 @@ struct Partition : public StmtNode { static constexpr const char* _type_key = "Partition"; }; +struct StreamStmt : public StmtNode { + VarExpr buffer_var; // var written + Expr value; + int depth; + StreamType stream_type; + + EXPORT static Stmt make(VarExpr buffer_var, + Expr value, + StreamType stream_type, + int depth); + + void VisitAttrs(IR::AttrVisitor* v) final { + v -> Visit("buffer_var", &buffer_var); + v -> Visit("value", &value); + v -> Visit("depth", &depth); + v -> Visit("stream_type", &stream_type); + } + + static const IRNodeType _type_info = IRNodeType::StreamStmt; + static constexpr const char* _type_key = "StreamStmt"; +}; + +struct StreamExpr : public ExprNode { + VarExpr buffer_var; // var loaded + int depth; + StreamType stream_type; + + EXPORT static Expr make(Type type, + VarExpr buffer_var, + StreamType stream_type, + int depth); + + void VisitAttrs(IR::AttrVisitor* v) final { + v -> Visit("dtype", &type); + v -> Visit("buffer_var", &buffer_var); + v -> Visit("depth", &depth); + v -> Visit("stream_type", &stream_type); + } + static const IRNodeType _type_info = IRNodeType::StreamExpr; + static constexpr const char* _type_key = "StreamExpr"; +}; + struct Stencil : public StmtNode { Array inputs; Array outputs; diff --git a/tvm/HalideIR/src/ir/IRMutator.cpp b/tvm/HalideIR/src/ir/IRMutator.cpp index 1fa29ce0a..a069d685a 100644 --- a/tvm/HalideIR/src/ir/IRMutator.cpp +++ b/tvm/HalideIR/src/ir/IRMutator.cpp @@ -524,6 +524,20 @@ void IRMutator::visit(const KernelStmt *op, const Stmt &s) { } } +void IRMutator::visit(const StreamStmt *op, const Stmt &s) { + Expr value = mutate(op->value); + if (value.same_as(op->value)) { + stmt = s; + } else { + stmt = 
StreamStmt::make(op->buffer_var, value, + op->stream_type, op->depth); + } +} + +void IRMutator::visit(const StreamExpr *op, const Expr &e) { + expr = e; +} + void IRMutator::visit(const Return *op, const Stmt &s) { Expr value = mutate(op->value); if (value.same_as(op->value)) { diff --git a/tvm/HalideIR/src/ir/IRMutator.h b/tvm/HalideIR/src/ir/IRMutator.h index 1fea5fec6..4088ae5ea 100644 --- a/tvm/HalideIR/src/ir/IRMutator.h +++ b/tvm/HalideIR/src/ir/IRMutator.h @@ -99,6 +99,8 @@ class IRMutator : public IRVisitor { EXPORT virtual void visit(const Reuse *, const Stmt &); EXPORT virtual void visit(const Partition *, const Stmt &); EXPORT virtual void visit(const Stencil *, const Stmt &); + EXPORT virtual void visit(const StreamExpr *, const Expr &); + EXPORT virtual void visit(const StreamStmt *, const Stmt &); }; diff --git a/tvm/HalideIR/src/ir/IRVisitor.cpp b/tvm/HalideIR/src/ir/IRVisitor.cpp index 02880fdb4..a38ae2fa4 100644 --- a/tvm/HalideIR/src/ir/IRVisitor.cpp +++ b/tvm/HalideIR/src/ir/IRVisitor.cpp @@ -137,6 +137,9 @@ void IRVisitor::visit(const Let *op, const Expr &) { op->body.accept(this); } +void IRVisitor::visit(const StreamExpr *op, const Expr &) { +} + void IRVisitor::visit(const LetStmt *op, const Stmt &) { op->value.accept(this); op->body.accept(this); @@ -169,6 +172,10 @@ void IRVisitor::visit(const Store *op, const Stmt &) { op->predicate.accept(this); } +void IRVisitor::visit(const StreamStmt *op, const Stmt &) { + op->value.accept(this); +} + void IRVisitor::visit(const Provide *op, const Stmt &) { op->value.accept(this); for (size_t i = 0; i < op->args.size(); i++) { @@ -607,6 +614,12 @@ void IRGraphVisitor::visit(const Reuse *op, const Stmt &) { void IRGraphVisitor::visit(const Partition *op, const Stmt &) {} +void IRGraphVisitor::visit(const StreamExpr *op, const Expr &) {} + +void IRGraphVisitor::visit(const StreamStmt *op, const Stmt &) { + include(op->value); +} + void IRGraphVisitor::visit(const Stencil *op, const Stmt &) { 
include(op->body); } diff --git a/tvm/HalideIR/src/ir/IRVisitor.h b/tvm/HalideIR/src/ir/IRVisitor.h index 931f1c5c9..a4faa4aba 100644 --- a/tvm/HalideIR/src/ir/IRVisitor.h +++ b/tvm/HalideIR/src/ir/IRVisitor.h @@ -79,6 +79,8 @@ class IRVisitor { EXPORT virtual void visit(const Reuse *, const Stmt &); EXPORT virtual void visit(const Partition *, const Stmt &); EXPORT virtual void visit(const Stencil *, const Stmt &); + EXPORT virtual void visit(const StreamStmt *, const Stmt &); + EXPORT virtual void visit(const StreamExpr *, const Expr &); }; /** A base class for algorithms that walk recursively over the IR @@ -159,6 +161,8 @@ class IRGraphVisitor : public IRVisitor { EXPORT virtual void visit(const Reuse *, const Stmt &); EXPORT virtual void visit(const Partition *, const Stmt &); EXPORT virtual void visit(const Stencil *, const Stmt &); + EXPORT virtual void visit(const StreamExpr *, const Expr &); + EXPORT virtual void visit(const StreamStmt *, const Stmt &); // @} }; diff --git a/tvm/include/tvm/ir.h b/tvm/include/tvm/ir.h index e66db3fb4..2dd6b86ce 100644 --- a/tvm/include/tvm/ir.h +++ b/tvm/include/tvm/ir.h @@ -21,6 +21,7 @@ using Halide::Internal::StmtNode; using Halide::Internal::IRNodeType; using Halide::Internal::ForType; using Halide::Internal::PartitionType; +using Halide::Internal::StreamType; using Halide::DeviceAPI; // Node container for CommReducer @@ -501,6 +502,8 @@ using Halide::Internal::Quantize; using Halide::Internal::KernelDef; using Halide::Internal::KernelExpr; using Halide::Internal::KernelStmt; +using Halide::Internal::StreamExpr; +using Halide::Internal::StreamStmt; using Halide::Internal::Return; using Halide::Internal::Break; using Halide::Internal::While; diff --git a/tvm/include/tvm/ir_mutator.h b/tvm/include/tvm/ir_mutator.h index 964684ec1..200534644 100644 --- a/tvm/include/tvm/ir_mutator.h +++ b/tvm/include/tvm/ir_mutator.h @@ -77,6 +77,7 @@ class TVM_DLL IRMutator { virtual Stmt Mutate_(const Reuse* op, const Stmt& s); virtual 
Stmt Mutate_(const Partition* op, const Stmt& s); virtual Stmt Mutate_(const Stencil* op, const Stmt& s); + virtual Stmt Mutate_(const StreamStmt* op, const Stmt& s); virtual Expr Mutate_(const Variable* op, const Expr& e); virtual Expr Mutate_(const Load* op, const Expr& e); @@ -114,6 +115,7 @@ class TVM_DLL IRMutator { virtual Expr Mutate_(const SetSlice* op, const Expr& e); virtual Expr Mutate_(const Quantize* op, const Expr& e); virtual Expr Mutate_(const KernelExpr* op, const Expr& e); + virtual Expr Mutate_(const StreamExpr* op, const Expr& e); }; /*! diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index 9dc1956c8..ba407b68e 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -209,6 +209,14 @@ class Stage : public NodeRef { * \return reference to self. */ EXPORT Stage& pipeline(IterVar var, const Expr& initiation_interval); // NOLINT(*) + /*! + * \brief create stream data channel. + * \param target The data streaming consumer. + * \param stream_type The data streaming channel type. + * \param depth The channel depth. + * \return reference to self. + */ + EXPORT Stage& stream(Stage target, ir::StreamType stream_type, int depth); // NOLINT(*) EXPORT Stage& stencil(int burst_width, int unroll_factor, int num_iteration); // NOLINT(*) /*! 
diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index 85f383826..8593dbe73 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -307,6 +307,13 @@ TVM_REGISTER_API("_StageFuse") *ret = fused; }); +TVM_REGISTER_API("_StageStream") +.set_body([](TVMArgs args, TVMRetValue* ret) { + args[0].operator Stage() + .stream(args[1], + static_cast(args[2].operator int()), args[3]); + }); + TVM_REGISTER_API("_StageComputeAt") .set_body([](TVMArgs args, TVMRetValue* ret) { args[0].operator Stage() diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 20b42b05e..6a6acbcb6 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -934,7 +934,6 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { stream << ") {\n"; int func_scope = BeginScope(); range_ = CollectIterRange(op->body); - PrintIndent(); PrintStmt(op->body); EndScope(func_scope); stream << "}\n\n"; diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index f6437dcae..46a711640 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -173,16 +173,20 @@ void CodeGenHLSC::VisitStmt_(const Allocate* op) { var_shape_map_[buffer] = op->extents; std::string scope = alloc_storage_scope_.at(buffer); PrintStorageScope(scope, stream); - PrintType(op->type, stream); - stream << ' '<< vid; - if (constant_size > 1) {// Transfer length one array to scalar - for (size_t i = 0; i < op->extents.size(); i++) { - stream << '['; - PrintExpr(op->extents[i], stream); - stream << "]"; + + // remove kernel alloc + if (true) { + PrintType(op->type, stream); + stream << ' '<< vid; + if (constant_size > 1) {// Transfer length one array to scalar + for (size_t i = 0; i < op->extents.size(); i++) { + stream << '['; + PrintExpr(op->extents[i], stream); + stream << "]"; + } } + stream << ";\n"; } - stream << ";\n"; buf_length_map_[buffer] = constant_size; RegisterHandleType(op->buffer_var.get(), 
op->type); for (size_t i = 0; i < op->attrs.size(); i++) { diff --git a/tvm/src/pass/ir_mutator.cc b/tvm/src/pass/ir_mutator.cc index 61a09c75d..a63889e78 100644 --- a/tvm/src/pass/ir_mutator.cc +++ b/tvm/src/pass/ir_mutator.cc @@ -202,6 +202,15 @@ Stmt IRMutator::Mutate_(const Store *op, const Stmt& s) { } } +Stmt IRMutator::Mutate_(const StreamStmt *op, const Stmt& s) { + Expr value = this->Mutate(op->value); + if (value.same_as(op->value)) { + return s; + } else { + return StreamStmt::make(op->buffer_var, value, op->stream_type, op->depth); + } +} + Stmt IRMutator::Mutate_(const Provide* op, const Stmt& s) { auto new_args = MutateArray(op->args, this); auto new_value = this->Mutate(op->value); @@ -402,6 +411,7 @@ TVM_STATIC_IR_FUNCTOR(IRMutator, vtable_stmt) .DISPATCH_TO_MUTATE_STMT(Prefetch) .DISPATCH_TO_MUTATE_STMT(KernelDef) .DISPATCH_TO_MUTATE_STMT(KernelStmt) +.DISPATCH_TO_MUTATE_STMT(StreamStmt) .DISPATCH_TO_MUTATE_STMT(Return) .DISPATCH_TO_MUTATE_STMT(Break) .DISPATCH_TO_MUTATE_STMT(While) @@ -430,6 +440,10 @@ Expr IRMutator::Mutate_(const Load *op, const Expr& e) { } } +Expr IRMutator::Mutate_(const StreamExpr *op, const Expr& e) { + return e; +} + Expr IRMutator::Mutate_(const Let *op, const Expr& e) { Expr value = this->Mutate(op->value); Expr body = this->Mutate(op->body); @@ -665,6 +679,7 @@ TVM_STATIC_IR_FUNCTOR(IRMutator, vtable_expr) .DISPATCH_TO_MUTATE_EXPR(SetBit) .DISPATCH_TO_MUTATE_EXPR(SetSlice) .DISPATCH_TO_MUTATE_EXPR(Quantize) +.DISPATCH_TO_MUTATE_EXPR(StreamExpr) .DISPATCH_TO_MUTATE_EXPR(KernelExpr); } // namespace ir diff --git a/tvm/src/schedule/compute_primitive.cc b/tvm/src/schedule/compute_primitive.cc index ae59872b3..71ada1681 100644 --- a/tvm/src/schedule/compute_primitive.cc +++ b/tvm/src/schedule/compute_primitive.cc @@ -147,6 +147,51 @@ class LoopFuser final : public IRMutator { std::unordered_map& sub_; }; +class StreamConsumer final : public IRMutator { + public: + StreamConsumer( + const Variable* target, + const 
ir::StreamType& type) + : target_(target), type_(type) {} + + // Replace with StreamExpr e.g. var.read(op. index) + Expr Mutate_(const Load* op, const Expr& e) { + Expr index = op->index; + if (op->buffer_var.get() == target_) { + return StreamExpr::make(op->type, op->buffer_var, type_, 10); + } else { + return Load::make(op->type, op->buffer_var, index, op->predicate); + } + } + private: + const Variable* target_; + const ir::StreamType type_; +}; + +class StreamProducer final : public IRMutator { + public: + StreamProducer( + const Variable* target, + const ir::StreamType& type) + : target_(target), type_(type) {} + + // Replace with StreamStmt e.g. var.write(value) + Stmt Mutate_(const Store* op, const Stmt& s) { + Expr index = op->index; + Expr value = this->Mutate(op->value); + if (op->buffer_var.get() == target_) { + // TODO: assign channel depth + return StreamStmt::make(op->buffer_var, value, type_, 10); + } else { + return Store::make(op->buffer_var, value, index, op->predicate); + } + } + + private: + const Variable* target_; + const ir::StreamType type_; +}; + class LoopReorderer final : public IRMutator { public: LoopReorderer(const Array& order) : order_(order) { @@ -503,6 +548,22 @@ Stmt ReorderLoop(Stmt& stmt, const Array& order) { return stmt; } +Stmt StreamFromProducer(Stmt& stmt, + Buffer& producer_buf, + ir::StreamType& type) { + StreamProducer mutator(producer_buf->data.get(), type); + stmt = mutator.Mutate(stmt); + return stmt; +} + +Stmt StreamToConsumer(Stmt& stmt, + Buffer& producer_buf, + ir::StreamType& type) { + StreamConsumer mutator(producer_buf->data.get(), type); + stmt = mutator.Mutate(stmt); + return stmt; +} + Stmt UpdateIterVarAttr(Stmt& stmt, const IterVar& var, const IterVarAttrNode* node) { diff --git a/tvm/src/schedule/compute_primitive.h b/tvm/src/schedule/compute_primitive.h index e65885462..e7167257c 100644 --- a/tvm/src/schedule/compute_primitive.h +++ b/tvm/src/schedule/compute_primitive.h @@ -33,6 +33,14 @@ Stmt 
PerformComputeAt(Stmt& producer, size_t& attach_level, std::unordered_map& sub); +Stmt StreamFromProducer(Stmt& stmt, + Buffer& producer_buf, + ir::StreamType& type); + +Stmt StreamToConsumer(Stmt& stmt, + Buffer& producer_buf, + ir::StreamType& type); + Stmt UpdateIterVarAttr(Stmt& stmt, const IterVar& var, const IterVarAttrNode* node); diff --git a/tvm/src/schedule/schedule_lang.cc b/tvm/src/schedule/schedule_lang.cc index 624c159a1..7f4af8935 100644 --- a/tvm/src/schedule/schedule_lang.cc +++ b/tvm/src/schedule/schedule_lang.cc @@ -228,6 +228,35 @@ void Reorder(StageNode* self, const Array& order) { new_stmt); } +void StreamTo(StageNode* producer, + StageNode* consumer, + ir::StreamType type, + int depth_factor) { + auto producer_op = producer->op.as(); + auto consumer_op = consumer->op.as(); + Stmt producer_stmt = producer_op->body; + Stmt consumer_stmt = consumer_op->body; + // annotate producer output & consumer input + Buffer producer_buf = producer_op->output_placeholders[0]; + // match consumer's input to producer's output + Stmt new_consumer_stmt = StreamToConsumer(consumer_stmt, producer_buf, type); + Stmt new_producer_stmt = StreamFromProducer(producer_stmt, producer_buf, type); + producer->op = ExternOpNode::make(producer_op->name, + producer_op->tag, + producer_op->axis, + producer_op->inputs, + producer_op->input_placeholders, + producer_op->output_placeholders, + new_producer_stmt); + consumer->op = ExternOpNode::make(consumer_op->name, + consumer_op->tag, + consumer_op->axis, + consumer_op->inputs, + consumer_op->input_placeholders, + consumer_op->output_placeholders, + new_consumer_stmt); +} + void ComputeAt(StageNode* producer, StageNode* consumer, const IterVar& var, @@ -415,6 +444,11 @@ Stage& Stage::fuse(IterVar outer, IterVar inner, IterVar* p_target) { // NOLINT return *this; } +Stage& Stage::stream(Stage target, ir::StreamType type, int depth) { // NOLINT(*) + StreamTo(operator->(), target.operator->(), type, depth); + return *this; +} + 
Stage& Stage::reorder(const Array& order) { // NOLINT(*) Reorder(operator->(), order); return *this; From ae7bebf76d2e6dfb448f8ea1a5f08c1c5da21b85 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Sat, 14 Sep 2019 11:50:07 -0400 Subject: [PATCH 036/103] [fix] kernel arg location for stream --- python/heterocl/dsl.py | 4 +-- python/heterocl/schedule.py | 5 ++-- python/heterocl/tvm/schedule.py | 32 +++++++++++++++------- samples/stream/stream.py | 39 +++++++++++++++++---------- tvm/HalideIR/src/ir/IRPrinter.cpp | 16 ++++++++++- tvm/include/tvm/schedule.h | 3 ++- tvm/src/api/api_lang.cc | 4 +-- tvm/src/schedule/compute_primitive.cc | 29 +++++++++++++------- tvm/src/schedule/schedule_lang.cc | 21 +++++++++------ 9 files changed, 103 insertions(+), 50 deletions(-) diff --git a/python/heterocl/dsl.py b/python/heterocl/dsl.py index a7bc98f95..c9aceca91 100644 --- a/python/heterocl/dsl.py +++ b/python/heterocl/dsl.py @@ -416,12 +416,12 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n inputs_tvm = [] arg_shapes = [] for shape, name_, dtype in zip(shapes, new_names, dtypes): - if shape == (): + if shape == (): var_ = placeholder((), name_, dtype) inputs.append(var_) inputs_tvm.append(var_.var) arg_shapes.append([1]) - else: # tensor inputs + else: # tensor inputs (new bufs) placeholder_ = placeholder(shape, name_, dtype) inputs.append(placeholder_) inputs_tvm.append(placeholder_.buf.data) diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index ad8d42371..f4f5e6b5a 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -328,7 +328,7 @@ def __exit__(self, ptype, value, trace): # create the output operation input_ops = [i._op for i in self.input_stages] input_bufs = [i._buf for i in self.input_stages] - output_bufs = [self._buf] + output_bufs = [self._buf] body = self.pop_stmt() Stage._current.pop() op = _ExternOp(self.name, "", self.axis_list, input_ops, @@ -357,8 +357,7 @@ def __exit__(self, ptype, 
value, trace): superstage.var_dict[self.name] = self # update prefix self.name_with_prefix = superstage.name_with_prefix + "." + self.name - # Otherwise update the list of stages globally - else: + else: # otherwise update the list of stages globally Schedule.stage_ops.append(self) Schedule.last_stages.add(self) Schedule.last_stages -= self.input_stages diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 9463c54fc..8183ea5b2 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -3,6 +3,7 @@ from ._ffi.base import string_types from ._ffi.node import NodeBase, register_node from ._ffi.function import _init_api +from ..devices import Device from . import _api_internal from . import tensor as _tensor from . import expr as _expr @@ -332,7 +333,7 @@ def reuse_at(self, target, parent, axis, name): def partition(self, target, partition_type, dim, factor): return _api_internal._SchedulePartition(self, target, dim, factor, partition_type) - def stream(self, tensor, stream_type): + def stream_to(self, tensor, stream_type): return _api_internal._ScheduleStream(self, tensor,stream_type) @register_node("Stage") @@ -615,20 +616,31 @@ def pipeline(self, var, initiation_interval=1): def stencil(self, burst_width=512, unroll_factor=1, num_iteration=1): _api_internal._StageStencil(self, burst_width, unroll_factor, num_iteration) - def stream_to(self, place, types=_expr.StreamExpr.Channel, depth=10): - """Stream var to devices. + def stream_to(self, dst, src=None, types=_expr.StreamExpr.Channel, depth=10): + """Stream variables between modules and devices + + Create and return buffer for inter device data movement + Void return for inter module + Parameters ---------- - place : hcl device or stage + dst : hcl device or dst stage The device or module for streaming + src : hcl source module + The source module producing output + type : channel type + The streaming type (e.g. 
fifo or pipe) """ - from ..devices import Device - if isinstance(place, Device): - place = str(place) - else: # stream to modulei(stage) - assert isinstance(place, _Stage), "only support device / stage" - _api_internal._StageStream(self, place, types, depth) + + if src: # inter-module move + assert isinstance(src, _Stage), \ + "only support device / stage" + _api_internal._StageStream(self, dst, src, types, depth) + else: # return device buffer + assert isinstance(dst, Device), \ + "missing src stage or wrong device" + # return _api_internal._Stage def pragma(self, var, pragma_type): """Annotate the iteration with pragma diff --git a/samples/stream/stream.py b/samples/stream/stream.py index 75a9b9111..aa70ebf3c 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -5,10 +5,15 @@ initiation_interval = 4 a = hcl.placeholder((10, 20), name="a") b = hcl.placeholder((10, 20), name="b") -c = hcl.placeholder((10, 20), name="c", - place=hcl.FPGA("intel")) -d = hcl.placeholder((10, 20), "d") -e = hcl.placeholder((10, 20), "e") + +# auto-alloc empty buffer on fpga +# c = hcl.placeholder((10, 20), name="c", +# place=hcl.FPGA("intel")) +c = hcl.compute((10, 20), lambda x, y: 0, + name = "c") + +d = hcl.placeholder((10, 20), name="d") +e = hcl.placeholder((10, 20), name="e") def add_mul(a, b, c, d, e): @hcl.def_([a.shape, b.shape, c.shape]) @@ -17,12 +22,12 @@ def ret_add(a, b, c): with hcl.for_(0, a.shape[1]) as j: c[i, j] = a[i, j] + b[i, j] - @hcl.def_([a.shape, b.shape, c.shape]) - def ret_mul(a, b, c): + @hcl.def_([c.shape, d.shape, e.shape]) + def ret_mul(c, d, e): # hcl.update(c, lambda x, y: a[x, y] * b[x, y], 'c_mul') - with hcl.for_(0, a.shape[0]) as i: - with hcl.for_(0, a.shape[1]) as j: - c[i, j] = a[i, j] * b[i, j] + with hcl.for_(0, c.shape[0]) as i: + with hcl.for_(0, c.shape[1]) as j: + e[i, j] = c[i, j] * d[i, j] ret_add(a, b, c) ret_mul(c, d, e) @@ -35,11 +40,17 @@ def ret_mul(a, b, c): s.partition(b, dim=2, factor=2) # stream into modules / 
device -# s.stream([a, b], add_mul.ret_add) -s[c].stream_to(s[add_mul.ret_mul]) -# s[d].stream_to(hcl.FPGA) +# a0, b0 = s.stream_to([a, b], hcl.FPGA("intel")) +# s.stream_to([a0, b0], add_mul.ret_add) + +# within device move producer to consumer +s[c].stream_to(s[add_mul.ret_add], + s[add_mul.ret_mul]) + +# return buffer for inter-device move +d0 = s[d].stream_to(hcl.FPGA('intel')) +# print(add_mul.ret_mul._buf, c._buf) print(hcl.lower(s)) -code = hcl.build(s, target="vhls") -print(code) +print(hcl.build(s, target="vhls")) diff --git a/tvm/HalideIR/src/ir/IRPrinter.cpp b/tvm/HalideIR/src/ir/IRPrinter.cpp index 4c5463d73..b6f3e6082 100644 --- a/tvm/HalideIR/src/ir/IRPrinter.cpp +++ b/tvm/HalideIR/src/ir/IRPrinter.cpp @@ -336,6 +336,19 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) } }); +TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) +.set_dispatch([](const StreamStmt *op, IRPrinter* p) { + p->do_indent(); + p->stream << op->buffer_var << ".write("; + p->print(op->value); + p->stream << ")\n"; +}); + +TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) +.set_dispatch([](const StreamExpr *op, IRPrinter* p) { + p->stream << op->buffer_var << ".read()"; +}); + TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) .set_dispatch([](const Ramp *op, IRPrinter* p) { p->stream << "ramp("; @@ -723,6 +736,7 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) p->do_indent(); p->stream << "def " << op->name << "("; for (size_t i = 0; i < op->args.size(); i++) { + p->stream << op->args[i].type() << "("; // handle type p->print(op->args[i]); if (op->api_args[i].size() > 1) { p->stream << "["; @@ -730,7 +744,7 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) p->print(op->api_args[i][j]); if (j < op->api_args[i].size() - 1) p->stream << "*"; } - p->stream << "]"; + p->stream << "])"; } if (i < op->args.size() - 1) { p->stream << ", "; diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index ba407b68e..38629dfdc 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -216,7 +216,8 @@ class 
Stage : public NodeRef { * \param depth The channel depth. * \return reference to self. */ - EXPORT Stage& stream(Stage target, ir::StreamType stream_type, int depth); // NOLINT(*) + EXPORT Stage& stream(Stage dest, Stage source, + ir::StreamType stream_type, int depth); // NOLINT(*) EXPORT Stage& stencil(int burst_width, int unroll_factor, int num_iteration); // NOLINT(*) /*! diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index 8593dbe73..428d02e29 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -310,8 +310,8 @@ TVM_REGISTER_API("_StageFuse") TVM_REGISTER_API("_StageStream") .set_body([](TVMArgs args, TVMRetValue* ret) { args[0].operator Stage() - .stream(args[1], - static_cast(args[2].operator int()), args[3]); + .stream(args[1], args[2], + static_cast(args[3].operator int()), args[4]); }); TVM_REGISTER_API("_StageComputeAt") diff --git a/tvm/src/schedule/compute_primitive.cc b/tvm/src/schedule/compute_primitive.cc index 71ada1681..648f48aad 100644 --- a/tvm/src/schedule/compute_primitive.cc +++ b/tvm/src/schedule/compute_primitive.cc @@ -150,28 +150,33 @@ class LoopFuser final : public IRMutator { class StreamConsumer final : public IRMutator { public: StreamConsumer( - const Variable* target, + const std::string& target, const ir::StreamType& type) : target_(target), type_(type) {} // Replace with StreamExpr e.g. var.read(op. index) Expr Mutate_(const Load* op, const Expr& e) { Expr index = op->index; - if (op->buffer_var.get() == target_) { + std::string target_name = op->buffer_var.get()->name_hint; + if (has_suffix(target_name, "." 
+ target_)) { return StreamExpr::make(op->type, op->buffer_var, type_, 10); } else { return Load::make(op->type, op->buffer_var, index, op->predicate); } } private: - const Variable* target_; + const std::string target_; const ir::StreamType type_; + bool has_suffix(const std::string &str, const std::string &suffix) { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; + } }; class StreamProducer final : public IRMutator { public: StreamProducer( - const Variable* target, + const std::string& target, const ir::StreamType& type) : target_(target), type_(type) {} @@ -179,8 +184,8 @@ class StreamProducer final : public IRMutator { Stmt Mutate_(const Store* op, const Stmt& s) { Expr index = op->index; Expr value = this->Mutate(op->value); - if (op->buffer_var.get() == target_) { - // TODO: assign channel depth + std::string target_name = op->buffer_var.get()->name_hint; + if (has_suffix(target_name, "." + target_)) { return StreamStmt::make(op->buffer_var, value, type_, 10); } else { return Store::make(op->buffer_var, value, index, op->predicate); @@ -188,8 +193,12 @@ class StreamProducer final : public IRMutator { } private: - const Variable* target_; + const std::string target_; const ir::StreamType type_; + bool has_suffix(const std::string &str, const std::string &suffix) { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; + } }; class LoopReorderer final : public IRMutator { @@ -551,7 +560,8 @@ Stmt ReorderLoop(Stmt& stmt, const Array& order) { Stmt StreamFromProducer(Stmt& stmt, Buffer& producer_buf, ir::StreamType& type) { - StreamProducer mutator(producer_buf->data.get(), type); + std::string target_name = producer_buf.operator->()->name; + StreamProducer mutator(target_name, type); stmt = mutator.Mutate(stmt); return stmt; } @@ -559,7 +569,8 @@ Stmt StreamFromProducer(Stmt& stmt, Stmt StreamToConsumer(Stmt& stmt, Buffer& producer_buf, 
ir::StreamType& type) { - StreamConsumer mutator(producer_buf->data.get(), type); + std::string target_name = producer_buf.operator->()->name; + StreamConsumer mutator(target_name, type); stmt = mutator.Mutate(stmt); return stmt; } diff --git a/tvm/src/schedule/schedule_lang.cc b/tvm/src/schedule/schedule_lang.cc index 7f4af8935..3f4e360f0 100644 --- a/tvm/src/schedule/schedule_lang.cc +++ b/tvm/src/schedule/schedule_lang.cc @@ -228,19 +228,22 @@ void Reorder(StageNode* self, const Array& order) { new_stmt); } -void StreamTo(StageNode* producer, +void StreamTo(StageNode* target, + StageNode* producer, StageNode* consumer, ir::StreamType type, int depth_factor) { + // target op initialized as externop with buffer auto producer_op = producer->op.as(); auto consumer_op = consumer->op.as(); Stmt producer_stmt = producer_op->body; Stmt consumer_stmt = consumer_op->body; - // annotate producer output & consumer input - Buffer producer_buf = producer_op->output_placeholders[0]; - // match consumer's input to producer's output - Stmt new_consumer_stmt = StreamToConsumer(consumer_stmt, producer_buf, type); - Stmt new_producer_stmt = StreamFromProducer(producer_stmt, producer_buf, type); + // track the argument name for data moving + auto target_op = target->op.as(); + Buffer target_buf = target_op->output_placeholders[0]; + // mutate kernel and load operators inside + Stmt new_consumer_stmt = StreamToConsumer(consumer_stmt, target_buf, type); + Stmt new_producer_stmt = StreamFromProducer(producer_stmt, target_buf, type); producer->op = ExternOpNode::make(producer_op->name, producer_op->tag, producer_op->axis, @@ -444,8 +447,10 @@ Stage& Stage::fuse(IterVar outer, IterVar inner, IterVar* p_target) { // NOLINT return *this; } -Stage& Stage::stream(Stage target, ir::StreamType type, int depth) { // NOLINT(*) - StreamTo(operator->(), target.operator->(), type, depth); +Stage& Stage::stream(Stage dest, Stage source, + ir::StreamType type, int depth) { // NOLINT(*) + 
StreamTo(operator->(), dest.operator->(), + source.operator->(), type, depth); return *this; } From 83e4d7e0fb53fda5a2dea0600934e7b5b400a39a Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Sun, 18 Aug 2019 13:18:39 -0400 Subject: [PATCH 037/103] opt1 --- tvm/src/codegen/opencl/codegen_aocl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 30797e36c..881ad82f8 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -39,12 +39,13 @@ class CodeGenAOCL final : public CodeGenC { std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) //overload visitor - void VisitStmt_(const LetStmt* op) final; // NOLINT(*) void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) void VisitExpr_(const Select * op, std::ostream& os) final; //NOLINT(*) void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) + void VisitStmt_(const LetStmt* op) final; // NOLINT(*) + private: // whether enable fp16 and fp64 extension From df48ef911f77e4c4e20bb4ae2c0ffc7749633729 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Sun, 18 Aug 2019 15:11:37 -0400 Subject: [PATCH 038/103] opencl-general --- tvm/src/codegen/opencl/build_opencl.cc | 30 ++++++++++++++++++++++++ tvm/src/codegen/opencl/codegen_aocl.cc | 2 ++ tvm/src/codegen/opencl/codegen_aocl.h | 3 +++ tvm/src/codegen/opencl/codegen_opencl.cc | 0 tvm/src/codegen/opencl/codegen_opencl.h | 0 5 files changed, 35 insertions(+) create mode 100644 tvm/src/codegen/opencl/codegen_opencl.cc create mode 100644 tvm/src/codegen/opencl/codegen_opencl.h diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index bba56d818..4717bf05f 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ 
b/tvm/src/codegen/opencl/build_opencl.cc @@ -251,6 +251,36 @@ TVM_REGISTER_API("codegen.build_aocl") * rv = BuildAOCL(args[0]); }); + + +// template mode for opencl +// template +// std::string BuildHLSC(Array funcs) { +// CodeAnalysOpenCLC ca; +// CodeGen cg; +// for (LoweredFunc f : funcs) { +// // 1st pass: Analyze AST and collect necessary information +// ca.AddFunction(f); +// str2tupleMap map_arg_type; +// map_arg_type = ca.Finish(); +// // 2nd pass: Generate kernel code +// cg.AddFunction(f, map_arg_type); +// } +// std::string code = cg.Finish(); + +// LOG(WARNING) << "OpenCL C doesn't have runtime, return kernel code"; +// return code; +// } + +// TVM_REGISTER_API("codegen.build_sdaccel") +// .set_body([](TVMArgs args, TVMRetValue* rv) { +// *rv = BuildHLSC(args[0]); +// }); +// TVM_REGISTER_API("codegen.build_aocl") +// .set_body([](TVMArgs args, TVMRetValue* rv) { +// *rv = BuildHLSC(args[0]); +// }); + // For runtime // TVM_REGISTER_API("codegen.build_sdaccel_xclbin") // .set_body([]( TVMArgs args, TVMRetValue * rv ) { diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 5bbb71051..ec321cc17 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -420,5 +420,7 @@ void CodeGenAOCL::VisitStmt_(const IfThenElse* op) { stream << "}\n"; } + + } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 881ad82f8..1506e82f5 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -45,6 +45,9 @@ class CodeGenAOCL final : public CodeGenC { void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) void VisitStmt_(const LetStmt* op) final; // NOLINT(*) + void GenForStmt(const For* op, std::string pragma, bool before); + void VisitStmt_(const For* op) override; // NOLINT(*) + private: 
diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc new file mode 100644 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h new file mode 100644 index 000000000..e69de29bb From d51970ad77c193a97cc200edc6aa031e753e17bc Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Sun, 18 Aug 2019 15:20:42 -0400 Subject: [PATCH 039/103] new-version --- tvm/src/codegen/opencl/codegen_aocl.h | 2 - tvm/src/codegen/opencl/codegen_opencl.cc | 424 +++++++++++++++++++++++ tvm/src/codegen/opencl/codegen_opencl.h | 60 ++++ 3 files changed, 484 insertions(+), 2 deletions(-) diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 1506e82f5..6d0851363 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -45,8 +45,6 @@ class CodeGenAOCL final : public CodeGenC { void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) void VisitStmt_(const LetStmt* op) final; // NOLINT(*) - void GenForStmt(const For* op, std::string pragma, bool before); - void VisitStmt_(const For* op) override; // NOLINT(*) diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index e69de29bb..d8cc9e774 100644 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -0,0 +1,424 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include +# include "./codegen_opencl.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +CodeGenSDACCEL::CodeGenSDACCEL() { + restrict_keyword_ = "restrict"; +} + +void CodeGenSDACCEL::InitFuncState(LoweredFunc f) { + CodeGenC::InitFuncState(f); + for (Var arg: f->args) { + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } +} 
+ + +// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { +// this->stream << "__kernel "; +// CodeGenC::AddFunction(f); +// } + +// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { + // this->stream << "# pragma once\n"; + // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n\n"; + // this->stream << "__kernel "; + +// CodeGenC::AddFunction(f); +// } + +void CodeGenSDACCEL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Write head files + // stream.open("host.cpp"); + // this->stream << "# pragma once\n"; + // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n"; + // this->stream << "# include \n\n"; + + // Write entry function name + // this->stream << "__kernel " << f->name << "("; + // this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; + // 
this->stream << f->name << "("; + this->stream << "__kernel " << "void " << f->name << "("; + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + // this->stream << "global "; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; + this->stream << "}\n\n"; +} + + + + +// void CodeGenSDACCEL::AddFunction(LoweredFunc f, +// str2tupleMap map_arg_type) { +// // Don't Write header flies +// // Clear previous generated state +// this->InitFuncState(f); +// // Register alloc buffer type +// for ( const auto & kv : f->handle_data_type ) { +// this->stream << kv.first.get(); +// this->stream << kv.second.type(); +// RegisterHandleType(kv.first.get(), kv.second.type()); +// } +// // Write entry function name +// this->stream << "__kernel "; +// // Write arguments +// for ( size_t i = 0; i < f->args.size(); i++ ) { +// Var v = f->args[i]; +// std::string vid = AllocVarID(v.get()); +// if ( i!= 0 ) { +// this->stream << ", "; +// } +// if ( map_arg_type.find(vid) == map_arg_type.end()) { +// LOG(WARNING) << vid << " type not found\n"; +// PrintType(v.type(), this->stream); +// this->stream << ' ' << vid; +// } +// else { +// auto arg = map_arg_type[vid]; +// PrintType(std::get<1>(arg), this->stream); +// if (v.type().is_handle()) { +// this->stream << "*"; +// } +// this->stream << ' ' << std::get<0>(arg); + +// } +// stream << ") {\n"; +// int 
func_scope = this->BeginScope(); +// this->PrintStmt(f->body); +// this->EndScope(func_scope); +// this->PrintIndent(); +// this->stream << "}\n\n"; +// } +// CodeGenSDACCEL::AddFunction(f, map_arg_type); +// } + +std::string CodeGenSDACCEL::Finish() { + // inject extension enable pragma for fp16 and fp64 + if (enable_fp16_) { + decl_stream + << "#ifdef cl_khr_fp16\n" + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "#elif defined(cl_amd_fp16)\n" + "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" + "#else\n" + "#error \"Half precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + if (enable_fp64_) { + decl_stream + << "#ifdef cl_khr_fp64\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#elif defined(cl_amd_fp64)\n" + "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" + "#else\n" + "#error \"Double precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + return CodeGenC::Finish(); +} + +void CodeGenSDACCEL::BindThreadIndex(const IterVar& iv) { + CHECK(!var_idmap_.count(iv->var.get())); + runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); + std::ostringstream os; + if (ts.rank == 1) { + os << "get_local_id(" << ts.dim_index << ")"; + } else { + os << "get_group_id(" << ts.dim_index << ")"; + } + var_idmap_[iv->var.get()] = + CastFromTo(os.str(), UInt(64), iv->var.type()); +} + +void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + int lanes = t.lanes(); + if (t.is_handle()) { + CHECK_EQ(lanes, 1) + << "do not yet support vector types"; + os << "void*"; return; + } + if ( t== Bool() ) { + os << "bool"; return; + } + bool fail = false; + if (t.is_float()) { + switch (t.bits()) { + case 16: + os << "half"; + enable_fp16_ = true; + break; + case 32: + os << "float"; + break; + case 64: + os << "double"; + enable_fp64_ = true; + break; + default: + fail = true; + break; + } + if (!fail && lanes == 1) 
return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } else if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << 'u'; + } + if (t.bits() == 8 && t.lanes() == 4) { + // directly 4 8 bit int in integer. + os << "int"; return; + } + switch (t.bits()) { + case 8: os << "char"; break; + case 16: os << "short"; break; + case 32: os << "int"; break; + case 64: os << "long"; break; + case 1: os << "int"; break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; +} + +void CodeGenSDACCEL::PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os) { // NOLINT(*) + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + auto it = alloc_storage_scope_.find(buffer); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << GetVarID(buffer) << " + "; + PrintExpr(base, os); +} +std::string CodeGenSDACCEL::GetVecLoad( + Type t, const Variable* buffer, Expr base) { + std::ostringstream os; + os << "vload" << t.lanes() << "(0, "; + PrintVecAddr(buffer, t, base, os); + os << ")"; + return os.str(); +} + +void CodeGenSDACCEL::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + this->PrintIndent(); + stream << "vstore" << t.lanes() << "(" << value << ", 0, "; + PrintVecAddr(buffer, t, base, stream); + stream << ");\n"; +} + +void CodeGenSDACCEL::PrintStorageSync(const Call* op) { + const std::string& sync = op->args[0].as()->value; + if (sync == "warp") { + LOG(FATAL) << "warp sync not supported in opencl"; + } else if (sync == "shared") { + this->PrintIndent(); + this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; + } else if (sync == "global") { + LOG(FATAL) << "not supported"; + } +} + +// void CodeGenSDACCEL::PrintStorageScope( 
+// const std::string& scope, std::ostream& os) { // NOLINT(*) +// if (scope == "global") { +// os << "__global "; +// } else if (scope == "shared") { +// os << "__local "; +// } +// } + +void CodeGenSDACCEL::PrintStorageScope( + const std::string& scope, std::ostream& os) { // NOLINT(*) + if (scope == "global") { + os << "global "; + } else if (scope == "shared") { + os << "local "; + } +} + + +std::string CodeGenSDACCEL::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + if (target.lanes() == 1) { + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + } else { // convert vector type + os << "("; + os << "convert_"; + this->PrintType(target, os); + os << "(" << value << "))"; + } + return os.str(); +} + +void CodeGenSDACCEL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + std::string v = PrintExpr(op->value); + os << "(("; + PrintType(op->type, os); + os << ")("; + for (int i = 0; i < op->lanes; ++i) { + if (i != 0) os << ", "; + os << v; + } + os << "))"; +} + +void CodeGenSDACCEL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) + if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintType(op->args[2].type(), os); + os << ")"; + } + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenSDACCEL::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + + +void CodeGenSDACCEL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) + if (std::isinf(op->value)) { + if ( op->value < 0) { + os << "-"; + } + os << "INFINITY"; + } else if 
(std::isnan(op->value)) { + os << "NAN"; + } else { + CodeGenC::VisitExpr_(op, os); + } +} + +void CodeGenSDACCEL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) + os << "("; + PrintType(op->true_value.type(), os); + os << ")"; + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenSDACCEL::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + // Skip the buffer data checking + if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) + return ; + PrintIndent(); + if (cond[0] == '(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h index e69de29bb..2a165b818 100644 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -0,0 +1,60 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ +#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "../codegen_c.h" + +namespace TVM { +namespace codegen { + + +class CodeGenOpenCL final : public CodeGenC { + public: + CodeGenOpenCL(); + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + std::string Finish(); + + void InitFuncState(LoweredFunc f) final; + void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) final; //NOLINT(*) + void PrintStorageSync(const Call* op) final; //NOLINT(*) + // void PrintType(Type t, 
std::ostream& os) final; //NOLINT(*) + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + + std::string GetVecLoad(Type t, const Variable * buffer, + Expr base) final; // NOLINT(*) + void PrintVecStore(const Variable * buffer, Type t, + Expr base, const std::string& value) final; //NOLINT(*) + void PrintVecAddr(const Variable * buffer, Type t, + Expr base, std::ostream& os); //NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) + + //overload visitor + void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) final; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) + void GenForStmt(const For* op, std::string pragma, bool before); + void VisitStmt_(const LetStmt* op) final; // NOLINT(*) + + + private: + // whether enable fp16 and fp64 extension + bool enable_fp16_{false}; + bool enable_fp64_{false}; + +}; +} // namespace codegen +} // namespace TVM + +#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ \ No newline at end of file From 530ce5d7ae223e35010398d83b33a6e23e32bd58 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Sun, 18 Aug 2019 16:57:59 -0400 Subject: [PATCH 040/103] no bug --- tvm/src/codegen/opencl/build_opencl.cc | 104 +++++++++++++++-------- tvm/src/codegen/opencl/codegen_aocl.h | 30 ++++--- tvm/src/codegen/opencl/codegen_opencl.cc | 73 +++++++++++----- tvm/src/codegen/opencl/codegen_opencl.h | 35 ++++---- tvm/src/codegen/opencl/codegen_sdaccel.h | 30 ++++--- 5 files changed, 168 insertions(+), 104 deletions(-) diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 4717bf05f..c72627127 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -10,11 +10,13 @@ # include # include 
"../../runtime/meta_data.h" # include -# include "./codegen_sdaccel.h" -# include "./codegen_aocl.h" +# include "./codegen_opencl.h" +// # include "./codegen_sdaccel.h" +// # include "./codegen_aocl.h" # include "./codeanalys_openclc.h" # include "../build_common.h" -# include "./sdaccel/sdaccel_module.h" +// # include "./sdaccel/sdaccel_module.h" +// # include "./aocl/aocl_module.h" namespace TVM { @@ -147,38 +149,63 @@ namespace codegen { // } -// codegen for SDACCEL_WITH_ANALYSIS -std::string BuildSDACCEL(Array funcs) { - using TVM::runtime::Registry; - CodeAnalysOpenCLC ca; - CodeGenSDACCEL cg; - for (LoweredFunc f : funcs) { - ca.AddFunction(f); - str2tupleMap map_arg_type; - map_arg_type = ca.Finish(); +// codegen for SDACCEL_WITH_ANALYSIS xxx +// std::string BuildSDACCEL(Array funcs) { +// using TVM::runtime::Registry; +// CodeAnalysOpenCLC ca; +// CodeGenSDACCEL cg; +// for (LoweredFunc f : funcs) { +// ca.AddFunction(f); +// str2tupleMap map_arg_type; +// map_arg_type = ca.Finish(); - cg.AddFunction(f, map_arg_type); +// cg.AddFunction(f, map_arg_type); - } - std::string code = cg.Finish(); +// } +// std::string code = cg.Finish(); - if (const auto* f = Registry::Get("tvm_callback_sdaccel_postproc")) { - code = (*f)(code).operator std::string(); - } +// if (const auto* f = Registry::Get("tvm_callback_sdaccel_postproc")) { +// code = (*f)(code).operator std::string(); +// } - LOG(WARNING) << "SDaccel doesn't have runtime, return kernel code"; - return code; -} +// LOG(WARNING) << "SDaccel doesn't have runtime, return kernel code"; +// return code; +// } // runtime::Module BuildSDACCELXCLBIN(Array funcs) -// codegen for AOCL_WITH_ANALYSIS -std::string BuildAOCL(Array funcs) { +// codegen for AOCL_WITH_ANALYSIS xxx +// std::string BuildAOCL(Array funcs) { +// using TVM::runtime::Registry; +// CodeAnalysOpenCLC ca; +// CodeGenAOCL cg; +// for (LoweredFunc f : funcs) { +// ca.AddFunction(f); +// str2tupleMap map_arg_type; +// map_arg_type = ca.Finish(); + +// 
cg.AddFunction(f, map_arg_type); + +// } +// std::string code = cg.Finish(); + +// if (const auto* f = Registry::Get("tvm_callback_aocl_postproc")) { +// code = (*f)(code).operator std::string(); +// } + +// LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; +// return code; +// } + + + +// codegen for OPENCL_WITH_ANALYSIS xxx +std::string BuildOpenCL(Array funcs) { using TVM::runtime::Registry; CodeAnalysOpenCLC ca; - CodeGenAOCL cg; + CodeGenOpenCL cg; for (LoweredFunc f : funcs) { ca.AddFunction(f); str2tupleMap map_arg_type; @@ -189,16 +216,18 @@ std::string BuildAOCL(Array funcs) { } std::string code = cg.Finish(); - if (const auto* f = Registry::Get("tvm_callback_aocl_postproc")) { + if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { code = (*f)(code).operator std::string(); } - LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; + LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; return code; } + + // codegen for OpenCL // std::string BuildOpenCL(Array funcs) { // using TVM::runtime::Registry; @@ -241,21 +270,24 @@ std::string BuildAOCL(Array funcs) { -TVM_REGISTER_API("codegen.build_sdaccel") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildSDACCEL(args[0]); - }); +// TVM_REGISTER_API("codegen.build_sdaccel") +// .set_body([]( TVMArgs args, TVMRetValue * rv ) { +// * rv = BuildSDACCEL(args[0]); +// }); -TVM_REGISTER_API("codegen.build_aocl") +// TVM_REGISTER_API("codegen.build_aocl") +// .set_body([]( TVMArgs args, TVMRetValue * rv ) { +// * rv = BuildAOCL(args[0]); +// }); +TVM_REGISTER_API("codegen.build_opencl") .set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildAOCL(args[0]); + * rv = BuildOpenCL(args[0]); }); - // template mode for opencl // template -// std::string BuildHLSC(Array funcs) { +// std::string BuildOpenCL(Array funcs) { // CodeAnalysOpenCLC ca; // CodeGen cg; // for (LoweredFunc f : funcs) { @@ -274,11 +306,11 @@ TVM_REGISTER_API("codegen.build_aocl") 
// TVM_REGISTER_API("codegen.build_sdaccel") // .set_body([](TVMArgs args, TVMRetValue* rv) { -// *rv = BuildHLSC(args[0]); +// *rv = BuildOpenCL(args[0]); // }); // TVM_REGISTER_API("codegen.build_aocl") // .set_body([](TVMArgs args, TVMRetValue* rv) { -// *rv = BuildHLSC(args[0]); +// *rv = BuildOpenCL(args[0]); // }); // For runtime diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 6d0851363..6426c13f2 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -11,40 +11,42 @@ # include # include "./codeanalys_openclc.h" # include "../codegen_c.h" +# include "./codegen_opencl.h" + namespace TVM { namespace codegen { -class CodeGenAOCL final : public CodeGenC { +class CodeGenAOCL final : public CodeGenOpenCL { public: CodeGenAOCL(); // void AddFunction(LoweredFunc f); void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); std::string Finish(); - void InitFuncState(LoweredFunc f) final; - void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) final; //NOLINT(*) - void PrintStorageSync(const Call* op) final; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) final; //NOLINT(*) + void InitFuncState(LoweredFunc f) override; + void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) + void PrintStorageSync(const Call* op) override; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) void PrintType(Type t, std::ostream& os) override; //NOLINT(*) std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) final; // NOLINT(*) + Expr base) override; // NOLINT(*) void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) final; //NOLINT(*) + Expr base, const std::string& value) override; //NOLINT(*) void PrintVecAddr(const Variable * buffer, 
Type t, Expr base, std::ostream& os); //NOLINT(*) std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) //overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) - void VisitStmt_(const LetStmt* op) final; // NOLINT(*) + void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) + void VisitStmt_(const LetStmt* op) override; // NOLINT(*) diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index d8cc9e774..0ac068121 100644 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -14,11 +14,11 @@ namespace TVM { namespace codegen { -CodeGenSDACCEL::CodeGenSDACCEL() { +CodeGenOpenCL::CodeGenOpenCL() { restrict_keyword_ = "restrict"; } -void CodeGenSDACCEL::InitFuncState(LoweredFunc f) { +void CodeGenOpenCL::InitFuncState(LoweredFunc f) { CodeGenC::InitFuncState(f); for (Var arg: f->args) { if (arg.type().is_handle()) { @@ -28,12 +28,12 @@ void CodeGenSDACCEL::InitFuncState(LoweredFunc f) { } -// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { +// void CodeGenOpenCL::AddFunction(LoweredFunc f) { // this->stream << "__kernel "; // CodeGenC::AddFunction(f); // } -// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { +// void CodeGenOpenCL::AddFunction(LoweredFunc f) { // this->stream << "# pragma once\n"; // this->stream << "# define 
CL_HPP_CL_1_2_DEFAULT_BUILD\n"; // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; @@ -50,7 +50,7 @@ void CodeGenSDACCEL::InitFuncState(LoweredFunc f) { // CodeGenC::AddFunction(f); // } -void CodeGenSDACCEL::AddFunction(LoweredFunc f, +void CodeGenOpenCL::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Clear previous generated state this->InitFuncState(f); @@ -115,7 +115,7 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, -// void CodeGenSDACCEL::AddFunction(LoweredFunc f, +// void CodeGenOpenCL::AddFunction(LoweredFunc f, // str2tupleMap map_arg_type) { // // Don't Write header flies // // Clear previous generated state @@ -156,10 +156,10 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, // this->PrintIndent(); // this->stream << "}\n\n"; // } -// CodeGenSDACCEL::AddFunction(f, map_arg_type); +// CodeGenOpenCL::AddFunction(f, map_arg_type); // } -std::string CodeGenSDACCEL::Finish() { +std::string CodeGenOpenCL::Finish() { // inject extension enable pragma for fp16 and fp64 if (enable_fp16_) { decl_stream @@ -188,7 +188,7 @@ std::string CodeGenSDACCEL::Finish() { return CodeGenC::Finish(); } -void CodeGenSDACCEL::BindThreadIndex(const IterVar& iv) { +void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { CHECK(!var_idmap_.count(iv->var.get())); runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); std::ostringstream os; @@ -201,7 +201,7 @@ void CodeGenSDACCEL::BindThreadIndex(const IterVar& iv) { CastFromTo(os.str(), UInt(64), iv->var.type()); } -void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) +void CodeGenOpenCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) int lanes = t.lanes(); if (t.is_handle()) { CHECK_EQ(lanes, 1) @@ -257,7 +257,7 @@ void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; } -void CodeGenSDACCEL::PrintVecAddr(const Variable* buffer, Type t, +void CodeGenOpenCL::PrintVecAddr(const 
Variable* buffer, Type t, Expr base, std::ostream& os) { // NOLINT(*) if (!HandleTypeMatch(buffer, t.element_of())) { os << '('; @@ -272,7 +272,7 @@ void CodeGenSDACCEL::PrintVecAddr(const Variable* buffer, Type t, os << GetVarID(buffer) << " + "; PrintExpr(base, os); } -std::string CodeGenSDACCEL::GetVecLoad( +std::string CodeGenOpenCL::GetVecLoad( Type t, const Variable* buffer, Expr base) { std::ostringstream os; os << "vload" << t.lanes() << "(0, "; @@ -281,7 +281,7 @@ std::string CodeGenSDACCEL::GetVecLoad( return os.str(); } -void CodeGenSDACCEL::PrintVecStore(const Variable* buffer, +void CodeGenOpenCL::PrintVecStore(const Variable* buffer, Type t, Expr base, const std::string& value) { this->PrintIndent(); @@ -290,7 +290,7 @@ void CodeGenSDACCEL::PrintVecStore(const Variable* buffer, stream << ");\n"; } -void CodeGenSDACCEL::PrintStorageSync(const Call* op) { +void CodeGenOpenCL::PrintStorageSync(const Call* op) { const std::string& sync = op->args[0].as()->value; if (sync == "warp") { LOG(FATAL) << "warp sync not supported in opencl"; @@ -302,7 +302,7 @@ void CodeGenSDACCEL::PrintStorageSync(const Call* op) { } } -// void CodeGenSDACCEL::PrintStorageScope( +// void CodeGenOpenCL::PrintStorageScope( // const std::string& scope, std::ostream& os) { // NOLINT(*) // if (scope == "global") { // os << "__global "; @@ -311,7 +311,7 @@ void CodeGenSDACCEL::PrintStorageSync(const Call* op) { // } // } -void CodeGenSDACCEL::PrintStorageScope( +void CodeGenOpenCL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global") { os << "global "; @@ -321,7 +321,7 @@ void CodeGenSDACCEL::PrintStorageScope( } -std::string CodeGenSDACCEL::CastFromTo(std::string value, Type from, Type target) { +std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { if (from == target) return value; std::ostringstream os; if (target.lanes() == 1) { @@ -337,7 +337,7 @@ std::string CodeGenSDACCEL::CastFromTo(std::string value, 
Type from, Type target return os.str(); } -void CodeGenSDACCEL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) +void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) std::string v = PrintExpr(op->value); os << "(("; PrintType(op->type, os); @@ -349,7 +349,7 @@ void CodeGenSDACCEL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NO os << "))"; } -void CodeGenSDACCEL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) +void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { os << "("; PrintType(op->args[2].type(), os); @@ -358,7 +358,7 @@ void CodeGenSDACCEL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(* CodeGenC::VisitExpr_(op, os); } -void CodeGenSDACCEL::VisitStmt_(const LetStmt* op) { +void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { std::string value = PrintExpr(op->value); // Skip the argument retrieving assign statement std::string vid = AllocVarID(op->var.get()); @@ -375,7 +375,7 @@ void CodeGenSDACCEL::VisitStmt_(const LetStmt* op) { } -void CodeGenSDACCEL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) +void CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) if (std::isinf(op->value)) { if ( op->value < 0) { os << "-"; @@ -388,14 +388,14 @@ void CodeGenSDACCEL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLI } } -void CodeGenSDACCEL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) +void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) os << "("; PrintType(op->true_value.type(), os); os << ")"; CodeGenC::VisitExpr_(op, os); } -void CodeGenSDACCEL::VisitStmt_(const IfThenElse* op) { +void CodeGenOpenCL::VisitStmt_(const IfThenElse* op) { std::string cond = PrintExpr(op->condition); // Skip the buffer data checking if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) @@ -420,5 +420,32 
@@ void CodeGenSDACCEL::VisitStmt_(const IfThenElse* op) { stream << "}\n"; } +void CodeGenOpenCL::GenForStmt(const For* op, std::string pragma, bool before) { + std::string extent = PrintExpr(op->extent); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + if (before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + PrintIndent(); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + if (!before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + + + } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h index 2a165b818..8b992ec7b 100644 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -3,8 +3,8 @@ yb269@cornell.edu */ -#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ -#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ +#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ +#define TVM_CODEGEN_CODEGEN_OPENCL_H_ # include # include @@ -16,36 +16,37 @@ namespace TVM { namespace codegen { -class CodeGenOpenCL final : public CodeGenC { +class CodeGenOpenCL : public CodeGenC { public: CodeGenOpenCL(); // void AddFunction(LoweredFunc f); void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); std::string Finish(); - void InitFuncState(LoweredFunc f) final; - void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) final; //NOLINT(*) - void PrintStorageSync(const Call* op) final; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) final; //NOLINT(*) + void InitFuncState(LoweredFunc f) override; + void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) + void PrintStorageScope(const std::string& scope, 
std::ostream& os) override; //NOLINT(*) + void PrintStorageSync(const Call* op) override; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) void PrintType(Type t, std::ostream& os) override; //NOLINT(*) std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) final; // NOLINT(*) + Expr base) override; // NOLINT(*) void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) final; //NOLINT(*) + Expr base, const std::string& value) override; //NOLINT(*) void PrintVecAddr(const Variable * buffer, Type t, Expr base, std::ostream& os); //NOLINT(*) std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) //overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) + void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) + void VisitStmt_(const LetStmt* op) override; // NOLINT(*) void GenForStmt(const For* op, std::string pragma, bool before); - void VisitStmt_(const LetStmt* op) final; // NOLINT(*) + private: @@ -57,4 +58,4 @@ class CodeGenOpenCL final : public CodeGenC { } // namespace codegen } // namespace TVM -#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ \ No newline at end of file +#endif // TVM_CODEGEN_CODEGEN_OPENCL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h 
index 4e7f0e746..ff3d76662 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -11,40 +11,42 @@ # include # include "./codeanalys_openclc.h" # include "../codegen_c.h" +# include "./codegen_opencl.h" namespace TVM { namespace codegen { -class CodeGenSDACCEL final : public CodeGenC { +class CodeGenSDACCEL final : public CodeGenOpenCL { public: CodeGenSDACCEL(); // void AddFunction(LoweredFunc f); void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); std::string Finish(); - void InitFuncState(LoweredFunc f) final; - void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) final; //NOLINT(*) - void PrintStorageSync(const Call* op) final; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) final; //NOLINT(*) + void InitFuncState(LoweredFunc f) override; + void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) + void PrintStorageSync(const Call* op) override; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) void PrintType(Type t, std::ostream& os) override; //NOLINT(*) std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) final; // NOLINT(*) + Expr base) override; // NOLINT(*) void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) final; //NOLINT(*) + Expr base, const std::string& value) override; //NOLINT(*) void PrintVecAddr(const Variable * buffer, Type t, Expr base, std::ostream& os); //NOLINT(*) std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) //overload visitor - void VisitStmt_(const LetStmt* op) final; // NOLINT(*) - void VisitExpr_(const Broadcast * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) final; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& 
os) final; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) final; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) final; //NOLINT(*) + void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) + void VisitStmt_(const LetStmt* op) override; // NOLINT(*) + private: // whether enable fp16 and fp64 extension From 64d91e508a041f102ebcde5f1fa103f64f0e4813 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Sun, 18 Aug 2019 17:05:04 -0400 Subject: [PATCH 041/103] a --- tvm/src/codegen/opencl/codegen_opencl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h index 8b992ec7b..bf2c298d3 100644 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -45,7 +45,7 @@ class CodeGenOpenCL : public CodeGenC { void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) void VisitStmt_(const LetStmt* op) override; // NOLINT(*) - void GenForStmt(const For* op, std::string pragma, bool before); + // void GenForStmt(const For* op, std::string pragma, bool before); From e1233634494080f14bd408ab6060000945cd527f Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 19 Aug 2019 17:56:32 -0400 Subject: [PATCH 042/103] test+unroll+pipeline --- tests/test_codegen_opencl.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/test_codegen_opencl.py diff --git a/tests/test_codegen_opencl.py b/tests/test_codegen_opencl.py new file mode 100644 index 000000000..521e28e38 --- /dev/null +++ b/tests/test_codegen_opencl.py @@ 
-0,0 +1,29 @@ +import heterocl as hcl + +def test_pragma(): + hcl.init(hcl.Float()) + A = hcl.placeholder((10, 32), "A") + B = hcl.placeholder((10, 32)) + C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j]) + s1 = hcl.create_schedule([A, B, C]) + s1[C].unroll(C.axis[1], factor=4) + code1 = hcl.build(s1, target='aocl') + code11 = hcl.build(s1, target='sdaccel') + print (code1) + assert "#pragma unroll 4" in code1 + print (code11) + assert "__attribute__((opencl_unroll_hint(4)))" in code11 + # pipeline + s2 = hcl.create_schedule([A, B, C]) + s2[C].pipeline(C.axis[0], initiation_interval=2) + code2 = hcl.build(s2, target='aocl') + code22 = hcl.build(s2, target='sdaccel') + print (code2) + assert "#pragma ii 2" in code2 + print (code22) + assert "__attribute__((xcl_pipeline_loop(2)))" in code22 + + + +if __name__ == '__main__': + test_pragma() \ No newline at end of file From 0cdceb82d3c9de969589848a8ca8502a59ebbc42 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 21 Aug 2019 09:36:56 -0400 Subject: [PATCH 043/103] pragma --- tvm/src/codegen/opencl/build_opencl.cc | 122 +++--- tvm/src/codegen/opencl/codegen_aocl.cc | 64 +++ tvm/src/codegen/opencl/codegen_aocl.h | 5 +- tvm/src/codegen/opencl/codegen_opencl.cc | 451 ---------------------- tvm/src/codegen/opencl/codegen_opencl.h | 61 --- tvm/src/codegen/opencl/codegen_sdaccel.cc | 76 ++++ tvm/src/codegen/opencl/codegen_sdaccel.h | 6 +- 7 files changed, 208 insertions(+), 577 deletions(-) delete mode 100644 tvm/src/codegen/opencl/codegen_opencl.cc delete mode 100644 tvm/src/codegen/opencl/codegen_opencl.h diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index c72627127..5556a2938 100644 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -10,9 +10,8 @@ # include # include "../../runtime/meta_data.h" # include -# include "./codegen_opencl.h" -// # include "./codegen_sdaccel.h" -// # include "./codegen_aocl.h" +# include 
"./codegen_sdaccel.h" +# include "./codegen_aocl.h" # include "./codeanalys_openclc.h" # include "../build_common.h" // # include "./sdaccel/sdaccel_module.h" @@ -150,62 +149,38 @@ namespace codegen { // codegen for SDACCEL_WITH_ANALYSIS xxx -// std::string BuildSDACCEL(Array funcs) { -// using TVM::runtime::Registry; -// CodeAnalysOpenCLC ca; -// CodeGenSDACCEL cg; -// for (LoweredFunc f : funcs) { -// ca.AddFunction(f); -// str2tupleMap map_arg_type; -// map_arg_type = ca.Finish(); +std::string BuildSDACCEL(Array funcs) { + using TVM::runtime::Registry; + CodeAnalysOpenCLC ca; + CodeGenSDACCEL cg; + for (LoweredFunc f : funcs) { + ca.AddFunction(f); + str2tupleMap map_arg_type; + map_arg_type = ca.Finish(); -// cg.AddFunction(f, map_arg_type); + cg.AddFunction(f, map_arg_type); -// } -// std::string code = cg.Finish(); + } + std::string code = cg.Finish(); -// if (const auto* f = Registry::Get("tvm_callback_sdaccel_postproc")) { -// code = (*f)(code).operator std::string(); -// } + if (const auto* f = Registry::Get("tvm_callback_sdaccel_postproc")) { + code = (*f)(code).operator std::string(); + } -// LOG(WARNING) << "SDaccel doesn't have runtime, return kernel code"; -// return code; -// } - -// runtime::Module BuildSDACCELXCLBIN(Array funcs) - - - -// codegen for AOCL_WITH_ANALYSIS xxx -// std::string BuildAOCL(Array funcs) { -// using TVM::runtime::Registry; -// CodeAnalysOpenCLC ca; -// CodeGenAOCL cg; -// for (LoweredFunc f : funcs) { -// ca.AddFunction(f); -// str2tupleMap map_arg_type; -// map_arg_type = ca.Finish(); - -// cg.AddFunction(f, map_arg_type); + LOG(WARNING) << "SDaccel doesn't have runtime, return kernel code"; + return code; +} -// } -// std::string code = cg.Finish(); -// if (const auto* f = Registry::Get("tvm_callback_aocl_postproc")) { -// code = (*f)(code).operator std::string(); -// } - -// LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; -// return code; -// } +//runtime::Module BuildSDACCELXCLBIN(Array funcs) -// 
codegen for OPENCL_WITH_ANALYSIS xxx -std::string BuildOpenCL(Array funcs) { +// codegen for AOCL_WITH_ANALYSIS xxx +std::string BuildAOCL(Array funcs) { using TVM::runtime::Registry; CodeAnalysOpenCLC ca; - CodeGenOpenCL cg; + CodeGenAOCL cg; for (LoweredFunc f : funcs) { ca.AddFunction(f); str2tupleMap map_arg_type; @@ -216,16 +191,41 @@ std::string BuildOpenCL(Array funcs) { } std::string code = cg.Finish(); - if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { + if (const auto* f = Registry::Get("tvm_callback_aocl_postproc")) { code = (*f)(code).operator std::string(); } - LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; + LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; return code; } +// codegen for OPENCL_WITH_ANALYSIS xxx +// std::string BuildOpenCL(Array funcs) { +// using TVM::runtime::Registry; +// CodeAnalysOpenCLC ca; +// CodeGenOpenCL cg; +// for (LoweredFunc f : funcs) { +// ca.AddFunction(f); +// str2tupleMap map_arg_type; +// map_arg_type = ca.Finish(); + +// cg.AddFunction(f, map_arg_type); + +// } +// std::string code = cg.Finish(); + +// if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { +// code = (*f)(code).operator std::string(); +// } + +// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; +// return code; +// } + + + // codegen for OpenCL @@ -270,19 +270,19 @@ std::string BuildOpenCL(Array funcs) { -// TVM_REGISTER_API("codegen.build_sdaccel") -// .set_body([]( TVMArgs args, TVMRetValue * rv ) { -// * rv = BuildSDACCEL(args[0]); -// }); +TVM_REGISTER_API("codegen.build_sdaccel") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildSDACCEL(args[0]); + }); -// TVM_REGISTER_API("codegen.build_aocl") -// .set_body([]( TVMArgs args, TVMRetValue * rv ) { -// * rv = BuildAOCL(args[0]); -// }); -TVM_REGISTER_API("codegen.build_opencl") +TVM_REGISTER_API("codegen.build_aocl") .set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildOpenCL(args[0]); 
+ * rv = BuildAOCL(args[0]); }); +// TVM_REGISTER_API("codegen.build_opencl") +// .set_body([]( TVMArgs args, TVMRetValue * rv ) { +// * rv = BuildOpenCL(args[0]); +// }); // template mode for opencl @@ -330,4 +330,4 @@ TVM_REGISTER_API("codegen.build_opencl") // * rv = BuildOpenCL(args[0]); // }); } // namespace codegen -} // namespace TVM \ No newline at end of file +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index ec321cc17..0e19c0040 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -420,6 +420,70 @@ void CodeGenAOCL::VisitStmt_(const IfThenElse* op) { stream << "}\n"; } +void CodeGenAOCL::GenForStmt(const For* op, std::string pragma, bool before) { + std::string extent = PrintExpr(op->extent); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + if (before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + PrintIndent(); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + if (!before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + +void CodeGenAOCL::VisitStmt_(const For* op) { + std::ostringstream os; + if (op->for_type == ForType::Unrolled) { + int unroll_factor = 0, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto factor = op->annotate_values[i].as(); + if (str->value == "factor" && factor != nullptr && factor->value > 1) { + unroll_factor = factor->value; + break; + } + } + i++; + } + os << "#pragma unroll"; + if (unroll_factor > 0) os << " " << unroll_factor << "\n"; + else os << "\n"; + } + else if (op->for_type == ForType::Pipelined) { + int II = 1, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = 
key.as()) { + auto initiation_interval = op->annotate_values[i].as(); + if (str->value == "initiation_interval" && + initiation_interval != nullptr && + initiation_interval->value > 1) { + II = initiation_interval->value; + break; + } + } + i++; + } + os << "#pragma"; + os << " ii " << II << "\n"; + } + CodeGenAOCL::GenForStmt(op, os.str(), true); +} + + } // namespace codegen diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 6426c13f2..05e268833 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -11,14 +11,13 @@ # include # include "./codeanalys_openclc.h" # include "../codegen_c.h" -# include "./codegen_opencl.h" namespace TVM { namespace codegen { -class CodeGenAOCL final : public CodeGenOpenCL { +class CodeGenAOCL : public CodeGenC { public: CodeGenAOCL(); // void AddFunction(LoweredFunc f); @@ -48,6 +47,8 @@ class CodeGenAOCL final : public CodeGenOpenCL { void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) void VisitStmt_(const LetStmt* op) override; // NOLINT(*) + void GenForStmt(const For* op, std::string pragma, bool before); + void VisitStmt_(const For* op) override; private: diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc deleted file mode 100644 index 0ac068121..000000000 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ /dev/null @@ -1,451 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include -# include -# include -# include -# include -# include -# include "./codegen_opencl.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -CodeGenOpenCL::CodeGenOpenCL() { - restrict_keyword_ = "restrict"; -} - -void CodeGenOpenCL::InitFuncState(LoweredFunc f) { - CodeGenC::InitFuncState(f); - for (Var arg: f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } -} - - -// void CodeGenOpenCL::AddFunction(LoweredFunc f) { -// 
this->stream << "__kernel "; -// CodeGenC::AddFunction(f); -// } - -// void CodeGenOpenCL::AddFunction(LoweredFunc f) { - // this->stream << "# pragma once\n"; - // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n\n"; - // this->stream << "__kernel "; - -// CodeGenC::AddFunction(f); -// } - -void CodeGenOpenCL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - this->InitFuncState(f); - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - // Write head files - // stream.open("host.cpp"); - // this->stream << "# pragma once\n"; - // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n\n"; - - // Write entry function name - // this->stream << "__kernel " << f->name << "("; - // this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; - // this->stream << f->name << "("; - this->stream << "__kernel " 
<< "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - this->stream << "__global "; - // this->stream << "global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - - - - -// void CodeGenOpenCL::AddFunction(LoweredFunc f, -// str2tupleMap map_arg_type) { -// // Don't Write header flies -// // Clear previous generated state -// this->InitFuncState(f); -// // Register alloc buffer type -// for ( const auto & kv : f->handle_data_type ) { -// this->stream << kv.first.get(); -// this->stream << kv.second.type(); -// RegisterHandleType(kv.first.get(), kv.second.type()); -// } -// // Write entry function name -// this->stream << "__kernel "; -// // Write arguments -// for ( size_t i = 0; i < f->args.size(); i++ ) { -// Var v = f->args[i]; -// std::string vid = AllocVarID(v.get()); -// if ( i!= 0 ) { -// this->stream << ", "; -// } -// if ( map_arg_type.find(vid) == map_arg_type.end()) { -// LOG(WARNING) << vid << " type not found\n"; -// PrintType(v.type(), this->stream); -// this->stream << ' ' << vid; -// } -// else { -// auto arg = map_arg_type[vid]; -// PrintType(std::get<1>(arg), this->stream); -// if (v.type().is_handle()) { -// this->stream << "*"; -// } -// this->stream << ' ' << std::get<0>(arg); - -// } -// stream << ") {\n"; -// int func_scope = this->BeginScope(); -// this->PrintStmt(f->body); 
-// this->EndScope(func_scope); -// this->PrintIndent(); -// this->stream << "}\n\n"; -// } -// CodeGenOpenCL::AddFunction(f, map_arg_type); -// } - -std::string CodeGenOpenCL::Finish() { - // inject extension enable pragma for fp16 and fp64 - if (enable_fp16_) { - decl_stream - << "#ifdef cl_khr_fp16\n" - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "#elif defined(cl_amd_fp16)\n" - "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" - "#else\n" - "#error \"Half precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - if (enable_fp64_) { - decl_stream - << "#ifdef cl_khr_fp64\n" - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "#elif defined(cl_amd_fp64)\n" - "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" - "#else\n" - "#error \"Double precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - return CodeGenC::Finish(); -} - -void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { - CHECK(!var_idmap_.count(iv->var.get())); - runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); - std::ostringstream os; - if (ts.rank == 1) { - os << "get_local_id(" << ts.dim_index << ")"; - } else { - os << "get_group_id(" << ts.dim_index << ")"; - } - var_idmap_[iv->var.get()] = - CastFromTo(os.str(), UInt(64), iv->var.type()); -} - -void CodeGenOpenCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - int lanes = t.lanes(); - if (t.is_handle()) { - CHECK_EQ(lanes, 1) - << "do not yet support vector types"; - os << "void*"; return; - } - if ( t== Bool() ) { - os << "bool"; return; - } - bool fail = false; - if (t.is_float()) { - switch (t.bits()) { - case 16: - os << "half"; - enable_fp16_ = true; - break; - case 32: - os << "float"; - break; - case 64: - os << "double"; - enable_fp64_ = true; - break; - default: - fail = true; - break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; 
return; - } - } else if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << 'u'; - } - if (t.bits() == 8 && t.lanes() == 4) { - // directly 4 8 bit int in integer. - os << "int"; return; - } - switch (t.bits()) { - case 8: os << "char"; break; - case 16: os << "short"; break; - case 32: os << "int"; break; - case 64: os << "long"; break; - case 1: os << "int"; break; - default: fail = true; break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } - LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; -} - -void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os) { // NOLINT(*) - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - auto it = alloc_storage_scope_.find(buffer); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << GetVarID(buffer) << " + "; - PrintExpr(base, os); -} -std::string CodeGenOpenCL::GetVecLoad( - Type t, const Variable* buffer, Expr base) { - std::ostringstream os; - os << "vload" << t.lanes() << "(0, "; - PrintVecAddr(buffer, t, base, os); - os << ")"; - return os.str(); -} - -void CodeGenOpenCL::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - this->PrintIndent(); - stream << "vstore" << t.lanes() << "(" << value << ", 0, "; - PrintVecAddr(buffer, t, base, stream); - stream << ");\n"; -} - -void CodeGenOpenCL::PrintStorageSync(const Call* op) { - const std::string& sync = op->args[0].as()->value; - if (sync == "warp") { - LOG(FATAL) << "warp sync not supported in opencl"; - } else if (sync == "shared") { - this->PrintIndent(); - this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; - } else if (sync == "global") { - LOG(FATAL) << "not supported"; - } -} - -// void CodeGenOpenCL::PrintStorageScope( -// const std::string& scope, std::ostream& os) { // NOLINT(*) -// if 
(scope == "global") { -// os << "__global "; -// } else if (scope == "shared") { -// os << "__local "; -// } -// } - -void CodeGenOpenCL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global") { - os << "global "; - } else if (scope == "shared") { - os << "local "; - } -} - - -std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - if (target.lanes() == 1) { - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - } else { // convert vector type - os << "("; - os << "convert_"; - this->PrintType(target, os); - os << "(" << value << "))"; - } - return os.str(); -} - -void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - std::string v = PrintExpr(op->value); - os << "(("; - PrintType(op->type, os); - os << ")("; - for (int i = 0; i < op->lanes; ++i) { - if (i != 0) os << ", "; - os << v; - } - os << "))"; -} - -void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) - if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintType(op->args[2].type(), os); - os << ")"; - } - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - - -void CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) - if (std::isinf(op->value)) { - if ( op->value < 0) { - os << "-"; - } - os << "INFINITY"; - } else if (std::isnan(op->value)) { - os << "NAN"; - } else { - CodeGenC::VisitExpr_(op, os); - } -} - 
-void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) - os << "("; - PrintType(op->true_value.type(), os); - os << ")"; - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenOpenCL::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - // Skip the buffer data checking - if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) - return ; - PrintIndent(); - if (cond[0] == '(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeGenOpenCL::GenForStmt(const For* op, std::string pragma, bool before) { - std::string extent = PrintExpr(op->extent); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - if (before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - PrintIndent(); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - if (!before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - - - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h deleted file mode 100644 index bf2c298d3..000000000 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ -#define TVM_CODEGEN_CODEGEN_OPENCL_H_ - -# include -# include -# include -# include 
"./codeanalys_openclc.h" -# include "../codegen_c.h" - -namespace TVM { -namespace codegen { - - -class CodeGenOpenCL : public CodeGenC { - public: - CodeGenOpenCL(); - // void AddFunction(LoweredFunc f); - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - std::string Finish(); - - void InitFuncState(LoweredFunc f) override; - void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) - void PrintStorageSync(const Call* op) override; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - - std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) override; // NOLINT(*) - void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) override; //NOLINT(*) - void PrintVecAddr(const Variable * buffer, Type t, - Expr base, std::ostream& os); //NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) - - //overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) - void VisitStmt_(const LetStmt* op) override; // NOLINT(*) - // void GenForStmt(const For* op, std::string pragma, bool before); - - - - private: - // whether enable fp16 and fp64 extension - bool enable_fp16_{false}; - bool enable_fp64_{false}; - -}; -} // namespace codegen -} // namespace TVM - -#endif // TVM_CODEGEN_CODEGEN_OPENCL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 47d66f68f..2a888ab98 100644 --- 
a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -420,5 +420,81 @@ void CodeGenSDACCEL::VisitStmt_(const IfThenElse* op) { stream << "}\n"; } + +void CodeGenSDACCEL::GenForStmt(const For* op, std::string pragma, bool before) { + std::string extent = PrintExpr(op->extent); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + if (before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + PrintIndent(); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + if (!before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + +void CodeGenSDACCEL::VisitStmt_(const For* op) { + std::ostringstream os; + if (op->for_type == ForType::Unrolled) { + int unroll_factor = 0, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto factor = op->annotate_values[i].as(); + if (str->value == "factor" && factor != nullptr && factor->value > 1) { + unroll_factor = factor->value; + break; + } + } + i++; + } + // os << "#pragma unroll"; + os << "__attribute__((opencl_unroll_hint("; + if (unroll_factor > 0) os << unroll_factor << ")))\n"; + else + os << "\n"; + + // if (unroll_factor > 0) os << " " << unroll_factor << "\n"; + // else os << "\n"; + } + else if (op->for_type == ForType::Pipelined) { + int II = 1, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto initiation_interval = op->annotate_values[i].as(); + if (str->value == "initiation_interval" && + initiation_interval != nullptr && + initiation_interval->value > 1) { + II = initiation_interval->value; + break; + } + } + i++; + } + // os << "#pragma"; + // os << " ii " << II << "\n"; + os << "__attribute__((xcl_pipeline_loop("; + os << II << ")))\n"; 
+ } + CodeGenSDACCEL::GenForStmt(op, os.str(), true); +} + + + + + + } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h index ff3d76662..a9c21a3d9 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -11,13 +11,12 @@ # include # include "./codeanalys_openclc.h" # include "../codegen_c.h" -# include "./codegen_opencl.h" namespace TVM { namespace codegen { -class CodeGenSDACCEL final : public CodeGenOpenCL { +class CodeGenSDACCEL : public CodeGenC { public: CodeGenSDACCEL(); // void AddFunction(LoweredFunc f); @@ -47,6 +46,9 @@ class CodeGenSDACCEL final : public CodeGenOpenCL { void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) void VisitStmt_(const LetStmt* op) override; // NOLINT(*) + void GenForStmt(const For* op, std::string pragma, bool before); + void VisitStmt_(const For* op) override; + private: // whether enable fp16 and fp64 extension From 4fba56ca1607a71f43f10d40dc634b8beeb84d37 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 21 Aug 2019 17:04:07 -0400 Subject: [PATCH 044/103] new --- tests/test_codegen_opencl.py | 59 +++++++++- tvm/src/codegen/opencl/codegen_aocl.cc | 125 ++++++++++++++-------- tvm/src/codegen/opencl/codegen_sdaccel.cc | 105 ++++++++++++++++-- tvm/src/codegen/opencl/codegen_sdaccel.h | 2 + 4 files changed, 238 insertions(+), 53 deletions(-) diff --git a/tests/test_codegen_opencl.py b/tests/test_codegen_opencl.py index 521e28e38..eeeb57b10 100644 --- a/tests/test_codegen_opencl.py +++ b/tests/test_codegen_opencl.py @@ -1,10 +1,12 @@ import heterocl as hcl + def test_pragma(): - hcl.init(hcl.Float()) + hcl.init() A = hcl.placeholder((10, 32), "A") B = hcl.placeholder((10, 32)) C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j]) + # unroll s1 = hcl.create_schedule([A, B, C]) s1[C].unroll(C.axis[1], factor=4) code1 = hcl.build(s1, target='aocl') @@ -22,8 +24,61 @@ def test_pragma(): 
assert "#pragma ii 2" in code2 print (code22) assert "__attribute__((xcl_pipeline_loop(2)))" in code22 + # partition + s3 = hcl.create_schedule([A, B, C]) + s3.partition(A, hcl.Partition.Block, dim=2, factor=2) + code3 = hcl.build(s3, target='sdaccel') + print (code3) + assert "__attribute__((xcl_array_partition(block,2,2)))" in code3 + + + + + +def test_binary_conv(): + hcl.init() + A = hcl.placeholder((1, 32, 14, 14), dtype=hcl.UInt(1), name="A") + B = hcl.placeholder((64, 32, 3, 3), dtype=hcl.UInt(1), name="B") + rc = hcl.reduce_axis(0, 32) + ry = hcl.reduce_axis(0, 3) + rx = hcl.reduce_axis(0, 3) + C = hcl.compute((1, 64, 12, 12), + lambda nn, ff, yy, xx: hcl.sum( + A[nn, rc, yy + ry, xx + rx] * B[ff, rc, ry, rx], axis=[rc, ry, rx]), + dtype=hcl.UInt(8), name="C") + s = hcl.create_schedule([A, B, C]) + s[C].split(C.axis[1], factor=5) + code = hcl.build(s, target='aocl') + print (code) + assert "for (ap_int<32>intd_t ff_outer = 0; ff_outer < 13; ++ff_outer)" in code + assert "for (ap_int<32>intd_t ff_inner = 0; ff_inner < 5; ++ff_inner)" in code + assert "if (ff_inner < (64 - (ff_outer * 5)))" in code + + +# def test_partition(): +# # hcl.init(hcl.Float()) +# # A = hcl.placeholder((10, 10), "A") +# # def kernel(A): +# # return hcl.compute((8, 8), lambda y, x: A[y][x] + A[y+2][x+2], "B") +# # s = hcl.create_schedule(A, kernel) +# # s[kernel.B].pipeline(kernel.B.axis[1]) +# # f = hcl.build(s, target='sdaccel') +# # print (f) +# hcl.init(hcl.Float()) +# A = hcl.placeholder((10, 10), "A") +# def kernel(A): +# return hcl.compute((8, 8), lambda y, x: A[y][x] + A[y+2][x+2], "B") +# s = hcl.create_scheme(A, kernel) +# s.partition(A) +# s[kernel.B].pipeline(kernel.B.axis[1]) +# f = hcl.build(s, target='sdaccel') +# print (f) + + if __name__ == '__main__': - test_pragma() \ No newline at end of file + test_pragma() + test_binary_conv() + diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 0e19c0040..9011b2739 100644 --- 
a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -81,6 +81,7 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, // this->stream << "__kernel " << f->name << "("; // this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; // this->stream << f->name << "("; + this->stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" << "\n"; this->stream << "__kernel " << "void " << f->name << "("; // Write arguments @@ -201,62 +202,102 @@ void CodeGenAOCL::BindThreadIndex(const IterVar& iv) { CastFromTo(os.str(), UInt(64), iv->var.type()); } +// void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) +// int lanes = t.lanes(); +// if (t.is_handle()) { +// CHECK_EQ(lanes, 1) +// << "do not yet support vector types"; +// os << "void*"; return; +// } +// if ( t== Bool() ) { +// os << "bool"; return; +// } +// bool fail = false; +// if (t.is_float()) { +// switch (t.bits()) { +// case 16: +// os << "half"; +// enable_fp16_ = true; +// break; +// case 32: +// os << "float"; +// break; +// case 64: +// os << "double"; +// enable_fp64_ = true; +// break; +// default: +// fail = true; +// break; +// } +// if (!fail && lanes == 1) return; +// if (!fail && (lanes >= 2 && lanes <= 16)) { +// os << lanes; return; +// } +// } else if (t.is_uint() || t.is_int()) { +// if (t.is_uint()) { +// os << 'u'; +// } +// if (t.bits() == 8 && t.lanes() == 4) { +// // directly 4 8 bit int in integer. 
+// os << "int"; return; +// } +// switch (t.bits()) { +// case 8: os << "char"; break; +// case 16: os << "short"; break; +// case 32: os << "int"; break; +// case 64: os << "long"; break; +// case 1: os << "int"; break; +// default: fail = true; break; +// } +// if (!fail && lanes == 1) return; +// // if (!fail && (lanes >= 2 && lanes <= 16)) { +// // os << lanes; return; +// // } +// } +// } + + void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - int lanes = t.lanes(); + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; if (t.is_handle()) { - CHECK_EQ(lanes, 1) - << "do not yet support vector types"; os << "void*"; return; } - if ( t== Bool() ) { - os << "bool"; return; - } - bool fail = false; if (t.is_float()) { - switch (t.bits()) { - case 16: - os << "half"; - enable_fp16_ = true; - break; - case 32: - os << "float"; - break; - case 64: - os << "double"; - enable_fp64_ = true; - break; - default: - fail = true; - break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; + if (t.bits() == 16) { + enable_fp16_ = true; + os << "half"; return; } - } else if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << 'u'; + if (t.bits() == 32) { + os << "float"; return; } - if (t.bits() == 8 && t.lanes() == 4) { - // directly 4 8 bit int in integer. 
- os << "int"; return; + if (t.bits() == 64) { + enable_fp64_ = true; + os << "double"; return; } + } else if (t.is_uint()) { switch (t.bits()) { - case 8: os << "char"; break; - case 16: os << "short"; break; - case 32: os << "int"; break; - case 64: os << "long"; break; - case 1: os << "int"; break; - default: fail = true; break; + case 8: case 16: case 32: case 64: { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; + // os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_int<" << t.bits() << ">" << "intd_t"; return; + // os << "int" << t.bits() << "_t"; return; + + } } } - LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; } + + + void CodeGenAOCL::PrintVecAddr(const Variable* buffer, Type t, Expr base, std::ostream& os) { // NOLINT(*) if (!HandleTypeMatch(buffer, t.element_of())) { diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 2a888ab98..3a3fedc90 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -250,13 +250,54 @@ void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) default: fail = true; break; } if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } + // if (!fail && (lanes >= 2 && lanes <= 16)) { + // os << lanes; return; + // } } - LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; + // LOG(FATAL) << "Cannot convert type " << t << " to OpenCL C type"; } + +// void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) +// CHECK_EQ(t.lanes(), 1) +// << "do not yet support vector types"; +// if (t.is_handle()) { +// os << "void*"; return; +// } +// if (t.is_float()) { +// if (t.bits() == 16) { +// 
enable_fp16_ = true; +// os << "half"; return; +// } +// if (t.bits() == 32) { +// os << "float"; return; +// } +// if (t.bits() == 64) { +// enable_fp64_ = true; +// os << "double"; return; +// } +// } else if (t.is_uint() || t.is_int()) { +// if (t.is_uint()) { +// os << 'u'; +// } +// if (t.bits() == 8 && t.lanes() == 4) { +// os << "int"; return; +// } +// switch (t.bits()) { +// case 8: os << "char"; break; +// case 16: os << "short"; break; +// case 32: os << "int"; break; +// case 64: os << "long"; break; +// case 1: os << "int"; break; +// } +// } + +// os << t; +// } + + + + void CodeGenSDACCEL::PrintVecAddr(const Variable* buffer, Type t, Expr base, std::ostream& os) { // NOLINT(*) if (!HandleTypeMatch(buffer, t.element_of())) { @@ -460,14 +501,11 @@ void CodeGenSDACCEL::VisitStmt_(const For* op) { } i++; } - // os << "#pragma unroll"; os << "__attribute__((opencl_unroll_hint("; if (unroll_factor > 0) os << unroll_factor << ")))\n"; else os << "\n"; - // if (unroll_factor > 0) os << " " << unroll_factor << "\n"; - // else os << "\n"; } else if (op->for_type == ForType::Pipelined) { int II = 1, i = 0; @@ -483,17 +521,66 @@ void CodeGenSDACCEL::VisitStmt_(const For* op) { } i++; } - // os << "#pragma"; - // os << " ii " << II << "\n"; os << "__attribute__((xcl_pipeline_loop("; os << II << ")))\n"; } CodeGenSDACCEL::GenForStmt(op, os.str(), true); } +// void CodeGenSDACCEL::VisitStmt_(const Partition* op) { +// std::string vid = GetVarID(op->buffer_var.get()); +// stream << vid << " "; +// stream << "__attribute__((xcl_array_partition("; +// switch (op->partition_type) { +// case PartitionType::Complete: +// stream << "complete,"; +// break; +// case PartitionType::Block: +// stream << "block,"; +// break; +// case PartitionType::Cyclic: +// stream << "cyclic,"; +// break; +// } +// if (op->partition_type != PartitionType::Complete) { +// stream << op->factor << ","; +// // stream << " factor=" << op->factor; +// } +// stream << op->dim << ")))"; +// stream 
<< "\n"; +// } +void CodeGenSDACCEL::VisitStmt_(const Partition* op) { + std::string vid = GetVarID(op->buffer_var.get()); + stream << vid << " "; + if (op->partition_type != PartitionType::Complete) { + stream << "__attribute__((xcl_array_partition("; + switch (op->partition_type) { + // case PartitionType::Complete: + // stream << "complete,"; + // break; + case PartitionType::Block: + stream << "block,"; + break; + case PartitionType::Cyclic: + stream << "cyclic,"; + break; + } + stream << op->factor << ","; + stream << op->dim << ")))\n"; + }else { + if (op->dim == 0) { + stream << "__attribute__((xcl_array_partition))\n"; + } else { + stream << "__attribute__((xcl_array_partition("; + stream << "complete,"; + stream << op->factor << ","; + stream << op->dim << ")))\n"; + } + } +} } // namespace codegen diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h index a9c21a3d9..a1f141501 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -48,6 +48,8 @@ class CodeGenSDACCEL : public CodeGenC { void GenForStmt(const For* op, std::string pragma, bool before); void VisitStmt_(const For* op) override; + void VisitStmt_(const Partition* op) override; + private: From ccd58fd95e25f8d027d89075620ecbaa3abc0d6a Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 22 Aug 2019 16:33:17 -0400 Subject: [PATCH 045/103] type has fixed --- ...codegen_opencl.py => test_codegen_aocl.py} | 14 +-- tests/test_codegen_sdaccel.py | 35 +++++++ tvm/src/codegen/opencl/codegen_aocl.cc | 91 ++++++++++++++----- tvm/src/codegen/opencl/codegen_sdaccel.cc | 8 +- 4 files changed, 108 insertions(+), 40 deletions(-) rename tests/{test_codegen_opencl.py => test_codegen_aocl.py} (81%) create mode 100644 tests/test_codegen_sdaccel.py diff --git a/tests/test_codegen_opencl.py b/tests/test_codegen_aocl.py similarity index 81% rename from tests/test_codegen_opencl.py rename to tests/test_codegen_aocl.py index 
eeeb57b10..27c58bb0f 100644 --- a/tests/test_codegen_opencl.py +++ b/tests/test_codegen_aocl.py @@ -6,30 +6,20 @@ def test_pragma(): A = hcl.placeholder((10, 32), "A") B = hcl.placeholder((10, 32)) C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j]) + # unroll s1 = hcl.create_schedule([A, B, C]) s1[C].unroll(C.axis[1], factor=4) code1 = hcl.build(s1, target='aocl') - code11 = hcl.build(s1, target='sdaccel') print (code1) assert "#pragma unroll 4" in code1 - print (code11) - assert "__attribute__((opencl_unroll_hint(4)))" in code11 + # pipeline s2 = hcl.create_schedule([A, B, C]) s2[C].pipeline(C.axis[0], initiation_interval=2) code2 = hcl.build(s2, target='aocl') - code22 = hcl.build(s2, target='sdaccel') print (code2) assert "#pragma ii 2" in code2 - print (code22) - assert "__attribute__((xcl_pipeline_loop(2)))" in code22 - # partition - s3 = hcl.create_schedule([A, B, C]) - s3.partition(A, hcl.Partition.Block, dim=2, factor=2) - code3 = hcl.build(s3, target='sdaccel') - print (code3) - assert "__attribute__((xcl_array_partition(block,2,2)))" in code3 diff --git a/tests/test_codegen_sdaccel.py b/tests/test_codegen_sdaccel.py new file mode 100644 index 000000000..f0f7f8394 --- /dev/null +++ b/tests/test_codegen_sdaccel.py @@ -0,0 +1,35 @@ +import heterocl as hcl + + + + + +def test_pragma(): + hcl.init(hcl.Float()) + A = hcl.placeholder((10, 32), "A") + B = hcl.placeholder((10, 32)) + C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j]) + + # unroll + s1 = hcl.create_schedule([A, B, C]) + s1[C].unroll(C.axis[1], factor=6) + code1 = hcl.build(s1, target='sdaccel') + print (code1) + assert "__attribute__((opencl_unroll_hint(6)))" in code1 + + # pipeline + s2 = hcl.create_schedule([A, B, C]) + s2[C].pipeline(C.axis[0], initiation_interval=2) + code2 = hcl.build(s2, target='sdaccel') + print (code2) + assert "__attribute__((xcl_pipeline_loop(2)))" in code2 + + # partition + s3 = hcl.create_schedule([A, B, C]) + s3.partition(A, hcl.Partition.Block, dim=2, 
factor=2) + code3 = hcl.build(s3, target='sdaccel') + print (code3) + assert "__attribute__((xcl_array_partition(block,2,2)))" in code3 + +if __name__ == "__main__": + test_pragma() \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 9011b2739..2da5372eb 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -264,35 +264,78 @@ void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) if (t.is_handle()) { os << "void*"; return; } - if (t.is_float()) { - if (t.bits() == 16) { - enable_fp16_ = true; - os << "half"; return; - } - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - enable_fp64_ = true; - os << "double"; return; + + if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; - // os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; + else if ( t.is_int()) { + os << "ap_int<" << t.bits() << ">" << "intd_t"; } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_int<" << t.bits() << ">" << "intd_t"; return; - // os << "int" << t.bits() << "_t"; return; - + else { + if (t.is_float()) { + if (t.bits() == 16) { + enable_fp16_ = true; + os << "half"; return; + } + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + enable_fp64_ = true; + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; + // os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_int<" << t.bits() << ">" 
<< "intd_t"; return; + // os << "int" << t.bits() << "_t"; return; + } + } } } } + + + // if (t.is_float()) { + // if (t.bits() == 16) { + // enable_fp16_ = true; + // os << "half"; return; + // } + // if (t.bits() == 32) { + // os << "float"; return; + // } + // if (t.bits() == 64) { + // enable_fp64_ = true; + // os << "double"; return; + // } + // } else if (t.is_uint()) { + // switch (t.bits()) { + // case 8: case 16: case 32: case 64: { + // os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; + // // os << "uint" << t.bits() << "_t"; return; + // } + // case 1: os << "int"; return; + // } + // } else if (t.is_int()) { + // switch (t.bits()) { + // case 8: case 16: case 32: case 64: { + // os << "ap_int<" << t.bits() << ">" << "intd_t"; return; + // // os << "int" << t.bits() << "_t"; return; + + // } + // } + // } + // LOG(FATAL) << "Cannot convert type " << t << " to AOCL type"; + } diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 3a3fedc90..95928cbe5 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -250,11 +250,11 @@ void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) default: fail = true; break; } if (!fail && lanes == 1) return; - // if (!fail && (lanes >= 2 && lanes <= 16)) { - // os << lanes; return; - // } + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } } - // LOG(FATAL) << "Cannot convert type " << t << " to OpenCL C type"; + LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; } From 1832760f76fe3185cb400145ac832e8186616eb7 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 23 Aug 2019 13:12:29 -0400 Subject: [PATCH 046/103] new_test --- tests/test_codegen_aocl.py | 16 +++++++++++++++- tests/test_codegen_ihls.py | 1 + tests/test_codegen_sdaccel.py | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/test_codegen_aocl.py b/tests/test_codegen_aocl.py index 
27c58bb0f..4f15ec511 100644 --- a/tests/test_codegen_aocl.py +++ b/tests/test_codegen_aocl.py @@ -1,6 +1,20 @@ import heterocl as hcl +def test_ap_int(): + hcl.init(); + A = hcl.placeholder((1, 32), dtype=hcl.Int(3)) + B = hcl.placeholder((1, 32), dtype=hcl.UInt(3)) + C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Int(8)) + s = hcl.create_schedule([A, B, C]) + code = hcl.build(s, target='aocl') + print (code) + assert "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" in code + assert "ap_int<3>intd_t" in code + assert "ap_uint<3>uintd_t" in code + assert "ap_int<8>intd_t" in code + + def test_pragma(): hcl.init() A = hcl.placeholder((10, 32), "A") @@ -24,7 +38,6 @@ def test_pragma(): - def test_binary_conv(): hcl.init() A = hcl.placeholder((1, 32, 14, 14), dtype=hcl.UInt(1), name="A") @@ -69,6 +82,7 @@ def test_binary_conv(): if __name__ == '__main__': + test_ap_int() test_pragma() test_binary_conv() diff --git a/tests/test_codegen_ihls.py b/tests/test_codegen_ihls.py index fc5a7e53b..1b53f18ca 100644 --- a/tests/test_codegen_ihls.py +++ b/tests/test_codegen_ihls.py @@ -65,3 +65,4 @@ def kernel(A): s = hcl.create_schedule([A], kernel) code = hcl.build(s, target="ihls") assert "A[0].slc<4>(1)" in code + diff --git a/tests/test_codegen_sdaccel.py b/tests/test_codegen_sdaccel.py index f0f7f8394..43d94f238 100644 --- a/tests/test_codegen_sdaccel.py +++ b/tests/test_codegen_sdaccel.py @@ -30,6 +30,7 @@ def test_pragma(): code3 = hcl.build(s3, target='sdaccel') print (code3) assert "__attribute__((xcl_array_partition(block,2,2)))" in code3 + if __name__ == "__main__": test_pragma() \ No newline at end of file From c3d8f3c209e0cff6079aab6a7f9902312e494a7a Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 23 Aug 2019 15:10:16 -0400 Subject: [PATCH 047/103] test_reorder_split_fuse --- tests/test_codegen_aocl.py | 62 +++++++++++++++++--------- tvm/src/codegen/opencl/codegen_aocl.cc | 2 - tvm/src/codegen/opencl/codegen_aocl.h | 1 
+ 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/tests/test_codegen_aocl.py b/tests/test_codegen_aocl.py index 4f15ec511..28013f924 100644 --- a/tests/test_codegen_aocl.py +++ b/tests/test_codegen_aocl.py @@ -36,6 +36,45 @@ def test_pragma(): assert "#pragma ii 2" in code2 +def test_reorder(): + hcl.init() + A = hcl.placeholder((10, 100), "A") + + def two_stage(A): + B = hcl.compute(A.shape, lambda x, y : A[x, y] + 1, "B") + C = hcl.compute(A.shape, lambda x, y : B[x, y] + 1, "C") + return C + + s = hcl.create_schedule([A], two_stage) + s_B = two_stage.B + code = hcl.build(s, target='aocl') + print (code) + s[s_B].reorder(s_B.axis[1], s_B.axis[0]) + code2 = hcl.build(s, target='aocl') + print (code2) + + + +def test_split_fuse(): + hcl.init() + A = hcl.placeholder((10, 100), "A") + + def two_stage(A): + B = hcl.compute(A.shape, lambda x, y : A[x, y] + 1, "B") + C = hcl.compute(A.shape, lambda x, y : B[x, y] + 1, 'C') + return C + + s = hcl.create_schedule([A], two_stage) + s_B = two_stage.B + x_out, x_in = s[s_B].split(s_B.axis[0], 5) + code = hcl.build(s, target='aocl') + print (code) + s2 = hcl.create_schedule([A], two_stage) + s2_B = two_stage.B + x_y = s[s_B].fuse(s2_B.axis[0], s2_B.axis[1]) + code2 = hcl.build(s2, target='aocl') + print (code2) + def test_binary_conv(): @@ -58,31 +97,14 @@ def test_binary_conv(): assert "if (ff_inner < (64 - (ff_outer * 5)))" in code -# def test_partition(): -# # hcl.init(hcl.Float()) -# # A = hcl.placeholder((10, 10), "A") -# # def kernel(A): -# # return hcl.compute((8, 8), lambda y, x: A[y][x] + A[y+2][x+2], "B") -# # s = hcl.create_schedule(A, kernel) -# # s[kernel.B].pipeline(kernel.B.axis[1]) -# # f = hcl.build(s, target='sdaccel') -# # print (f) -# hcl.init(hcl.Float()) -# A = hcl.placeholder((10, 10), "A") -# def kernel(A): -# return hcl.compute((8, 8), lambda y, x: A[y][x] + A[y+2][x+2], "B") -# s = hcl.create_scheme(A, kernel) -# s.partition(A) -# s[kernel.B].pipeline(kernel.B.axis[1]) -# f = 
hcl.build(s, target='sdaccel') -# print (f) - - if __name__ == '__main__': test_ap_int() test_pragma() + test_set_bit() test_binary_conv() + test_reorder() + test_split_fuse() diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 2da5372eb..8cfeb218c 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -568,7 +568,5 @@ void CodeGenAOCL::VisitStmt_(const For* op) { } - - } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 05e268833..a265e1653 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -44,6 +44,7 @@ class CodeGenAOCL : public CodeGenC { void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) void VisitStmt_(const LetStmt* op) override; // NOLINT(*) From ad27bccd7802d8c1f60a8a4a906800805fe4fd5e Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 23 Aug 2019 15:17:21 -0400 Subject: [PATCH 048/103] target --- python/heterocl/tvm/target.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 17b483858..ab8b92f79 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -17,7 +17,8 @@ if _LIB_NAME != "libhcl_runtime.so": raise err_msg -FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', 'opencl', 'sdaccel', 'sdaccel_sw_emu', 'aocl', 'aocl_sw_emu'] +FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', + 'opencl', 'sdaccel', 'sdaccel_sw_emu', 'aocl', 'aocl_sw_emu'] def _merge_opts(opts, new_opts): """Helper function to merge options""" From 
337db9ac7b08d72e416725fdcd82c35aff758557 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 23 Aug 2019 15:25:26 -0400 Subject: [PATCH 049/103] order --- tests/test_codegen_aocl.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_codegen_aocl.py b/tests/test_codegen_aocl.py index 28013f924..8359e9a08 100644 --- a/tests/test_codegen_aocl.py +++ b/tests/test_codegen_aocl.py @@ -98,13 +98,11 @@ def test_binary_conv(): - - if __name__ == '__main__': test_ap_int() test_pragma() - test_set_bit() - test_binary_conv() test_reorder() test_split_fuse() + test_binary_conv() + From 3cb38bf19deb3cf7eb71875c8d62724a49868257 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 23 Aug 2019 15:27:20 -0400 Subject: [PATCH 050/103] simplified by rui --- tvm/src/codegen/opencl/aocl/aocl_module.cc | 0 tvm/src/codegen/opencl/aocl/aocl_module.h | 0 tvm/src/codegen/opencl/build_opencl.cc | 391 ++------ tvm/src/codegen/opencl/codeanalys_openclc.cc | 919 ------------------ tvm/src/codegen/opencl/codeanalys_openclc.h | 202 ---- tvm/src/codegen/opencl/codegen_aocl.cc | 718 +++----------- tvm/src/codegen/opencl/codegen_aocl.h | 96 +- tvm/src/codegen/opencl/codegen_opencl.cc | 246 +++++ tvm/src/codegen/opencl/codegen_opencl.h | 53 + tvm/src/codegen/opencl/codegen_sdaccel.cc | 783 ++++----------- tvm/src/codegen/opencl/codegen_sdaccel.h | 98 +- .../codegen/opencl/sdaccel/sdaccel_module.cc | 336 ------- .../codegen/opencl/sdaccel/sdaccel_module.h | 30 - 13 files changed, 767 insertions(+), 3105 deletions(-) delete mode 100644 tvm/src/codegen/opencl/aocl/aocl_module.cc delete mode 100644 tvm/src/codegen/opencl/aocl/aocl_module.h mode change 100644 => 100755 tvm/src/codegen/opencl/build_opencl.cc delete mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.cc delete mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.h mode change 100644 => 100755 tvm/src/codegen/opencl/codegen_aocl.cc mode change 100644 => 100755 tvm/src/codegen/opencl/codegen_aocl.h create mode 
100755 tvm/src/codegen/opencl/codegen_opencl.cc create mode 100755 tvm/src/codegen/opencl/codegen_opencl.h mode change 100644 => 100755 tvm/src/codegen/opencl/codegen_sdaccel.cc mode change 100644 => 100755 tvm/src/codegen/opencl/codegen_sdaccel.h delete mode 100644 tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc delete mode 100644 tvm/src/codegen/opencl/sdaccel/sdaccel_module.h diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.cc b/tvm/src/codegen/opencl/aocl/aocl_module.cc deleted file mode 100644 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.h b/tvm/src/codegen/opencl/aocl/aocl_module.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc old mode 100644 new mode 100755 index 5556a2938..b964176c5 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -1,333 +1,58 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -# include -# include -# include -# include -# include -# include "../../runtime/meta_data.h" -# include -# include "./codegen_sdaccel.h" -# include "./codegen_aocl.h" -# include "./codeanalys_openclc.h" -# include "../build_common.h" -// # include "./sdaccel/sdaccel_module.h" -// # include "./aocl/aocl_module.h" - - -namespace TVM { -namespace codegen { - -// #if OPENCL_SDACCEL_RUNTIME -// runtime::Module BuildSDAccelSwEmu(Array funcs) { -// CodeAnalysOpenCLC ca; -// CodeGenSDACCEL cg; -// for (LoweredFunc f : funcs) { -// ca.AddFunction(f); -// str2tupleMap map_arg_type; -// map_arg_type = ca.Finish(); -// cg.AddFunction(f, map_arg_type); -// } -// std::string code = cg.Finish(); - -// return runtime::CreateSDAccelModule(funcs[0], code); -// } - -// TVM_REGISTER_API("codegen.build_sdaccel_sw_emu") -// .set_body([](TVMArgs args, TVMRetValue* rv) { -// *rv = BuildSDAccelSwEmu(args[0]); -// }); -// #endif - - - -// #if OPENCL_AOCL_RUNTIME - -// #endif - -// std::string BuildOpenCL(Array funcs) { 
-// using TVM::runtime::Registry; - -// CodeAnalysOpenCLC ca; -// CodeGenSDACCEL cg; -// for (LoweredFunc f : funcs) { -// ca.AddFunction(f); -// str2tupleMap map_arg_type; -// map_arg_type = ca.Finish(); - -// cg.AddFunction(f, map_arg_type); -// } -// std::string code = cg.Finish(); - -// if (const auto*f = Registry::Get("tvm_callback_opencl_postproc")) { -// code = (*f)(code).operator std::string(); -// } -// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; -// return code; -// } - -// std::string BuildOpenCL(Array funcs) { -// using TVM::runtime::Registry; -// bool output_ssa = false; -// CodeGenSDACCEL cg; -// cg.Init(output_ssa); - -// for (LoweredFunc f : funcs) { -// cg.AddFunction(f); -// } -// std::string code = cg.Finish(); - -// if (const auto*f = Registry::Get("tvm_callback_opencl_postproc")) { -// code = (*f)(code).operator std::string(); -// } -// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; -// return code; -// } - - - - - -// codegen for AOCL -// std::string BuildAOCL(Array funcs) { -// using TVM::runtime::Registry; -// bool output_ssa = false; -// CodeGenAOCL cg; -// cg.Init(output_ssa); -// for ( LoweredFunc f : funcs ) { -// cg.AddFunction(f); -// } -// std::string code = cg.Finish(); - -// if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { -// code = (*f)(code).operator std::string(); -// } -// LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; -// return code; -// } - - -// codegen for AOCL -// std::string BuildAOCL(Array funcs) { -// using TVM::runtime::Registry; -// bool output_ssa = false; -// CodeGenAOCL cg; -// cg.Init(output_ssa); -// for ( LoweredFunc f : funcs ) { -// cg.AddFunction(f); -// } -// std::string code = cg.Finish(); - -// if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { -// code = (*f)(code).operator std::string(); -// } -// LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; -// return code; -// } - - -// codegen 
for SDACCEL -// std::string BuildSDACCEL(Array funcs) { -// using TVM::runtime::Registry; -// bool output_ssa = false; -// CodeGenSDACCEL cg; -// cg.Init(output_ssa); -// for (LoweredFunc f : funcs) { -// cg.AddFunction(f); -// } -// std::string code = cg.Finish(); - -// // if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { -// // code = (*f)(code).operator std::string(); -// // } -// LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; -// return code; -// } - - -// codegen for SDACCEL_WITH_ANALYSIS xxx -std::string BuildSDACCEL(Array funcs) { - using TVM::runtime::Registry; - CodeAnalysOpenCLC ca; - CodeGenSDACCEL cg; - for (LoweredFunc f : funcs) { - ca.AddFunction(f); - str2tupleMap map_arg_type; - map_arg_type = ca.Finish(); - - cg.AddFunction(f, map_arg_type); - - } - std::string code = cg.Finish(); - - if (const auto* f = Registry::Get("tvm_callback_sdaccel_postproc")) { - code = (*f)(code).operator std::string(); - } - - LOG(WARNING) << "SDaccel doesn't have runtime, return kernel code"; - return code; -} - - -//runtime::Module BuildSDACCELXCLBIN(Array funcs) - - - -// codegen for AOCL_WITH_ANALYSIS xxx -std::string BuildAOCL(Array funcs) { - using TVM::runtime::Registry; - CodeAnalysOpenCLC ca; - CodeGenAOCL cg; - for (LoweredFunc f : funcs) { - ca.AddFunction(f); - str2tupleMap map_arg_type; - map_arg_type = ca.Finish(); - - cg.AddFunction(f, map_arg_type); - - } - std::string code = cg.Finish(); - - if (const auto* f = Registry::Get("tvm_callback_aocl_postproc")) { - code = (*f)(code).operator std::string(); - } - - LOG(WARNING) << "AOCL doesn't have runtime, return kernel code"; - return code; -} - - - -// codegen for OPENCL_WITH_ANALYSIS xxx -// std::string BuildOpenCL(Array funcs) { -// using TVM::runtime::Registry; -// CodeAnalysOpenCLC ca; -// CodeGenOpenCL cg; -// for (LoweredFunc f : funcs) { -// ca.AddFunction(f); -// str2tupleMap map_arg_type; -// map_arg_type = ca.Finish(); - -// cg.AddFunction(f, 
map_arg_type); - -// } -// std::string code = cg.Finish(); - -// if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { -// code = (*f)(code).operator std::string(); -// } - -// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; -// return code; -// } - - - - - -// codegen for OpenCL -// std::string BuildOpenCL(Array funcs) { -// using TVM::runtime::Registry; -// bool output_ssa = false; -// CodeGenOpenCL cg; -// cg.Init(output_ssa); -// for (LoweredFunc f : funcs) { -// cg.AddFunction(f); -// } -// std::string code = cg.Finish(); - -// LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; -// return code; -// } - - - -// codegen for SDACCEL -// template -// std::string BuildOpenCL(Array funcs) { -// CodeAnalysOpenCL ca; -// CodeGen cg; -// for (LoweredFunc f : funcs) { -// ca.AddFunction(f); -// str2tupleMap map_arg_type; -// map_arg_type = ca.Finish(); -// cg.AddFunction(f, map_arg_type); -// } -// std::string code = cg.Finish(); - -// // if ( const auto * f = Registry::Get("tvm_callback_opencl_postproc")) { -// // code = (*f)(code).operator std::string(); -// // } -// LOG(WARNING) << "SDAccel doesn't have runtime, return kernel code"; -// // std::unordered_map -// // std::unordered_map temp = ExtractFuncInfo(funcs); -// return code; -// } - - - - -TVM_REGISTER_API("codegen.build_sdaccel") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildSDACCEL(args[0]); - }); - -TVM_REGISTER_API("codegen.build_aocl") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildAOCL(args[0]); - }); -// TVM_REGISTER_API("codegen.build_opencl") -// .set_body([]( TVMArgs args, TVMRetValue * rv ) { -// * rv = BuildOpenCL(args[0]); -// }); - - -// template mode for opencl -// template -// std::string BuildOpenCL(Array funcs) { -// CodeAnalysOpenCLC ca; -// CodeGen cg; -// for (LoweredFunc f : funcs) { -// // 1st pass: Analyze AST and collect necessary information -// ca.AddFunction(f); -// str2tupleMap map_arg_type; -// 
map_arg_type = ca.Finish(); -// // 2nd pass: Generate kernel code -// cg.AddFunction(f, map_arg_type); -// } -// std::string code = cg.Finish(); - -// LOG(WARNING) << "OpenCL C doesn't have runtime, return kernel code"; -// return code; -// } - -// TVM_REGISTER_API("codegen.build_sdaccel") -// .set_body([](TVMArgs args, TVMRetValue* rv) { -// *rv = BuildOpenCL(args[0]); -// }); -// TVM_REGISTER_API("codegen.build_aocl") -// .set_body([](TVMArgs args, TVMRetValue* rv) { -// *rv = BuildOpenCL(args[0]); -// }); - -// For runtime -// TVM_REGISTER_API("codegen.build_sdaccel_xclbin") -// .set_body([]( TVMArgs args, TVMRetValue * rv ) { -// * rv = BuildSDACCEL(args[0]); -// }); - - -// TVM_REGISTER_API("codegen.build_opencl") -// .set_body([]( TVMArgs args, TVMRetValue * rv ) { -// * rv = BuildOpenCL(args[0]); -// }); - -// TVM_REGISTER_API("codegen.build_aocl") -// .set_body([]( TVMArgs args, TVMRetValue * rv ) { -// * rv = BuildOpenCL(args[0]); -// }); -} // namespace codegen -} // namespace TVM +/* + Yang.Bai + yb269@cornell.edu +*/ + +# include +# include +# include +# include +# include +# include "../../runtime/meta_data.h" +# include +# include "./codegen_sdaccel.h" +# include "./codegen_aocl.h" +# include "./codeanalys_openclc.h" +# include "../build_common.h" +// # include "./sdaccel/sdaccel_module.h" +// # include "./aocl/aocl_module.h" + + +namespace TVM { +namespace codegen { + + +template +std::string BuildOpenCL(Array funcs){ + using TVM::runtime::Registry; + CodeAnalysOpenCLC ca; + CodeGen cg; + for(LoweredFunc f: funcs){ + ca.AddFunction(f); + str2tupleMapmap_arg_type; + map_arg_type = ca.Finish(); + + cg.AddFunction(f, map_arg_type); + } + std::string code = cg.Finish(); + + if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { + code = (*f)(code).operator std::string(); + } + + LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; + return code; +} + + + +TVM_REGISTER_API("codegen.build_sdaccel") +.set_body([]( TVMArgs args, 
TVMRetValue * rv ) { + * rv = BuildOpenCL(args[0]); + }); + +TVM_REGISTER_API("codegen.build_aocl") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildOpenCL(args[0]); + }); + diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc deleted file mode 100644 index 030453a94..000000000 --- a/tvm/src/codegen/opencl/codeanalys_openclc.cc +++ /dev/null @@ -1,919 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file tvm/src/codegen/hlsc/codegen_hlsc.cc - */ -#include -#include -#include -#include "./codeanalys_openclc.h" -#include "../codegen_common.h" -#include "../../arithmetic/compute_expr.h" - -namespace TVM { -namespace codegen { - -using namespace ir; - -void CodeAnalysOpenCLC::Init() { - ; -} - -void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { - alloc_storage_scope_.clear(); - handle_data_type_.clear(); - map_arg_type_.clear(); - CodeGenSourceBase::ClearFuncState(); -} -void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { - // Clear previous generated state. - this->InitFuncState(f); - - // Add to alloc buffer type. 
- for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - // Record the arguments for analyzing the type - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - this->map_arg_type_[vid]; - } - int func_scope = this->BeginScope(); - VisitStmt(f->body); - this->EndScope(func_scope); -} - -str2tupleMap CodeAnalysOpenCLC::Finish() { - return this->map_arg_type_; -} - -void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) - VisitExpr(n, os); -} - -void CodeAnalysOpenCLC::PrintSSAAssign( - const std::string& target, const std::string& src, Type t) { - PrintType(t, stream); - stream << ' ' << target << " = "; - if (src.length() > 3 && - src[0] == '(' && src[src.length() - 1] == ')') { - stream << src.substr(1, src.length() - 2); - } else { - stream << src; - } - stream << ";\n"; -} - -// Print a reference expression to a buffer. -std::string CodeAnalysOpenCLC::GetBufferRef( - Type t, const Variable* buffer, Expr index) { - std::ostringstream os; - std::string vid = GetVarID(buffer); - std::string scope; - if (alloc_storage_scope_.count(buffer)) { - scope = alloc_storage_scope_.at(buffer); - } - bool is_vol = volatile_buf_.count(buffer) != 0; - if (t.lanes() == 1) { - if (!HandleTypeMatch(buffer, t) || is_vol) { - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)" << vid << ')'; - } else { - os << vid; - } - os << '['; - PrintExpr(index, os); - os << ']'; - } else { - // Buffer declared as vector type. 
- // optimize for case where it is in register, - if (HandleTypeMatch(buffer, t) && !is_vol) { - // optimize for constant access - int offset; - if (arith::GetConstInt(index, &offset)) { - CHECK_EQ(offset % t.lanes(), 0) - << "Find unaligned vector load to a vector type"; - os << vid << '[' << (offset / t.lanes()) << ']'; - return os.str(); - } - } - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)("; - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << vid << " + "; - PrintExpr(index, os); - os << "))[0]"; - } - return os.str(); -} - -// Print a reference expression to a buffer. -std::string CodeAnalysOpenCLC::GetStructRef( - Type t, const Expr& buffer, const Expr& index, int kind) { - if (kind < intrinsic::kArrKindBound_) { - std::ostringstream os; - os << "(((TVMArray*)"; - this->PrintExpr(buffer, os); - os << ")"; - if (kind == intrinsic::kArrAddr) { - os << " + "; - this->PrintExpr(index, os); - os << ")"; - return os.str(); - } - os << '['; - this->PrintExpr(index, os); - os << "]."; - // other case: get fields. 
- switch (kind) { - case intrinsic::kArrData: os << "data"; break; - case intrinsic::kArrShape: os << "shape"; break; - case intrinsic::kArrStrides: os << "strides"; break; - case intrinsic::kArrNDim: os << "ndim"; break; - case intrinsic::kArrTypeCode: os << "dtype.code"; break; - case intrinsic::kArrTypeBits: os << "dtype.bits"; break; - case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; - case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; - case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; - case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; - default: os << "unknown_field_code_" << kind; - } - os << ')'; - return os.str(); - } else { - CHECK_LT(kind, intrinsic::kTVMValueKindBound_); - std::ostringstream os; - os << "(((TVMValue*)"; - this->PrintExpr(buffer, os); - os << ")[" << index << "]."; - if (t.is_handle()) { - os << "v_handle"; - } else if (t.is_float()) { - os << "v_float64"; - } else if (t.is_int()) { - os << "v_int64"; - } else { - os << t; - } - os << ")"; - return os.str(); - } -} - - -bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) return false; - return it->second == t; -} - -void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) { - handle_data_type_[buf_var] = t; - } else { - CHECK(it->second == t) - << "conflicting buf var type"; - } -} - -void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, - Type t, int i, - std::ostream& os) { // NOLINT(*) - os << vec << ".s" << std::hex << i << std::dec; -} - -void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, - Type t, int i, - const std::string& value) { - this->PrintIndent(); - stream << vec << ".s" << std::hex << i - << " = " << value << ";\n" << std::dec; -} - -std::string CodeAnalysOpenCLC::GetVecLoad( - Type t, const 
Variable* buffer, Expr base) { - return GetBufferRef(t, buffer, base); -} - -void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - std::string ref = GetBufferRef(t, buffer, base); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; -} - -std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - return os.str(); -} - -void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { - LOG(FATAL) << "not implemented"; -} - -void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) -} - -void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) - CHECK_EQ(scope, "global"); -} - -std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) - std::ostringstream os; - PrintType(t, os); - return os.str(); -} - -void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - if (t.is_float()) { - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - os << "double"; return; - } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "int" << t.bits() << "_t"; return; - } - } - } - os << t; -} - - -inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == Int(32)) { - std::ostringstream temp; - temp << op->value; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == UInt(32)) { - std::ostringstream temp; - temp << op->value << "U"; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - switch (op->type.bits()) { - case 64: case 32: { - std::ostringstream temp; - temp << std::scientific << op->value; - if (op->type.bits() == 32) temp << 'f'; - p->MarkConst(temp.str()); - os << temp.str(); - break; - } - case 16: { - os << '('; - p->PrintType(op->type, os); - os << ')' << std::scientific <value << 'f'; - break; - } - default: os << op << "\n"; - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) - os << "\"" << op->value << "\""; -} - -template -inline void PrintBinaryExpr(const T* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - if (isalpha(opstr[0])) { - os << opstr << '('; - p->PrintExpr(op->a, os); - os << ", "; - p->PrintExpr(op->b, os); - os << ')'; - } else { - os << '('; - p->PrintExpr(op->a, os); - os << ' ' << opstr << ' '; - p->PrintExpr(op->b, os); - os << ')'; - } - } else { - p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); - } -} - -inline void PrintBinaryIntrinsitc(const Call* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - CHECK_EQ(op->args.size(), 2U); - os << '('; - p->PrintExpr(op->args[0], 
os); - os << opstr; - p->PrintExpr(op->args[1], os); - os << ')'; - } else { - p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); - } -} -void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) - std::stringstream value; - this->PrintExpr(op->value, value); - os << CastFromTo(value.str(), op->value.type(), op->type); -} -void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) - os << GetVarID(op); -} -void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "+", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "-", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "*", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "/", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "%", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "min", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "max", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "==", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "!=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">", os, this); -} -void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "&&", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "||", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) - os << '!'; - PrintExpr(op->a, os); -} - -void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) - if (op->call_type == Call::Extern || - op->call_type == Call::PureExtern) { - os << op->name << "("; - for (size_t i = 0; i < op->args.size(); i++) { - this->PrintExpr(op->args[i], os); - if (i < op->args.size() - 1) { - os << ", "; - } - } - os << ")"; - } else if (op->is_intrinsic(Call::bitwise_and)) { - PrintBinaryIntrinsitc(op, " & ", os, this); - } else if (op->is_intrinsic(Call::bitwise_xor)) { - PrintBinaryIntrinsitc(op, " ^ ", os, this); - } else if (op->is_intrinsic(Call::bitwise_or)) { - PrintBinaryIntrinsitc(op, " | ", os, this); - } else if (op->is_intrinsic(Call::bitwise_not)) { - CHECK_EQ(op->args.size(), 1U); - os << "(~"; - this->PrintExpr(op->args[0], os); - os << ')'; - } else if (op->is_intrinsic(Call::shift_left)) { - PrintBinaryIntrinsitc(op, " << ", os, this); - } else if (op->is_intrinsic(Call::shift_right)) { - PrintBinaryIntrinsitc(op, " >> ", os, this); - } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintExpr(op->args[0], os); - os << " ? 
"; - PrintExpr(op->args[1], os); - os << " : "; - PrintExpr(op->args[2], os); - os << ")"; - } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { - const Load *l = op->args[0].as(); - CHECK(op->args.size() == 1 && l); - os << "(("; - this->PrintType(l->type.element_of(), os); - os << " *)" << this->GetVarID(l->buffer_var.get()) - << " + "; - this->PrintExpr(l->index, os); - os << ')'; - } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { - CHECK_EQ(op->args.size(), 3U); - os << GetStructRef( - op->type, op->args[0], op->args[1], - op->args[2].as()->value); - } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { - CHECK_EQ(op->args.size(), 1U); - os << "("; - this->PrintExpr(op->args[0], os); - os << " == NULL)"; - } else - os << op->name << "()"; -} - -void CodeAnalysOpenCLC::PrintVecBinaryOp( - const std::string& op, Type t, - Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) - if (isalpha(op[0])) { - os << op << "("; - this->PrintExpr(lhs, os); - os << ", "; - this->PrintExpr(rhs, os); - os << ")"; - } else { - os <<"("; - this->PrintExpr(lhs, os); - os << ' ' << op << ' '; - this->PrintExpr(rhs, os); - os << ")"; - } -} - -inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { - const Ramp* r = index.as(); - if (!r) return false; - if (!is_one(r->stride)) return false; - CHECK_EQ(r->lanes, lanes); - *base = r->base; - return true; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) - int lanes = op->type.lanes(); - // delcare type. 
- if (op->type.lanes() == 1) { - std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); - os << ref; - } else { - CHECK(is_one(op->predicate)) - << "predicated load is not supported"; - Expr base; - if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { - std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); - os << ref; - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // load seperately. - std::string svalue = GetUniqueName("_"); - this->PrintIndent(); - this->PrintType(op->type, stream); - stream << ' ' << svalue << ";\n"; - std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string vid = GetVarID(op->buffer_var.get()); - Type elem_type = op->type.element_of(); - for (int i = 0; i < lanes; ++i) { - std::ostringstream value_temp; - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - value_temp << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, value_temp); - value_temp << ' '; - } - } - PrintType(elem_type, value_temp); - value_temp << "*)" << vid << ')'; - } else { - value_temp << vid; - } - value_temp << '['; - PrintVecElemLoad(sindex, op->index.type(), i, value_temp); - value_temp << ']'; - PrintVecElemStore(svalue, op->type, i, value_temp.str()); - } - os << svalue; - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { - Type t = op->value.type(); - if (t.lanes() == 1) { - std::string value = this->PrintExpr(op->value); - std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; - } else { - CHECK(is_one(op->predicate)) - << "Predicated store is not supported"; - Expr base; - if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { - std::string value = this->PrintExpr(op->value); - this->PrintVecStore(op->buffer_var.get(), t, base, value); - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // store elements seperately - std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); - std::string vid = GetVarID(op->buffer_var.get()); - for (int i = 0; i < t.lanes(); ++i) { - this->PrintIndent(); - Type elem_type = t.element_of(); - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - stream << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, stream); - stream << ' '; - } - } - PrintType(elem_type, stream); - stream << "*)" << vid << ')'; - } else { - stream << vid; - } - stream << '['; - PrintVecElemLoad(index, op->index.type(), i, stream); - stream << "] = "; - PrintVecElemLoad(value, op->value.type(), i, stream); - stream << ";\n"; - } - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) - std::string value = PrintExpr(op->value); - CHECK(!var_idmap_.count(op->var.get())); - var_idmap_[op->var.get()] = value; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) - // constraint of current logic - CHECK_EQ(op->base.type(), Int(32)); - os << "((int" << op->lanes << ")("; - for (int i = 0; i < op->lanes; i++) { - os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; - if (i != op->lanes - 1) - os << ", "; - } - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << 
"Broadcast: not supported "; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->condition, os); - os << " ? "; - PrintExpr(op->true_value, os); - os << " : "; - PrintExpr(op->false_value, os); - os << ")"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " & (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. a' = SHR a for Idx_R bits - // 2. mask: 1.(length).1 - // (1 << (L - R + 1)) - 1 - // 3. a' & mask - - os << "(("; - PrintExpr(op->a, os); - os << " >> "; - PrintExpr(op->index_right, os); - os << ") & ((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " | (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. mask: 0.(Idx L).01..10.(Idx R).0 - // ((1 << (L - R + 1)) - 1) << R - // 2. 
a & mask - - os << "("; - PrintExpr(op->a, os); - os << " & (((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1) << "; - PrintExpr(op->index_right, os); - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "Quantize is not yet support"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "KernelExpr is not yet support"; -} - - -void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { - // TODO comaniac - //std::vector vec_var = GetNodesByType(op->value); - - std::string arg_vid = "unknown"; - std::string str = PrintExpr(op->value); - if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { - size_t pos_arg = str.find("arg"); - size_t pos_data = str.find("data"); - arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); - } - else if (std::regex_match(str, std::regex("arg(.+)"))) - arg_vid = str; - - std::string vid = AllocVarID(op->var.get()); - if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { - if ("unknown" != arg_vid) - LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; - } else { - Type type = op->var.type(); - if (op->var.type() == Handle() && - handle_data_type_.count(op->var.get())) - type = handle_data_type_.at(op->var.get()); - this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); - } - VisitStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { - CHECK(!is_zero(op->condition)); - std::string vid = AllocVarID(op->buffer_var.get()); - if (op->new_expr.defined()) { - // Prefer global static allocation for the program - CHECK_EQ(op->free_function, "nop"); - std::string new_data = PrintExpr(op->new_expr); - this->PrintIndent(); - PrintType(op->type, stream); - stream << "* "<< vid << '=' << new_data << ";\n"; - } else { - this->PrintIndent(); - int32_t constant_size = 
op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); - stream << ' '; - PrintType(op->type, stream); - stream << ' '<< vid << '[' - << constant_size << "];\n"; - } - RegisterHandleType(op->buffer_var.get(), op->type); - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { - if (op->attr_key == ir::attr::thread_extent) { - IterVar iv(op->node.node_); - if (iv->thread_tag.length() != 0) { - if (!var_idmap_.count(iv->var.get())) { - BindThreadIndex(iv); - } - } - } else if (op->attr_key == ir::attr::storage_scope) { - const Variable* v = op->node.as(); - CHECK(v); - alloc_storage_scope_[v] = op->value.as()->value; - } else if (op->attr_key == ir::attr::volatile_scope) { - const Variable* v = op->node.as(); - CHECK(v); - volatile_buf_.insert(v); - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (op->message.as()) { - // GLOG style check - stream << "CHECK(" << cond << ") << \"" - << op->message.as()->value << "\";\n"; - } else { - stream << "assert(" << cond << ");\n"; - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const For* op) { - std::string extent = PrintExpr(op->extent); - PrintIndent(); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { - PrintStmt(op->first); - if (op->rest.defined()) PrintStmt(op->rest); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { - if (is_const(op->value)) return; - const Call* call = op->value.as(); - if (call) { - if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { - this->PrintStorageSync(call); return; - } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { - CHECK_EQ(call->args.size(), 4); - std::string value = PrintExpr(call->args[3]); - std::string ref = GetStructRef( - call->args[3].type(), - call->args[0], - call->args[1], - call->args[2].as()->value); - this->PrintIndent(); - this->stream << ref << " = " << value << ";\n"; - return; - } - } - std::string vid = this->PrintExpr(op->value); - this->PrintIndent(); - this->stream << "(void)" << vid << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { - PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { - LOG(FATAL) << "KernelDef is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { - LOG(FATAL) << "KernelStmt is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { - this->stream << "return "; - PrintExpr(op->value); - this->stream << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { - // TODO: Check if the break statement is used correctly - this->stream << "break;\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const While *op) { - std::string condition = PrintExpr(op->condition); 
- PrintIndent(); - stream << "while (" << condition << ") {\n"; - int while_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(while_scope); - PrintIndent(); - stream << "}\n"; -} - - -void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} - - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.h b/tvm/src/codegen/opencl/codeanalys_openclc.h deleted file mode 100644 index 8aaeedb39..000000000 --- a/tvm/src/codegen/opencl/codeanalys_openclc.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -#ifndef TVM_CODEGEN_CODEANALYS_OPENCLC_H_ -#define TVM_CODEGEN_CODEANALYS_OPENCLC_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../codegen_source_base.h" - -namespace TVM { -namespace codegen { - -using namespace ir; - -template -using str2tupleMap = std::unordered_map>; - -/*! - * \brief A class to analyze the IR AST for MerlinC generation. - * - */ -class CodeAnalysOpenCLC : - public ExprFunctor, - public StmtFunctor, - public CodeGenSourceBase { - public: - /*! - * \brief Initialize the code generator. - * \param output_ssa Whether output SSA. - */ - void Init(); - /*! - * \brief Add the function to the generated module. - * \param f The function to be compiled. - */ - void AddFunction(LoweredFunc f); - /*! - * \brief Finalize the compilation and return the code. - * \return The code. - */ - str2tupleMap Finish(); - /*! - * \brief Print the Stmt n to CodeAnalysMerlinC->stream - * \param n The statement to be printed. - */ - void PrintStmt(const Stmt& n) { - VisitStmt(n); - } - /*! - * \brief Print the expression n(or its ssa id if in ssa mode) into os - * \param n The expression to be printed. - * \param os The output stream - */ - void PrintExpr(const Expr& n, std::ostream& os); - /*! - * \brief Same as PrintExpr, but simply returns result string - * \param n The expression to be printed. 
- */ - std::string PrintExpr(const Expr& n) { - std::ostringstream os; - PrintExpr(n, os); - return os.str(); - } - // The following parts are overloadable print operations. - /*! - * \brief Initialize codegen state for generating f. - * \param f The function to be compiled. - */ - virtual void InitFuncState(LoweredFunc f); - // expression - void VisitExpr_(const Variable* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Load* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Let* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Call* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Add* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Sub* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Mul* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Div* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Mod* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Min* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Max* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const EQ* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const NE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const LT* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const LE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GT* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const And* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Or* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Cast* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Not* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Select* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const 
Ramp* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Broadcast* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const IntImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const UIntImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const FloatImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const StringImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GetBit* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GetSlice* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const SetBit* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) - // statment - void VisitStmt_(const LetStmt* op) override; - void VisitStmt_(const Store* op) override; - void VisitStmt_(const For* op) override; - void VisitStmt_(const IfThenElse* op) override; - void VisitStmt_(const Allocate* op) override; - void VisitStmt_(const AttrStmt* op) override; - void VisitStmt_(const AssertStmt* op) override; - void VisitStmt_(const Evaluate* op) override; - void VisitStmt_(const Block* op) override; - void VisitStmt_(const ProducerConsumer* op) override; - void VisitStmt_(const KernelDef* op) override; - void VisitStmt_(const KernelStmt* op) override; - void VisitStmt_(const Return* op) override; - void VisitStmt_(const Break* op) override; - void VisitStmt_(const While* op) override; - void VisitStmt_(const Partition* op) override; - /*! - * Print Type represetnation of type t. - * \param t The type representation. - * \param os The stream to print the ctype into - */ - void PrintType(Type t, std::ostream& os); // NOLINT(*) - std::string GetType(Type t); // NOLINT(*) - /*! 
- * \brief Print expr representing the thread tag - * \param IterVar iv The thread index to be binded; - */ - void BindThreadIndex(const IterVar& iv); // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os); // NOLINT(*) - void PrintStorageSync(const Call* op); // NOLINT(*) - // Binary vector op. - void PrintVecBinaryOp( - const std::string&op, Type op_type, - Expr lhs, Expr rhs, std::ostream& os); // NOLINT(*) - // print vector load - std::string GetVecLoad(Type t, const Variable* buffer, Expr base); - // print vector store - void PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value); // NOLINT(*) - // print load of single element - void PrintVecElemLoad( - const std::string& vec, Type t, int i, std::ostream& os); // NOLINT(*) - // print store of single element. - void PrintVecElemStore( - const std::string& vec, Type t, int i, const std::string& value); - // Get a cast type from to - std::string CastFromTo(std::string value, Type from, Type target); - - protected: - // Print reference to struct location - std::string GetStructRef( - Type t, const Expr& buffer, const Expr& index, int kind); - // print reference to a buffer as type t in index. - virtual std::string GetBufferRef( - Type t, const Variable* buffer, Expr index); - /*! - * \brief If buffer is allocated as type t. - * \param buf_var The buffer variable. - * \param t The type to be checked. - */ - bool HandleTypeMatch(const Variable* buf_var, Type t) const; - /*! - * \brief Register the data type of buf_var - * \param buf_var The buffer variable. - * \param t The type to be checked. - */ - void RegisterHandleType(const Variable* buf_var, Type t); - // override - void PrintSSAAssign( - const std::string& target, const std::string& src, Type t) final; - /*! \brief restrict keyword */ - std::string restrict_keyword_{""}; - /*! \brief the storage scope of allocation */ - std::unordered_map alloc_storage_scope_; - /*! 
\brief the data type of allocated buffers */ - std::unordered_map handle_data_type_; - - private: - /*! \brief set of volatile buf access */ - std::unordered_set volatile_buf_; - /*! \brief map of function arguments to their types */ - str2tupleMap map_arg_type_; -}; - -} // namespace codegen -} // namespace TVM -#endif // TVM_CODEGEN_CODEGEN_C_H_ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc old mode 100644 new mode 100755 index 8cfeb218c..f3b302d33 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -1,572 +1,146 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include -# include -# include -# include -# include -# include -# include "./codegen_aocl.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -CodeGenAOCL::CodeGenAOCL() { - restrict_keyword_ = "restrict"; -} - -void CodeGenAOCL::InitFuncState(LoweredFunc f) { - CodeGenC::InitFuncState(f); - for (Var arg: f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } -} - - -// void CodeGenAOCL::AddFunction(LoweredFunc f) { -// this->stream << "__kernel "; -// CodeGenC::AddFunction(f); -// } - -// void CodeGenAOCL::AddFunction(LoweredFunc f) { - // this->stream << "# pragma once\n"; - // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n\n"; - // this->stream << "__kernel "; - -// CodeGenC::AddFunction(f); -// } - -void CodeGenAOCL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // 
Clear previous generated state - this->InitFuncState(f); - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - // Write head files - // stream.open("host.cpp"); - // this->stream << "# pragma once\n"; - // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n\n"; - - // Write entry function name - // this->stream << "__kernel " << f->name << "("; - // this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; - // this->stream << f->name << "("; - this->stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" << "\n"; - this->stream << "__kernel " << "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - this->stream << "__global "; - // this->stream << "global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); 
- this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - - - - -// void CodeGenAOCL::AddFunction(LoweredFunc f, -// str2tupleMap map_arg_type) { -// // Don't Write header flies -// // Clear previous generated state -// this->InitFuncState(f); -// // Register alloc buffer type -// for ( const auto & kv : f->handle_data_type ) { -// this->stream << kv.first.get(); -// this->stream << kv.second.type(); -// RegisterHandleType(kv.first.get(), kv.second.type()); -// } -// // Write entry function name -// this->stream << "__kernel "; -// // Write arguments -// for ( size_t i = 0; i < f->args.size(); i++ ) { -// Var v = f->args[i]; -// std::string vid = AllocVarID(v.get()); -// if ( i!= 0 ) { -// this->stream << ", "; -// } -// if ( map_arg_type.find(vid) == map_arg_type.end()) { -// LOG(WARNING) << vid << " type not found\n"; -// PrintType(v.type(), this->stream); -// this->stream << ' ' << vid; -// } -// else { -// auto arg = map_arg_type[vid]; -// PrintType(std::get<1>(arg), this->stream); -// if (v.type().is_handle()) { -// this->stream << "*"; -// } -// this->stream << ' ' << std::get<0>(arg); - -// } -// stream << ") {\n"; -// int func_scope = this->BeginScope(); -// this->PrintStmt(f->body); -// this->EndScope(func_scope); -// this->PrintIndent(); -// this->stream << "}\n\n"; -// } -// CodeGenAOCL::AddFunction(f, map_arg_type); -// } - -std::string CodeGenAOCL::Finish() { - // inject extension enable pragma for fp16 and fp64 - if (enable_fp16_) { - decl_stream - << "#ifdef cl_khr_fp16\n" - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "#elif defined(cl_amd_fp16)\n" - "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" - "#else\n" - "#error \"Half precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - if (enable_fp64_) { - decl_stream - << "#ifdef cl_khr_fp64\n" - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "#elif defined(cl_amd_fp64)\n" - "#pragma 
OPENCL EXTENSION cl_amd_fp64 : enable\n" - "#else\n" - "#error \"Double precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - return CodeGenC::Finish(); -} - -void CodeGenAOCL::BindThreadIndex(const IterVar& iv) { - CHECK(!var_idmap_.count(iv->var.get())); - runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); - std::ostringstream os; - if (ts.rank == 1) { - os << "get_local_id(" << ts.dim_index << ")"; - } else { - os << "get_group_id(" << ts.dim_index << ")"; - } - var_idmap_[iv->var.get()] = - CastFromTo(os.str(), UInt(64), iv->var.type()); -} - -// void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) -// int lanes = t.lanes(); -// if (t.is_handle()) { -// CHECK_EQ(lanes, 1) -// << "do not yet support vector types"; -// os << "void*"; return; -// } -// if ( t== Bool() ) { -// os << "bool"; return; -// } -// bool fail = false; -// if (t.is_float()) { -// switch (t.bits()) { -// case 16: -// os << "half"; -// enable_fp16_ = true; -// break; -// case 32: -// os << "float"; -// break; -// case 64: -// os << "double"; -// enable_fp64_ = true; -// break; -// default: -// fail = true; -// break; -// } -// if (!fail && lanes == 1) return; -// if (!fail && (lanes >= 2 && lanes <= 16)) { -// os << lanes; return; -// } -// } else if (t.is_uint() || t.is_int()) { -// if (t.is_uint()) { -// os << 'u'; -// } -// if (t.bits() == 8 && t.lanes() == 4) { -// // directly 4 8 bit int in integer. 
-// os << "int"; return; -// } -// switch (t.bits()) { -// case 8: os << "char"; break; -// case 16: os << "short"; break; -// case 32: os << "int"; break; -// case 64: os << "long"; break; -// case 1: os << "int"; break; -// default: fail = true; break; -// } -// if (!fail && lanes == 1) return; -// // if (!fail && (lanes >= 2 && lanes <= 16)) { -// // os << lanes; return; -// // } -// } -// } - - -void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - - if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; - } - else if ( t.is_int()) { - os << "ap_int<" << t.bits() << ">" << "intd_t"; - } - else { - if (t.is_float()) { - if (t.bits() == 16) { - enable_fp16_ = true; - os << "half"; return; - } - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - enable_fp64_ = true; - os << "double"; return; - } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; - // os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_int<" << t.bits() << ">" << "intd_t"; return; - // os << "int" << t.bits() << "_t"; return; - } - } - } - } - } - - - // if (t.is_float()) { - // if (t.bits() == 16) { - // enable_fp16_ = true; - // os << "half"; return; - // } - // if (t.bits() == 32) { - // os << "float"; return; - // } - // if (t.bits() == 64) { - // enable_fp64_ = true; - // os << "double"; return; - // } - // } else if (t.is_uint()) { - // switch (t.bits()) { - // case 8: case 16: case 32: case 64: { - // os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; - // // os << "uint" << t.bits() << "_t"; return; - // } - // case 1: os << "int"; return; - // } - // } 
else if (t.is_int()) { - // switch (t.bits()) { - // case 8: case 16: case 32: case 64: { - // os << "ap_int<" << t.bits() << ">" << "intd_t"; return; - // // os << "int" << t.bits() << "_t"; return; - - // } - // } - // } - // LOG(FATAL) << "Cannot convert type " << t << " to AOCL type"; - -} - - - - -void CodeGenAOCL::PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os) { // NOLINT(*) - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - auto it = alloc_storage_scope_.find(buffer); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << GetVarID(buffer) << " + "; - PrintExpr(base, os); -} -std::string CodeGenAOCL::GetVecLoad( - Type t, const Variable* buffer, Expr base) { - std::ostringstream os; - os << "vload" << t.lanes() << "(0, "; - PrintVecAddr(buffer, t, base, os); - os << ")"; - return os.str(); -} - -void CodeGenAOCL::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - this->PrintIndent(); - stream << "vstore" << t.lanes() << "(" << value << ", 0, "; - PrintVecAddr(buffer, t, base, stream); - stream << ");\n"; -} - -void CodeGenAOCL::PrintStorageSync(const Call* op) { - const std::string& sync = op->args[0].as()->value; - if (sync == "warp") { - LOG(FATAL) << "warp sync not supported in opencl"; - } else if (sync == "shared") { - this->PrintIndent(); - this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; - } else if (sync == "global") { - LOG(FATAL) << "not supported"; - } -} - -// void CodeGenAOCL::PrintStorageScope( -// const std::string& scope, std::ostream& os) { // NOLINT(*) -// if (scope == "global") { -// os << "__global "; -// } else if (scope == "shared") { -// os << "__local "; -// } -// } - -void CodeGenAOCL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global") { - os << "global "; - } else if (scope == "shared") { - os << 
"local "; - } -} - - -std::string CodeGenAOCL::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - if (target.lanes() == 1) { - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - } else { // convert vector type - os << "("; - os << "convert_"; - this->PrintType(target, os); - os << "(" << value << "))"; - } - return os.str(); -} - -void CodeGenAOCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - std::string v = PrintExpr(op->value); - os << "(("; - PrintType(op->type, os); - os << ")("; - for (int i = 0; i < op->lanes; ++i) { - if (i != 0) os << ", "; - os << v; - } - os << "))"; -} - -void CodeGenAOCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) - if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintType(op->args[2].type(), os); - os << ")"; - } - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenAOCL::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - - -void CodeGenAOCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) - if (std::isinf(op->value)) { - if ( op->value < 0) { - os << "-"; - } - os << "INFINITY"; - } else if (std::isnan(op->value)) { - os << "NAN"; - } else { - CodeGenC::VisitExpr_(op, os); - } -} - -void CodeGenAOCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) - os << "("; - PrintType(op->true_value.type(), os); - os << ")"; - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenAOCL::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - // Skip the buffer data 
checking - if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) - return ; - PrintIndent(); - if (cond[0] == '(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeGenAOCL::GenForStmt(const For* op, std::string pragma, bool before) { - std::string extent = PrintExpr(op->extent); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - if (before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - PrintIndent(); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - if (!before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - -void CodeGenAOCL::VisitStmt_(const For* op) { - std::ostringstream os; - if (op->for_type == ForType::Unrolled) { - int unroll_factor = 0, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto factor = op->annotate_values[i].as(); - if (str->value == "factor" && factor != nullptr && factor->value > 1) { - unroll_factor = factor->value; - break; - } - } - i++; - } - os << "#pragma unroll"; - if (unroll_factor > 0) os << " " << unroll_factor << "\n"; - else os << "\n"; - } - else if (op->for_type == ForType::Pipelined) { - int II = 1, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto initiation_interval = op->annotate_values[i].as(); - if (str->value == "initiation_interval" && - initiation_interval != 
nullptr && - initiation_interval->value > 1) { - II = initiation_interval->value; - break; - } - } - i++; - } - os << "#pragma"; - os << " ii " << II << "\n"; - } - CodeGenAOCL::GenForStmt(op, os.str(), true); -} - - -} // namespace codegen -} // namespace TVM +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include +# include "./codegen_aocl.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +void CodeGenAOCL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + + this->stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" << "\n"; + this->stream << "__kernel " << "void " << f->name << "("; + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + // this->stream << "global "; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; + this->stream << "}\n\n"; +} + + + +void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os 
<< "void*"; return; + } + if (t.is_float()) { + if (t.bits() == 16) { + enable_fp16_ = true; + os << "half"; return; + } + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + enable_fp64_ = true; + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; + // os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_int<" << t.bits() << ">" << "intd_t"; return; + // os << "int" << t.bits() << "_t"; return; + + } + } + } +} + + + +void CodeGenAOCL::VisitStmt_(const For* op) { + std::ostringstream os; + if (op->for_type == ForType::Unrolled) { + int unroll_factor = 0, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto factor = op->annotate_values[i].as(); + if (str->value == "factor" && factor != nullptr && factor->value > 1) { + unroll_factor = factor->value; + break; + } + } + i++; + } + os << "#pragma unroll"; + if (unroll_factor > 0) os << " " << unroll_factor << "\n"; + else os << "\n"; + } + else if (op->for_type == ForType::Pipelined) { + int II = 1, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto initiation_interval = op->annotate_values[i].as(); + if (str->value == "initiation_interval" && + initiation_interval != nullptr && + initiation_interval->value > 1) { + II = initiation_interval->value; + break; + } + } + i++; + } + os << "#pragma"; + os << " ii " << II << "\n"; + } + CodeGenAOCL::GenForStmt(op, os.str(), true); +} + + + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h old mode 100644 new mode 100755 index a265e1653..f3e3a0c75 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -1,64 +1,34 @@ -/* - 
Yang.Bai - yb269@cornell.edu -*/ - -#ifndef TVM_CODEGEN_CODEGEN_AOCL_H_ -#define TVM_CODEGEN_CODEGEN_AOCL_H_ - -# include -# include -# include -# include "./codeanalys_openclc.h" -# include "../codegen_c.h" - - -namespace TVM { -namespace codegen { - - -class CodeGenAOCL : public CodeGenC { - public: - CodeGenAOCL(); - // void AddFunction(LoweredFunc f); - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - std::string Finish(); - - void InitFuncState(LoweredFunc f) override; - void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) - void PrintStorageSync(const Call* op) override; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - - std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) override; // NOLINT(*) - void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) override; //NOLINT(*) - void PrintVecAddr(const Variable * buffer, Type t, - Expr base, std::ostream& os); //NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) - - //overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) - - void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) - void VisitStmt_(const LetStmt* op) override; // NOLINT(*) - - void GenForStmt(const For* op, std::string pragma, bool before); - void VisitStmt_(const For* op) override; - - - private: - // whether enable fp16 and fp64 extension - bool enable_fp16_{false}; - bool enable_fp64_{false}; - -}; -} // namespace codegen -} // namespace TVM - +/* + Yang.Bai + 
yb269@cornell.edu +*/ + +#ifndef TVM_CODEGEN_CODEGEN_AOCL_H_ +#define TVM_CODEGEN_CODEGEN_AOCL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "./codegen_opencl.h" + + +namespace TVM { +namespace codegen { + + +class CodeGenAOCL : public CodeGenOpenCL { + public: + CodeGenAOCL(); + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + + void VisitStmt_(const For* op) override; + +}; +} // namespace codegen +} // namespace TVM + #endif // TVM_CODEGEN_CODEGEN_AOCL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc new file mode 100755 index 000000000..6f2a43d39 --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -0,0 +1,246 @@ + +# include +# include +# include +# include +# include +# include +# include "./codegen_opencl.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM{ +namespace codegen{ + +CodeGenOpenCL::CodeGenOpenCL(){ + restrict_keyword_ = "restrict"; +} + +std::string CodeGenOpenCL::Finish() { + // inject extension enable pragma for fp16 and fp64 + if (enable_fp16_) { + decl_stream + << "#ifdef cl_khr_fp16\n" + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "#elif defined(cl_amd_fp16)\n" + "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" + "#else\n" + "#error \"Half precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + if (enable_fp64_) { + decl_stream + << "#ifdef cl_khr_fp64\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#elif defined(cl_amd_fp64)\n" + "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" + "#else\n" + "#error \"Double precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + return CodeGenC::Finish(); +} + +void CodeGenOpenCL::BindThreadIndex(const IterVar& 
iv) { + CHECK(!var_idmap_.count(iv->var.get())); + runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); + std::ostringstream os; + if (ts.rank == 1) { + os << "get_local_id(" << ts.dim_index << ")"; + } else { + os << "get_group_id(" << ts.dim_index << ")"; + } + var_idmap_[iv->var.get()] = + CastFromTo(os.str(), UInt(64), iv->var.type()); +} + + + + +void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os) { // NOLINT(*) + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + auto it = alloc_storage_scope_.find(buffer); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << GetVarID(buffer) << " + "; + PrintExpr(base, os); +} +std::string CodeGenOpenCL::GetVecLoad( + Type t, const Variable* buffer, Expr base) { + std::ostringstream os; + os << "vload" << t.lanes() << "(0, "; + PrintVecAddr(buffer, t, base, os); + os << ")"; + return os.str(); +} + +void CodeGenOpenCL::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + this->PrintIndent(); + stream << "vstore" << t.lanes() << "(" << value << ", 0, "; + PrintVecAddr(buffer, t, base, stream); + stream << ");\n"; +} + +void CodeGenOpenCL::PrintStorageSync(const Call* op) { + const std::string& sync = op->args[0].as()->value; + if (sync == "warp") { + LOG(FATAL) << "warp sync not supported in opencl"; + } else if (sync == "shared") { + this->PrintIndent(); + this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; + } else if (sync == "global") { + LOG(FATAL) << "not supported"; + } +} + + + +void CodeGenOpenCL::PrintStorageScope( + const std::string& scope, std::ostream& os) { // NOLINT(*) + if (scope == "global") { + os << "global "; + } else if (scope == "shared") { + os << "local "; + } +} + + +std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + 
std::ostringstream os; + if (target.lanes() == 1) { + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + } else { // convert vector type + os << "("; + os << "convert_"; + this->PrintType(target, os); + os << "(" << value << "))"; + } + return os.str(); +} + +void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + std::string v = PrintExpr(op->value); + os << "(("; + PrintType(op->type, os); + os << ")("; + for (int i = 0; i < op->lanes; ++i) { + if (i != 0) os << ", "; + os << v; + } + os << "))"; +} + +void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) + if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintType(op->args[2].type(), os); + os << ")"; + } + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + + +void CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) + if (std::isinf(op->value)) { + if ( op->value < 0) { + os << "-"; + } + os << "INFINITY"; + } else if (std::isnan(op->value)) { + os << "NAN"; + } else { + CodeGenC::VisitExpr_(op, os); + } +} + +void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) + os << "("; + PrintType(op->true_value.type(), os); + os << ")"; + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenCL::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + // Skip the buffer data checking + if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) + return ; + PrintIndent(); + if (cond[0] == '(' && 
cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +void CodeGenOpenCL::GenForStmt(const For* op, std::string pragma, bool before) { + std::string extent = PrintExpr(op->extent); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + if (before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + PrintIndent(); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + if (!before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h new file mode 100755 index 000000000..feb84b9bf --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -0,0 +1,53 @@ +#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ +#define TVM_CODEGEN_CODEGEN_OPENCL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "../codegen_c.h" + +namespace TVM{ +namespace codegen{ + +class CodeGenOpenCL : public CodeGenC{ + public: + // void AddFunction(LoweredFunc f); + CodeGenOpenCL(); + virtual void AddFunction(LoweredFunc f, str2tupleMap map_arg_type) = 0; + std::string Finish(); + void InitFuncState(LoweredFunc f) override; + void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) + void 
PrintStorageSync(const Call* op) override; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + virtual void PrintType(Type t, std::ostream& os) = 0; //NOLINT + std::string GetVecLoad(Type t, const Variable * buffer, + Expr base) override; // NOLINT(*) + void PrintVecStore(const Variable * buffer, Type t, + Expr base, const std::string& value) override; //NOLINT(*) + void PrintVecAddr(const Variable * buffer, Type t, + Expr base, std::ostream& os); //NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) + + //overload visitor + void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) + void VisitStmt_(const LetStmt* op) override; // NOLINT + void GenForStmt(const For* op, std::string pragma, bool before); + virtual void VisitStmt_(const For* op) = 0; + + protected: + // whether enable fp16 and fp64 extension + bool enable_fp16_{false}; + bool enable_fp64_{false}; +}; + + +} // namespace codegen +} // namespace TVM + +#endif \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc old mode 100644 new mode 100755 index 95928cbe5..5470a10f7 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -1,587 +1,196 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include -# include -# include -# include -# include -# include -# include "./codegen_sdaccel.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -CodeGenSDACCEL::CodeGenSDACCEL() { - restrict_keyword_ = "restrict"; -} - -void CodeGenSDACCEL::InitFuncState(LoweredFunc f) { - 
CodeGenC::InitFuncState(f); - for (Var arg: f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } -} - - -// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { -// this->stream << "__kernel "; -// CodeGenC::AddFunction(f); -// } - -// void CodeGenSDACCEL::AddFunction(LoweredFunc f) { - // this->stream << "# pragma once\n"; - // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n\n"; - // this->stream << "__kernel "; - -// CodeGenC::AddFunction(f); -// } - -void CodeGenSDACCEL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - this->InitFuncState(f); - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - // Write head files - // stream.open("host.cpp"); - // this->stream << "# pragma once\n"; - // this->stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - // this->stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - // this->stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n"; - // this->stream << "# include \n\n"; - - // Write entry function name - // this->stream 
<< "__kernel " << f->name << "("; - // this->stream << "__kernel " << "void " << "__attribute__ " << "((reqd_work_group_size(1, 1, 1)))\n"; - // this->stream << f->name << "("; - this->stream << "__kernel " << "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - this->stream << "__global "; - // this->stream << "global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - - - - -// void CodeGenSDACCEL::AddFunction(LoweredFunc f, -// str2tupleMap map_arg_type) { -// // Don't Write header flies -// // Clear previous generated state -// this->InitFuncState(f); -// // Register alloc buffer type -// for ( const auto & kv : f->handle_data_type ) { -// this->stream << kv.first.get(); -// this->stream << kv.second.type(); -// RegisterHandleType(kv.first.get(), kv.second.type()); -// } -// // Write entry function name -// this->stream << "__kernel "; -// // Write arguments -// for ( size_t i = 0; i < f->args.size(); i++ ) { -// Var v = f->args[i]; -// std::string vid = AllocVarID(v.get()); -// if ( i!= 0 ) { -// this->stream << ", "; -// } -// if ( map_arg_type.find(vid) == map_arg_type.end()) { -// LOG(WARNING) << vid << " type not found\n"; -// PrintType(v.type(), this->stream); -// this->stream << ' ' << vid; -// } -// else { -// auto arg = map_arg_type[vid]; -// PrintType(std::get<1>(arg), this->stream); -// 
if (v.type().is_handle()) { -// this->stream << "*"; -// } -// this->stream << ' ' << std::get<0>(arg); - -// } -// stream << ") {\n"; -// int func_scope = this->BeginScope(); -// this->PrintStmt(f->body); -// this->EndScope(func_scope); -// this->PrintIndent(); -// this->stream << "}\n\n"; -// } -// CodeGenSDACCEL::AddFunction(f, map_arg_type); -// } - -std::string CodeGenSDACCEL::Finish() { - // inject extension enable pragma for fp16 and fp64 - if (enable_fp16_) { - decl_stream - << "#ifdef cl_khr_fp16\n" - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "#elif defined(cl_amd_fp16)\n" - "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" - "#else\n" - "#error \"Half precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - if (enable_fp64_) { - decl_stream - << "#ifdef cl_khr_fp64\n" - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "#elif defined(cl_amd_fp64)\n" - "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" - "#else\n" - "#error \"Double precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - return CodeGenC::Finish(); -} - -void CodeGenSDACCEL::BindThreadIndex(const IterVar& iv) { - CHECK(!var_idmap_.count(iv->var.get())); - runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); - std::ostringstream os; - if (ts.rank == 1) { - os << "get_local_id(" << ts.dim_index << ")"; - } else { - os << "get_group_id(" << ts.dim_index << ")"; - } - var_idmap_[iv->var.get()] = - CastFromTo(os.str(), UInt(64), iv->var.type()); -} - -void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - int lanes = t.lanes(); - if (t.is_handle()) { - CHECK_EQ(lanes, 1) - << "do not yet support vector types"; - os << "void*"; return; - } - if ( t== Bool() ) { - os << "bool"; return; - } - bool fail = false; - if (t.is_float()) { - switch (t.bits()) { - case 16: - os << "half"; - enable_fp16_ = true; - break; - case 32: - os << "float"; 
- break; - case 64: - os << "double"; - enable_fp64_ = true; - break; - default: - fail = true; - break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } else if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << 'u'; - } - if (t.bits() == 8 && t.lanes() == 4) { - // directly 4 8 bit int in integer. - os << "int"; return; - } - switch (t.bits()) { - case 8: os << "char"; break; - case 16: os << "short"; break; - case 32: os << "int"; break; - case 64: os << "long"; break; - case 1: os << "int"; break; - default: fail = true; break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } - LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; -} - - -// void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) -// CHECK_EQ(t.lanes(), 1) -// << "do not yet support vector types"; -// if (t.is_handle()) { -// os << "void*"; return; -// } -// if (t.is_float()) { -// if (t.bits() == 16) { -// enable_fp16_ = true; -// os << "half"; return; -// } -// if (t.bits() == 32) { -// os << "float"; return; -// } -// if (t.bits() == 64) { -// enable_fp64_ = true; -// os << "double"; return; -// } -// } else if (t.is_uint() || t.is_int()) { -// if (t.is_uint()) { -// os << 'u'; -// } -// if (t.bits() == 8 && t.lanes() == 4) { -// os << "int"; return; -// } -// switch (t.bits()) { -// case 8: os << "char"; break; -// case 16: os << "short"; break; -// case 32: os << "int"; break; -// case 64: os << "long"; break; -// case 1: os << "int"; break; -// } -// } - -// os << t; -// } - - - - -void CodeGenSDACCEL::PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os) { // NOLINT(*) - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - auto it = alloc_storage_scope_.find(buffer); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, os); - } - os << ' '; - PrintType(t.element_of(), 
os); - os << "*)"; - } - os << GetVarID(buffer) << " + "; - PrintExpr(base, os); -} -std::string CodeGenSDACCEL::GetVecLoad( - Type t, const Variable* buffer, Expr base) { - std::ostringstream os; - os << "vload" << t.lanes() << "(0, "; - PrintVecAddr(buffer, t, base, os); - os << ")"; - return os.str(); -} - -void CodeGenSDACCEL::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - this->PrintIndent(); - stream << "vstore" << t.lanes() << "(" << value << ", 0, "; - PrintVecAddr(buffer, t, base, stream); - stream << ");\n"; -} - -void CodeGenSDACCEL::PrintStorageSync(const Call* op) { - const std::string& sync = op->args[0].as()->value; - if (sync == "warp") { - LOG(FATAL) << "warp sync not supported in opencl"; - } else if (sync == "shared") { - this->PrintIndent(); - this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; - } else if (sync == "global") { - LOG(FATAL) << "not supported"; - } -} - -// void CodeGenSDACCEL::PrintStorageScope( -// const std::string& scope, std::ostream& os) { // NOLINT(*) -// if (scope == "global") { -// os << "__global "; -// } else if (scope == "shared") { -// os << "__local "; -// } -// } - -void CodeGenSDACCEL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global") { - os << "global "; - } else if (scope == "shared") { - os << "local "; - } -} - - -std::string CodeGenSDACCEL::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - if (target.lanes() == 1) { - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - } else { // convert vector type - os << "("; - os << "convert_"; - this->PrintType(target, os); - os << "(" << value << "))"; - } - return os.str(); -} - -void CodeGenSDACCEL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - std::string v = PrintExpr(op->value); - os << "(("; - PrintType(op->type, os); - os << ")("; - for (int i = 0; i < 
op->lanes; ++i) { - if (i != 0) os << ", "; - os << v; - } - os << "))"; -} - -void CodeGenSDACCEL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) - if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintType(op->args[2].type(), os); - os << ")"; - } - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenSDACCEL::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - - -void CodeGenSDACCEL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) - if (std::isinf(op->value)) { - if ( op->value < 0) { - os << "-"; - } - os << "INFINITY"; - } else if (std::isnan(op->value)) { - os << "NAN"; - } else { - CodeGenC::VisitExpr_(op, os); - } -} - -void CodeGenSDACCEL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) - os << "("; - PrintType(op->true_value.type(), os); - os << ")"; - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenSDACCEL::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - // Skip the buffer data checking - if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) - return ; - PrintIndent(); - if (cond[0] == '(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - - -void CodeGenSDACCEL::GenForStmt(const For* op, 
std::string pragma, bool before) { - std::string extent = PrintExpr(op->extent); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - if (before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - PrintIndent(); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - if (!before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - -void CodeGenSDACCEL::VisitStmt_(const For* op) { - std::ostringstream os; - if (op->for_type == ForType::Unrolled) { - int unroll_factor = 0, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto factor = op->annotate_values[i].as(); - if (str->value == "factor" && factor != nullptr && factor->value > 1) { - unroll_factor = factor->value; - break; - } - } - i++; - } - os << "__attribute__((opencl_unroll_hint("; - if (unroll_factor > 0) os << unroll_factor << ")))\n"; - else - os << "\n"; - - } - else if (op->for_type == ForType::Pipelined) { - int II = 1, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto initiation_interval = op->annotate_values[i].as(); - if (str->value == "initiation_interval" && - initiation_interval != nullptr && - initiation_interval->value > 1) { - II = initiation_interval->value; - break; - } - } - i++; - } - os << "__attribute__((xcl_pipeline_loop("; - os << II << ")))\n"; - } - CodeGenSDACCEL::GenForStmt(op, os.str(), true); -} - -// void CodeGenSDACCEL::VisitStmt_(const Partition* op) { -// std::string vid = GetVarID(op->buffer_var.get()); -// stream << vid << " "; -// stream << "__attribute__((xcl_array_partition("; -// switch (op->partition_type) { -// case PartitionType::Complete: -// stream << "complete,"; -// break; -// case PartitionType::Block: -// stream << 
"block,"; -// break; -// case PartitionType::Cyclic: -// stream << "cyclic,"; -// break; -// } -// if (op->partition_type != PartitionType::Complete) { -// stream << op->factor << ","; -// // stream << " factor=" << op->factor; -// } -// stream << op->dim << ")))"; - -// stream << "\n"; -// } - - -void CodeGenSDACCEL::VisitStmt_(const Partition* op) { - std::string vid = GetVarID(op->buffer_var.get()); - stream << vid << " "; - if (op->partition_type != PartitionType::Complete) { - stream << "__attribute__((xcl_array_partition("; - switch (op->partition_type) { - // case PartitionType::Complete: - // stream << "complete,"; - // break; - case PartitionType::Block: - stream << "block,"; - break; - case PartitionType::Cyclic: - stream << "cyclic,"; - break; - } - stream << op->factor << ","; - stream << op->dim << ")))\n"; - }else { - if (op->dim == 0) { - stream << "__attribute__((xcl_array_partition))\n"; - } else { - stream << "__attribute__((xcl_array_partition("; - stream << "complete,"; - stream << op->factor << ","; - stream << op->dim << ")))\n"; - } - } -} - - -} // namespace codegen -} // namespace TVM +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include +# include "./codegen_sdaccel.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +void CodeGenSDACCEL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + + this->stream << "__kernel " << "void " << f->name << "("; + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == 
map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + // this->stream << "global "; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; + this->stream << "}\n\n"; +} + + +void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + int lanes = t.lanes(); + if (t.is_handle()) { + CHECK_EQ(lanes, 1) + << "do not yet support vector types"; + os << "void*"; return; + } + if ( t== Bool() ) { + os << "bool"; return; + } + bool fail = false; + if (t.is_float()) { + switch (t.bits()) { + case 16: + os << "half"; + enable_fp16_ = true; + break; + case 32: + os << "float"; + break; + case 64: + os << "double"; + enable_fp64_ = true; + break; + default: + fail = true; + break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } else if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << 'u'; + } + if (t.bits() == 8 && t.lanes() == 4) { + // directly 4 8 bit int in integer. 
+ os << "int"; return; + } + switch (t.bits()) { + case 8: os << "char"; break; + case 16: os << "short"; break; + case 32: os << "int"; break; + case 64: os << "long"; break; + case 1: os << "int"; break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; +} + + + + +void CodeGenSDACCEL::VisitStmt_(const For* op) { + std::ostringstream os; + if (op->for_type == ForType::Unrolled) { + int unroll_factor = 0, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto factor = op->annotate_values[i].as(); + if (str->value == "factor" && factor != nullptr && factor->value > 1) { + unroll_factor = factor->value; + break; + } + } + i++; + } + os << "__attribute__((opencl_unroll_hint("; + if (unroll_factor > 0) os << unroll_factor << ")))\n"; + else + os << "\n"; + + } + else if (op->for_type == ForType::Pipelined) { + int II = 1, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto initiation_interval = op->annotate_values[i].as(); + if (str->value == "initiation_interval" && + initiation_interval != nullptr && + initiation_interval->value > 1) { + II = initiation_interval->value; + break; + } + } + i++; + } + os << "__attribute__((xcl_pipeline_loop("; + os << II << ")))\n"; + } + CodeGenSDACCEL::GenForStmt(op, os.str(), true); +} + + + +void CodeGenSDACCEL::VisitStmt_(const Partition* op) { + std::string vid = GetVarID(op->buffer_var.get()); + stream << vid << " "; + if (op->partition_type != PartitionType::Complete) { + stream << "__attribute__((xcl_array_partition("; + switch (op->partition_type) { + // case PartitionType::Complete: + // stream << "complete,"; + // break; + case PartitionType::Block: + stream << "block,"; + break; + case PartitionType::Cyclic: + stream << "cyclic,"; + break; + } + stream << op->factor << ","; + stream << op->dim << ")))\n"; 
+ }else { + if (op->dim == 0) { + stream << "__attribute__((xcl_array_partition))\n"; + } else { + stream << "__attribute__((xcl_array_partition("; + stream << "complete,"; + stream << op->factor << ","; + stream << op->dim << ")))\n"; + } + } +} + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h old mode 100644 new mode 100755 index a1f141501..5bf156db4 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -1,64 +1,36 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ -#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ - -# include -# include -# include -# include "./codeanalys_openclc.h" -# include "../codegen_c.h" - -namespace TVM { -namespace codegen { - - -class CodeGenSDACCEL : public CodeGenC { - public: - CodeGenSDACCEL(); - // void AddFunction(LoweredFunc f); - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - std::string Finish(); - - void InitFuncState(LoweredFunc f) override; - void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) - void PrintStorageSync(const Call* op) override; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - - std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) override; // NOLINT(*) - void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) override; //NOLINT(*) - void PrintVecAddr(const Variable * buffer, Type t, - Expr base, std::ostream& os); //NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) - - //overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) - 
void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) - void VisitStmt_(const LetStmt* op) override; // NOLINT(*) - - void GenForStmt(const For* op, std::string pragma, bool before); - void VisitStmt_(const For* op) override; - void VisitStmt_(const Partition* op) override; - - - - private: - // whether enable fp16 and fp64 extension - bool enable_fp16_{false}; - bool enable_fp64_{false}; - -}; -} // namespace codegen -} // namespace TVM - +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ +#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "./codegen_opencl.h" + +namespace TVM { +namespace codegen { + + +class CodeGenSDACCEL : public CodeGenOpenCL { + public: + CodeGenSDACCEL(); + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + + void VisitStmt_(const For* op) override; + void VisitStmt_(const Partition* op) override; + + + +}; +} // namespace codegen +} // namespace TVM + #endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc deleted file mode 100644 index 8a640e556..000000000 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc +++ /dev/null @@ -1,336 +0,0 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-30 15:15:28 - * @LastEditTime: 2019-08-14 16:16:03 - * @LastEditors: Please set LastEditors - */ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include "./sdaccel_module.h" -# include -# include -# include -# include -# include - -namespace TVM { -namespace runtime { - -namespace { - -void PrintIndent(std::ofstream& stream, int 
indent) { - for (int i = 0;i < indent; i++ ) { - stream << ' '; - } -} - -inline size_t GetTypeSize(TVMType t) { - size_t byte = (t.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - return byte; -} - -inline size_t GetDataSize(TVMArray* arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= arr->shape[i]; - } - size_t byte = (arr->dtype.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - size *= (byte * 8 * arr->dtype.lanes + 7) / 8; - return size; -} - -inline TVMType Type2TVMType(Type t) { - TVMType tt; - if (t.is_int()) tt.code = kDLInt; - else if (t.is_uint()) tt.code = kDLUInt; - else if (t.is_float()) tt.code = kDLFloat; - else LOG(FATAL) << "Unacceptable type: " << t; - tt.bits = static_cast(t.bits()); - tt.fracs = static_cast(t.fracs()); - return tt; -} - -inline std::string Type2Str(TVMType t) { - -} - -inline std::string Tpye2ExtStr(TVMType t) { - -} - - - - - -inline std::string Type2Byte(TVMType t) { - std::string str = ""; - if (t.code == kDLFloat) { - str += "float"; - } else if (t.code == kDLInt || t.code == kDLUInt) { - if (t.code == kDLUInt) str += "u"; - str += "int"; - if (t.bits <= 8) str += "8"; - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - str += "_t"; - } - return str; -} - -void CollectArgInfo(TVMArgs& args, - LoweredFunc func, - std::vector& arg_sizes, - std::vector& arg_types) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - arg_sizes.push_back(GetDataSize(arr)); - arg_types.push_back(arr->dtype); - } else { - const Variable* var = func->api_args[i].as(); - TVMType t = Type2TVMType(var->type); - arg_sizes.push_back(GetTypeSize(t)); - arg_types.push_back(t); - } - } -} - -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes) { - for (int i 
= 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - // generate shared memory key and id - // TODO: maybe get the current path?? - key_t key = ftok("/", i+1); - int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); - shmids.push_back(shmid); - // copy mem from TVM args to the shared memory - void* mem = shmat(shmid, nullptr, 0); - memcpy(mem, arr->data, arg_sizes[i]); - } else { - shmids.push_back(0); - } - } -} - -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes) { - for (size_t i = 0; i < shmids.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - int shmid = shmids[i]; - void* mem = shmat(shmid, nullptr, 0); - memcpy(arr->data, mem, arg_sizes[i]); - shmdt(mem); - shmctl(shmid, IPC_RMID, nullptr); - } - } -} - -// copy values from the shared mem to local mem -void PrintCopy() - - - - -// copy values from local mem back to shared mem -void PrintCopyBack() - - - -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc func, - std::string test_file) { - int indent = 0; - std::ofstream stream; - stream.open("host.cpp"); - - // write the header files and macro commmands. 
- stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# pragram once\n"; - stream << "# define LENGTH (1024)\n"; - stream << "# define NUM_WORKGROUPS (1)\n"; - stream << "# define WORKGROUP_SIZE (16)\n"; - stream << test_file; - stream << "int main(void) { \n"; - indent += 2; - - - // get the platform and devices - stream << "#if define(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; - PrintIndent(stream, indent); - stream << "# define STR_VALUE(arg) #arg\n"; - PrintIndent(stream, indent); - stream << "# define GET_STRING(name) STR_VALUE(name)\n"; - PrintIndent(stream, indent); - stream << "# define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n" - stream << "#endif"; - - - // get the xclbin filename . 
- stream << "char * xclbinFilename = argv[1]\n"; - stream << "size_t \n"; - - // source memories - - - // create the test data and goldn data locally - - - - - // OpenCL HOST CODE AREA START - // get First Platform - stream << "std::vector platforms;\n"; - stream << "cl::Platform::get(&platforms)\n;"; - stream << "cl::Platform platform = platform[0];\n"; - stream << "std::cout << "" " - - // get accelerator devices and select 1st such device - - // create context and command queue for selected device - - - // load xcl binary into the buffer - - - // creat program from binary file - - // create kernel - - // create buffers inside device - - // copy input data to device buffer from host memory - - // run the kernel - - // copy device result data to host memory - // OpenCL HOST CODE AREA END - - - - // compare the results of the kernel to the simulation - - - - - for ( int i = 0;i < args.size(); i++ ) { - if (args[i].type_code() == kArrayHandle) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << "* "; - stream << Type2Byte(arg_types)[i] << "*"; - PrintIndent(stream, indent); - - - } - } - - // call the function - PrintIndent(stream, indent); - stream << func->name << "("; - for (int i = 0;i < args.size();i++) { - if (i != args.size()-1) { - stream << ", "; - } - } - stream << ");\n"; - - // copy to shared mem - for (int i = 0;i < args.size();i++ ) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - PrintCopyBack(arr, stream, indent, i); - PrintIndent(stream, indent); - } - } - stream << "}\n"; - stream.close(); -} -} // namespace - -class SDAccelModuleNode final : public ModuleNode { - public: - SDAccelModuleNode(LoweredFunc func, std::string test_file) - : func_(func), test_file_(test_file) {} - - const char* type_key() const { - return "sdaccel_sw_emu"; - } - - PackedFunc GetFunction( - const std::string& name, - const std::shared_ptr& sptr_to_self) final { - return 
PackedFunc([this](TVMArgs args, TVMRetValue* rv){ - if (args.size() != (int)func_->args.size()) - LOG(FATAL) << "The function should take in " << func_->args.size() - << " inputs but get " << args.size(); - std::vector arg_sizes; - std::vector arg_types; - std::vector shmids; - CollectArgInfo(args, func_, arg_sizes, arg_types); - GenSharedMem(args, shmids, arg_sizes); - GenHostCode(args, shmids, arg_types, func_, test_file_); - // TODO: find a better way to do the following - LOG(CLEAN) << "Compiling the generated SDAccel OpenCL code ..."; - LOG(CLEAN) << "Running SDAccel OpenCL simulation ..."; - system("make -f sdaccel.mk run_cpu_em"); - // system("./out"); - LOG(CLEAN) << "Finished SDAccel OpenCL simulation"; - system("make -f sdaccel.mk clean"); - FreeSharedMem(args, shmids, arg_sizes); - }); - } - - private: - LoweredFunc func_; - std::string test_file_; -}; - -Module CreateSDAccelModule( - LoweredFunc func, - std::string code) { - - std::shared_ptr n = - std::make_shared(func, code); - - return Module(n); -} - - -} // namespace runtime -} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h deleted file mode 100644 index 25db653b9..000000000 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-30 15:15:15 - * @LastEditTime: 2019-07-30 15:15:15 - * @LastEditors: your name - */ -/* - Yang.Bai - yb269@cornell.edu -*/ - -#ifndef SDACCEL_MODULE_H -#define SDACCEL_MODULE_H - -# include -# include -# include "../../build_common.h" - -namespace TVM { -namespace runtime { - -Module CreateSDAccelModule( - LoweredFunc func, - std::string code); - -} // namespace runtime -} // namespace TVM - -#endif \ No newline at end of file From c4562e55d7dddbae83fbe08776d3f95305d675d7 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 23 Aug 2019 15:35:46 -0400 
Subject: [PATCH 051/103] analysis --- tvm/src/codegen/opencl/codeanalys_openclc.cc | 919 +++++++++++++++++++ tvm/src/codegen/opencl/codeanalys_openclc.h | 202 ++++ 2 files changed, 1121 insertions(+) create mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.cc create mode 100644 tvm/src/codegen/opencl/codeanalys_openclc.h diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc new file mode 100644 index 000000000..030453a94 --- /dev/null +++ b/tvm/src/codegen/opencl/codeanalys_openclc.cc @@ -0,0 +1,919 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file tvm/src/codegen/hlsc/codegen_hlsc.cc + */ +#include +#include +#include +#include "./codeanalys_openclc.h" +#include "../codegen_common.h" +#include "../../arithmetic/compute_expr.h" + +namespace TVM { +namespace codegen { + +using namespace ir; + +void CodeAnalysOpenCLC::Init() { + ; +} + +void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { + alloc_storage_scope_.clear(); + handle_data_type_.clear(); + map_arg_type_.clear(); + CodeGenSourceBase::ClearFuncState(); +} +void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { + // Clear previous generated state. + this->InitFuncState(f); + + // Add to alloc buffer type. 
+ for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Record the arguments for analyzing the type + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + this->map_arg_type_[vid]; + } + int func_scope = this->BeginScope(); + VisitStmt(f->body); + this->EndScope(func_scope); +} + +str2tupleMap CodeAnalysOpenCLC::Finish() { + return this->map_arg_type_; +} + +void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) + VisitExpr(n, os); +} + +void CodeAnalysOpenCLC::PrintSSAAssign( + const std::string& target, const std::string& src, Type t) { + PrintType(t, stream); + stream << ' ' << target << " = "; + if (src.length() > 3 && + src[0] == '(' && src[src.length() - 1] == ')') { + stream << src.substr(1, src.length() - 2); + } else { + stream << src; + } + stream << ";\n"; +} + +// Print a reference expression to a buffer. +std::string CodeAnalysOpenCLC::GetBufferRef( + Type t, const Variable* buffer, Expr index) { + std::ostringstream os; + std::string vid = GetVarID(buffer); + std::string scope; + if (alloc_storage_scope_.count(buffer)) { + scope = alloc_storage_scope_.at(buffer); + } + bool is_vol = volatile_buf_.count(buffer) != 0; + if (t.lanes() == 1) { + if (!HandleTypeMatch(buffer, t) || is_vol) { + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)" << vid << ')'; + } else { + os << vid; + } + os << '['; + PrintExpr(index, os); + os << ']'; + } else { + // Buffer declared as vector type. 
+ // optimize for case where it is in register, + if (HandleTypeMatch(buffer, t) && !is_vol) { + // optimize for constant access + int offset; + if (arith::GetConstInt(index, &offset)) { + CHECK_EQ(offset % t.lanes(), 0) + << "Find unaligned vector load to a vector type"; + os << vid << '[' << (offset / t.lanes()) << ']'; + return os.str(); + } + } + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)("; + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << vid << " + "; + PrintExpr(index, os); + os << "))[0]"; + } + return os.str(); +} + +// Print a reference expression to a buffer. +std::string CodeAnalysOpenCLC::GetStructRef( + Type t, const Expr& buffer, const Expr& index, int kind) { + if (kind < intrinsic::kArrKindBound_) { + std::ostringstream os; + os << "(((TVMArray*)"; + this->PrintExpr(buffer, os); + os << ")"; + if (kind == intrinsic::kArrAddr) { + os << " + "; + this->PrintExpr(index, os); + os << ")"; + return os.str(); + } + os << '['; + this->PrintExpr(index, os); + os << "]."; + // other case: get fields. 
+ switch (kind) { + case intrinsic::kArrData: os << "data"; break; + case intrinsic::kArrShape: os << "shape"; break; + case intrinsic::kArrStrides: os << "strides"; break; + case intrinsic::kArrNDim: os << "ndim"; break; + case intrinsic::kArrTypeCode: os << "dtype.code"; break; + case intrinsic::kArrTypeBits: os << "dtype.bits"; break; + case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; + case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; + case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; + case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; + default: os << "unknown_field_code_" << kind; + } + os << ')'; + return os.str(); + } else { + CHECK_LT(kind, intrinsic::kTVMValueKindBound_); + std::ostringstream os; + os << "(((TVMValue*)"; + this->PrintExpr(buffer, os); + os << ")[" << index << "]."; + if (t.is_handle()) { + os << "v_handle"; + } else if (t.is_float()) { + os << "v_float64"; + } else if (t.is_int()) { + os << "v_int64"; + } else { + os << t; + } + os << ")"; + return os.str(); + } +} + + +bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) return false; + return it->second == t; +} + +void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) { + handle_data_type_[buf_var] = t; + } else { + CHECK(it->second == t) + << "conflicting buf var type"; + } +} + +void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, + Type t, int i, + std::ostream& os) { // NOLINT(*) + os << vec << ".s" << std::hex << i << std::dec; +} + +void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, + Type t, int i, + const std::string& value) { + this->PrintIndent(); + stream << vec << ".s" << std::hex << i + << " = " << value << ";\n" << std::dec; +} + +std::string CodeAnalysOpenCLC::GetVecLoad( + Type t, const 
Variable* buffer, Expr base) { + return GetBufferRef(t, buffer, base); +} + +void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + std::string ref = GetBufferRef(t, buffer, base); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; +} + +std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + return os.str(); +} + +void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { + LOG(FATAL) << "not implemented"; +} + +void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) +} + +void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) + CHECK_EQ(scope, "global"); +} + +std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) + std::ostringstream os; + PrintType(t, os); + return os.str(); +} + +void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os << "void*"; return; + } + if (t.is_float()) { + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "int" << t.bits() << "_t"; return; + } + } + } + os << t; +} + + +inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == Int(32)) { + std::ostringstream temp; + temp << op->value; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == UInt(32)) { + std::ostringstream temp; + temp << op->value << "U"; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + switch (op->type.bits()) { + case 64: case 32: { + std::ostringstream temp; + temp << std::scientific << op->value; + if (op->type.bits() == 32) temp << 'f'; + p->MarkConst(temp.str()); + os << temp.str(); + break; + } + case 16: { + os << '('; + p->PrintType(op->type, os); + os << ')' << std::scientific <value << 'f'; + break; + } + default: os << op << "\n"; + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) + os << "\"" << op->value << "\""; +} + +template +inline void PrintBinaryExpr(const T* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + if (isalpha(opstr[0])) { + os << opstr << '('; + p->PrintExpr(op->a, os); + os << ", "; + p->PrintExpr(op->b, os); + os << ')'; + } else { + os << '('; + p->PrintExpr(op->a, os); + os << ' ' << opstr << ' '; + p->PrintExpr(op->b, os); + os << ')'; + } + } else { + p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); + } +} + +inline void PrintBinaryIntrinsitc(const Call* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + CHECK_EQ(op->args.size(), 2U); + os << '('; + p->PrintExpr(op->args[0], 
os); + os << opstr; + p->PrintExpr(op->args[1], os); + os << ')'; + } else { + p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); + } +} +void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) + std::stringstream value; + this->PrintExpr(op->value, value); + os << CastFromTo(value.str(), op->value.type(), op->type); +} +void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) + os << GetVarID(op); +} +void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "+", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "-", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "*", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "/", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "%", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "min", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "max", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "==", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "!=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">", os, this); +} +void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "&&", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "||", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) + os << '!'; + PrintExpr(op->a, os); +} + +void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) + if (op->call_type == Call::Extern || + op->call_type == Call::PureExtern) { + os << op->name << "("; + for (size_t i = 0; i < op->args.size(); i++) { + this->PrintExpr(op->args[i], os); + if (i < op->args.size() - 1) { + os << ", "; + } + } + os << ")"; + } else if (op->is_intrinsic(Call::bitwise_and)) { + PrintBinaryIntrinsitc(op, " & ", os, this); + } else if (op->is_intrinsic(Call::bitwise_xor)) { + PrintBinaryIntrinsitc(op, " ^ ", os, this); + } else if (op->is_intrinsic(Call::bitwise_or)) { + PrintBinaryIntrinsitc(op, " | ", os, this); + } else if (op->is_intrinsic(Call::bitwise_not)) { + CHECK_EQ(op->args.size(), 1U); + os << "(~"; + this->PrintExpr(op->args[0], os); + os << ')'; + } else if (op->is_intrinsic(Call::shift_left)) { + PrintBinaryIntrinsitc(op, " << ", os, this); + } else if (op->is_intrinsic(Call::shift_right)) { + PrintBinaryIntrinsitc(op, " >> ", os, this); + } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintExpr(op->args[0], os); + os << " ? 
"; + PrintExpr(op->args[1], os); + os << " : "; + PrintExpr(op->args[2], os); + os << ")"; + } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { + const Load *l = op->args[0].as(); + CHECK(op->args.size() == 1 && l); + os << "(("; + this->PrintType(l->type.element_of(), os); + os << " *)" << this->GetVarID(l->buffer_var.get()) + << " + "; + this->PrintExpr(l->index, os); + os << ')'; + } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { + CHECK_EQ(op->args.size(), 3U); + os << GetStructRef( + op->type, op->args[0], op->args[1], + op->args[2].as()->value); + } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { + CHECK_EQ(op->args.size(), 1U); + os << "("; + this->PrintExpr(op->args[0], os); + os << " == NULL)"; + } else + os << op->name << "()"; +} + +void CodeAnalysOpenCLC::PrintVecBinaryOp( + const std::string& op, Type t, + Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) + if (isalpha(op[0])) { + os << op << "("; + this->PrintExpr(lhs, os); + os << ", "; + this->PrintExpr(rhs, os); + os << ")"; + } else { + os <<"("; + this->PrintExpr(lhs, os); + os << ' ' << op << ' '; + this->PrintExpr(rhs, os); + os << ")"; + } +} + +inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { + const Ramp* r = index.as(); + if (!r) return false; + if (!is_one(r->stride)) return false; + CHECK_EQ(r->lanes, lanes); + *base = r->base; + return true; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) + int lanes = op->type.lanes(); + // delcare type. 
+ if (op->type.lanes() == 1) { + std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); + os << ref; + } else { + CHECK(is_one(op->predicate)) + << "predicated load is not supported"; + Expr base; + if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { + std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); + os << ref; + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // load seperately. + std::string svalue = GetUniqueName("_"); + this->PrintIndent(); + this->PrintType(op->type, stream); + stream << ' ' << svalue << ";\n"; + std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string vid = GetVarID(op->buffer_var.get()); + Type elem_type = op->type.element_of(); + for (int i = 0; i < lanes; ++i) { + std::ostringstream value_temp; + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + value_temp << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, value_temp); + value_temp << ' '; + } + } + PrintType(elem_type, value_temp); + value_temp << "*)" << vid << ')'; + } else { + value_temp << vid; + } + value_temp << '['; + PrintVecElemLoad(sindex, op->index.type(), i, value_temp); + value_temp << ']'; + PrintVecElemStore(svalue, op->type, i, value_temp.str()); + } + os << svalue; + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { + Type t = op->value.type(); + if (t.lanes() == 1) { + std::string value = this->PrintExpr(op->value); + std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; + } else { + CHECK(is_one(op->predicate)) + << "Predicated store is not supported"; + Expr base; + if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { + std::string value = this->PrintExpr(op->value); + this->PrintVecStore(op->buffer_var.get(), t, base, value); + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // store elements seperately + std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); + std::string vid = GetVarID(op->buffer_var.get()); + for (int i = 0; i < t.lanes(); ++i) { + this->PrintIndent(); + Type elem_type = t.element_of(); + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + stream << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, stream); + stream << ' '; + } + } + PrintType(elem_type, stream); + stream << "*)" << vid << ')'; + } else { + stream << vid; + } + stream << '['; + PrintVecElemLoad(index, op->index.type(), i, stream); + stream << "] = "; + PrintVecElemLoad(value, op->value.type(), i, stream); + stream << ";\n"; + } + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) + std::string value = PrintExpr(op->value); + CHECK(!var_idmap_.count(op->var.get())); + var_idmap_[op->var.get()] = value; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) + // constraint of current logic + CHECK_EQ(op->base.type(), Int(32)); + os << "((int" << op->lanes << ")("; + for (int i = 0; i < op->lanes; i++) { + os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; + if (i != op->lanes - 1) + os << ", "; + } + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << 
"Broadcast: not supported "; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->condition, os); + os << " ? "; + PrintExpr(op->true_value, os); + os << " : "; + PrintExpr(op->false_value, os); + os << ")"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " & (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. a' = SHR a for Idx_R bits + // 2. mask: 1.(length).1 + // (1 << (L - R + 1)) - 1 + // 3. a' & mask + + os << "(("; + PrintExpr(op->a, os); + os << " >> "; + PrintExpr(op->index_right, os); + os << ") & ((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " | (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. mask: 0.(Idx L).01..10.(Idx R).0 + // ((1 << (L - R + 1)) - 1) << R + // 2. 
a & mask + + os << "("; + PrintExpr(op->a, os); + os << " & (((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1) << "; + PrintExpr(op->index_right, os); + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "Quantize is not yet support"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "KernelExpr is not yet support"; +} + + +void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { + // TODO comaniac + //std::vector vec_var = GetNodesByType(op->value); + + std::string arg_vid = "unknown"; + std::string str = PrintExpr(op->value); + if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { + size_t pos_arg = str.find("arg"); + size_t pos_data = str.find("data"); + arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); + } + else if (std::regex_match(str, std::regex("arg(.+)"))) + arg_vid = str; + + std::string vid = AllocVarID(op->var.get()); + if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { + if ("unknown" != arg_vid) + LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; + } else { + Type type = op->var.type(); + if (op->var.type() == Handle() && + handle_data_type_.count(op->var.get())) + type = handle_data_type_.at(op->var.get()); + this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); + } + VisitStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { + CHECK(!is_zero(op->condition)); + std::string vid = AllocVarID(op->buffer_var.get()); + if (op->new_expr.defined()) { + // Prefer global static allocation for the program + CHECK_EQ(op->free_function, "nop"); + std::string new_data = PrintExpr(op->new_expr); + this->PrintIndent(); + PrintType(op->type, stream); + stream << "* "<< vid << '=' << new_data << ";\n"; + } else { + this->PrintIndent(); + int32_t constant_size = 
op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); + stream << ' '; + PrintType(op->type, stream); + stream << ' '<< vid << '[' + << constant_size << "];\n"; + } + RegisterHandleType(op->buffer_var.get(), op->type); + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { + if (op->attr_key == ir::attr::thread_extent) { + IterVar iv(op->node.node_); + if (iv->thread_tag.length() != 0) { + if (!var_idmap_.count(iv->var.get())) { + BindThreadIndex(iv); + } + } + } else if (op->attr_key == ir::attr::storage_scope) { + const Variable* v = op->node.as(); + CHECK(v); + alloc_storage_scope_[v] = op->value.as()->value; + } else if (op->attr_key == ir::attr::volatile_scope) { + const Variable* v = op->node.as(); + CHECK(v); + volatile_buf_.insert(v); + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (op->message.as()) { + // GLOG style check + stream << "CHECK(" << cond << ") << \"" + << op->message.as()->value << "\";\n"; + } else { + stream << "assert(" << cond << ");\n"; + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const For* op) { + std::string extent = PrintExpr(op->extent); + PrintIndent(); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { + PrintStmt(op->first); + if (op->rest.defined()) PrintStmt(op->rest); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { + if (is_const(op->value)) return; + const Call* call = op->value.as(); + if (call) { + if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { + this->PrintStorageSync(call); return; + } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { + CHECK_EQ(call->args.size(), 4); + std::string value = PrintExpr(call->args[3]); + std::string ref = GetStructRef( + call->args[3].type(), + call->args[0], + call->args[1], + call->args[2].as()->value); + this->PrintIndent(); + this->stream << ref << " = " << value << ";\n"; + return; + } + } + std::string vid = this->PrintExpr(op->value); + this->PrintIndent(); + this->stream << "(void)" << vid << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { + PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { + LOG(FATAL) << "KernelDef is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { + LOG(FATAL) << "KernelStmt is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { + this->stream << "return "; + PrintExpr(op->value); + this->stream << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { + // TODO: Check if the break statement is used correctly + this->stream << "break;\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const While *op) { + std::string condition = PrintExpr(op->condition); 
+ PrintIndent(); + stream << "while (" << condition << ") {\n"; + int while_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(while_scope); + PrintIndent(); + stream << "}\n"; +} + + +void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.h b/tvm/src/codegen/opencl/codeanalys_openclc.h new file mode 100644 index 000000000..8aaeedb39 --- /dev/null +++ b/tvm/src/codegen/opencl/codeanalys_openclc.h @@ -0,0 +1,202 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +#ifndef TVM_CODEGEN_CODEANALYS_OPENCLC_H_ +#define TVM_CODEGEN_CODEANALYS_OPENCLC_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../codegen_source_base.h" + +namespace TVM { +namespace codegen { + +using namespace ir; + +template +using str2tupleMap = std::unordered_map>; + +/*! + * \brief A class to analyze the IR AST for MerlinC generation. + * + */ +class CodeAnalysOpenCLC : + public ExprFunctor, + public StmtFunctor, + public CodeGenSourceBase { + public: + /*! + * \brief Initialize the code generator. + * \param output_ssa Whether output SSA. + */ + void Init(); + /*! + * \brief Add the function to the generated module. + * \param f The function to be compiled. + */ + void AddFunction(LoweredFunc f); + /*! + * \brief Finalize the compilation and return the code. + * \return The code. + */ + str2tupleMap Finish(); + /*! + * \brief Print the Stmt n to CodeAnalysMerlinC->stream + * \param n The statement to be printed. + */ + void PrintStmt(const Stmt& n) { + VisitStmt(n); + } + /*! + * \brief Print the expression n(or its ssa id if in ssa mode) into os + * \param n The expression to be printed. + * \param os The output stream + */ + void PrintExpr(const Expr& n, std::ostream& os); + /*! + * \brief Same as PrintExpr, but simply returns result string + * \param n The expression to be printed. 
+ */ + std::string PrintExpr(const Expr& n) { + std::ostringstream os; + PrintExpr(n, os); + return os.str(); + } + // The following parts are overloadable print operations. + /*! + * \brief Initialize codegen state for generating f. + * \param f The function to be compiled. + */ + virtual void InitFuncState(LoweredFunc f); + // expression + void VisitExpr_(const Variable* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Load* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Let* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Call* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Add* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Sub* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Mul* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Div* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Mod* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Min* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Max* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const EQ* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const NE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const LT* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const LE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GT* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const And* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Or* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Cast* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Not* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Select* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const 
Ramp* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Broadcast* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const IntImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const UIntImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const FloatImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const StringImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GetBit* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GetSlice* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const SetBit* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) + // statment + void VisitStmt_(const LetStmt* op) override; + void VisitStmt_(const Store* op) override; + void VisitStmt_(const For* op) override; + void VisitStmt_(const IfThenElse* op) override; + void VisitStmt_(const Allocate* op) override; + void VisitStmt_(const AttrStmt* op) override; + void VisitStmt_(const AssertStmt* op) override; + void VisitStmt_(const Evaluate* op) override; + void VisitStmt_(const Block* op) override; + void VisitStmt_(const ProducerConsumer* op) override; + void VisitStmt_(const KernelDef* op) override; + void VisitStmt_(const KernelStmt* op) override; + void VisitStmt_(const Return* op) override; + void VisitStmt_(const Break* op) override; + void VisitStmt_(const While* op) override; + void VisitStmt_(const Partition* op) override; + /*! + * Print Type represetnation of type t. + * \param t The type representation. + * \param os The stream to print the ctype into + */ + void PrintType(Type t, std::ostream& os); // NOLINT(*) + std::string GetType(Type t); // NOLINT(*) + /*! 
+ * \brief Print expr representing the thread tag + * \param IterVar iv The thread index to be binded; + */ + void BindThreadIndex(const IterVar& iv); // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os); // NOLINT(*) + void PrintStorageSync(const Call* op); // NOLINT(*) + // Binary vector op. + void PrintVecBinaryOp( + const std::string&op, Type op_type, + Expr lhs, Expr rhs, std::ostream& os); // NOLINT(*) + // print vector load + std::string GetVecLoad(Type t, const Variable* buffer, Expr base); + // print vector store + void PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value); // NOLINT(*) + // print load of single element + void PrintVecElemLoad( + const std::string& vec, Type t, int i, std::ostream& os); // NOLINT(*) + // print store of single element. + void PrintVecElemStore( + const std::string& vec, Type t, int i, const std::string& value); + // Get a cast type from to + std::string CastFromTo(std::string value, Type from, Type target); + + protected: + // Print reference to struct location + std::string GetStructRef( + Type t, const Expr& buffer, const Expr& index, int kind); + // print reference to a buffer as type t in index. + virtual std::string GetBufferRef( + Type t, const Variable* buffer, Expr index); + /*! + * \brief If buffer is allocated as type t. + * \param buf_var The buffer variable. + * \param t The type to be checked. + */ + bool HandleTypeMatch(const Variable* buf_var, Type t) const; + /*! + * \brief Register the data type of buf_var + * \param buf_var The buffer variable. + * \param t The type to be checked. + */ + void RegisterHandleType(const Variable* buf_var, Type t); + // override + void PrintSSAAssign( + const std::string& target, const std::string& src, Type t) final; + /*! \brief restrict keyword */ + std::string restrict_keyword_{""}; + /*! \brief the storage scope of allocation */ + std::unordered_map alloc_storage_scope_; + /*! 
\brief the data type of allocated buffers */ + std::unordered_map handle_data_type_; + + private: + /*! \brief set of volatile buf access */ + std::unordered_set volatile_buf_; + /*! \brief map of function arguments to their types */ + str2tupleMap map_arg_type_; +}; + +} // namespace codegen +} // namespace TVM +#endif // TVM_CODEGEN_CODEGEN_C_H_ From 51a4f77fc81a8bbdd354a421dbce463b00a8a57c Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Tue, 27 Aug 2019 17:58:41 -0400 Subject: [PATCH 052/103] bug fixed --- tvm/src/codegen/build_opencl.cc | 44 --- tvm/src/codegen/codegen_opencl.cc | 206 ----------- tvm/src/codegen/codegen_opencl.h | 51 --- tvm/src/codegen/opencl/aocl/aocl_module.cc | 0 tvm/src/codegen/opencl/aocl/aocl_module.h | 0 tvm/src/codegen/opencl/build_opencl.cc | 121 ++++--- tvm/src/codegen/opencl/codeanalys_openclc.cc | 0 tvm/src/codegen/opencl/codeanalys_openclc.h | 0 tvm/src/codegen/opencl/codegen_aocl.cc | 60 ++-- tvm/src/codegen/opencl/codegen_opencl.h | 2 +- tvm/src/codegen/opencl/codegen_sdaccel.cc | 3 +- .../codegen/opencl/sdaccel/sdaccel_module.cc | 336 ++++++++++++++++++ .../codegen/opencl/sdaccel/sdaccel_module.h | 30 ++ 13 files changed, 467 insertions(+), 386 deletions(-) delete mode 100644 tvm/src/codegen/build_opencl.cc delete mode 100644 tvm/src/codegen/codegen_opencl.cc delete mode 100644 tvm/src/codegen/codegen_opencl.h create mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.cc create mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.h mode change 100644 => 100755 tvm/src/codegen/opencl/codeanalys_openclc.cc mode change 100644 => 100755 tvm/src/codegen/opencl/codeanalys_openclc.h create mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc create mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.h diff --git a/tvm/src/codegen/build_opencl.cc b/tvm/src/codegen/build_opencl.cc deleted file mode 100644 index 5054085cd..000000000 --- a/tvm/src/codegen/build_opencl.cc +++ /dev/null @@ -1,44 +0,0 @@ -/*! 
- * Copyright (c) 2017 by Contributors - * Build opencl modules from source. - * \file build_opencl.cc - */ -#include -#include -#include "./codegen_opencl.h" -#include "./build_common.h" - -#if TVM_OPENCL_RUNTIME -#include "../runtime/opencl/opencl_module.h" -#endif // TVM_OPENCL_RUNTIME - -namespace TVM { -namespace codegen { - -runtime::Module BuildOpenCL(Array funcs) { - using TVM::runtime::Registry; - bool output_ssa = false; - CodeGenOpenCL cg; - cg.Init(output_ssa); - for (LoweredFunc f : funcs) { - cg.AddFunction(f); - } - std::string code = cg.Finish(); - - if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { - code = (*f)(code).operator std::string(); - } -#if TVM_OPENCL_RUNTIME - return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs)); -#else - LOG(WARNING) << "OpenCL runtime not enabled, return a source module..."; - return DeviceSourceModuleCreate(code, "cl", ExtractFuncInfo(funcs), "opencl"); -#endif // TVM_OPENCL_RUNTIME -} - -TVM_REGISTER_API("codegen.build_opencl") -.set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = BuildOpenCL(args[0]); - }); -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/codegen_opencl.cc b/tvm/src/codegen/codegen_opencl.cc deleted file mode 100644 index d0297a1d9..000000000 --- a/tvm/src/codegen/codegen_opencl.cc +++ /dev/null @@ -1,206 +0,0 @@ -/*! 
- * Copyright (c) 2017 by Contributors - * \file codegen_opencl.cc - */ -#include -#include -#include -#include -#include "./codegen_opencl.h" -#include "../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -CodeGenOpenCL::CodeGenOpenCL() { - restrict_keyword_ = "restrict"; -} - -void CodeGenOpenCL::InitFuncState(LoweredFunc f) { - CodeGenC::InitFuncState(f); - for (Var arg : f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } -} - -void CodeGenOpenCL::AddFunction(LoweredFunc f) { - this->stream << "__kernel "; - CodeGenC::AddFunction(f); -} - -std::string CodeGenOpenCL::Finish() { - // inject extension enable pragma for fp16 and fp64 - if (enable_fp16_) { - decl_stream - << "#ifdef cl_khr_fp16\n" - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "#elif defined(cl_amd_fp16)\n" - "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" - "#else\n" - "#error \"Half precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - if (enable_fp64_) { - decl_stream - << "#ifdef cl_khr_fp64\n" - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "#elif defined(cl_amd_fp64)\n" - "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" - "#else\n" - "#error \"Double precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - return CodeGenC::Finish(); -} - -void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { - CHECK(!var_idmap_.count(iv->var.get())); - runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); - std::ostringstream os; - if (ts.rank == 1) { - os << "get_local_id(" << ts.dim_index << ")"; - } else { - os << "get_group_id(" << ts.dim_index << ")"; - } - var_idmap_[iv->var.get()] = - CastFromTo(os.str(), UInt(64), iv->var.type()); -} - -void CodeGenOpenCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - int lanes = t.lanes(); - if (t.is_handle()) { - CHECK_EQ(lanes, 1) - << "do 
not yet support vector types"; - os << "void*"; return; - } - bool fail = false; - if (t.is_float()) { - switch (t.bits()) { - case 16: - os << "half"; - enable_fp16_ = true; - break; - case 32: os << "float"; break; - case 64: - os << "double"; - enable_fp64_ = true; - break; - default: fail = true; break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } else if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << 'u'; - } - if (t.bits() == 8 && t.lanes() == 4) { - // directly 4 8 bit int in integer. - os << "int"; return; - } - switch (t.bits()) { - case 8: os << "char"; break; - case 16: os << "short"; break; - case 32: os << "int"; break; - case 64: os << "long"; break; - case 1: os << "int"; break; - default: fail = true; break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } - LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type"; -} - -void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os) { // NOLINT(*) - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - auto it = alloc_storage_scope_.find(buffer); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << GetVarID(buffer) << " + "; - PrintExpr(base, os); -} -std::string CodeGenOpenCL::GetVecLoad( - Type t, const Variable* buffer, Expr base) { - std::ostringstream os; - os << "vload" << t.lanes() << "(0, "; - PrintVecAddr(buffer, t, base, os); - os << ")"; - return os.str(); -} - -void CodeGenOpenCL::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - this->PrintIndent(); - stream << "vstore" << t.lanes() << "(" << value << ", 0, "; - PrintVecAddr(buffer, t, base, stream); - stream << ");\n"; -} - -void CodeGenOpenCL::PrintStorageSync(const Call* op) { - const std::string& 
sync = op->args[0].as()->value; - if (sync == "warp") { - LOG(FATAL) << "warp sync not supported in opencl"; - } else if (sync == "shared") { - this->PrintIndent(); - this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; - } else if (sync == "global") { - LOG(FATAL) << "not supported"; - } -} - -void CodeGenOpenCL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global") { - os << "__global"; - } else if (scope == "shared") { - os << "__local"; - } -} - -std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - if (target.lanes() == 1) { - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - } else { // convert vector type - os << "("; - os << "convert_"; - this->PrintType(target, os); - os << "(" << value << "))"; - } - return os.str(); -} - -void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - std::string v = PrintExpr(op->value); - os << "(("; - PrintType(op->type, os); - os << ")("; - for (int i = 0; i < op->lanes; ++i) { - if (i != 0) os << ", "; - os << v; - } - os << "))"; -} -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/codegen_opencl.h b/tvm/src/codegen/codegen_opencl.h deleted file mode 100644 index 088ab089a..000000000 --- a/tvm/src/codegen/codegen_opencl.h +++ /dev/null @@ -1,51 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file codegen_opencl.h - * \brief Generate OpenCL device code. - */ -#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ -#define TVM_CODEGEN_CODEGEN_OPENCL_H_ - -#include -#include -#include -#include "./codegen_c.h" - -namespace TVM { -namespace codegen { - -class CodeGenOpenCL final : public CodeGenC { - public: - CodeGenOpenCL(); - void AddFunction(LoweredFunc f); - std::string Finish(); - - // override print thread tag. 
- void InitFuncState(LoweredFunc f) final; - void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) final; // NOLINT(*) - void PrintStorageSync(const Call* op) final; // NOLINT(*) - void PrintType(Type t, std::ostream& os) final; // NOLINT(*) - std::string GetVecLoad(Type t, const Variable* buffer, - Expr base) final; - void PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) final; // NOLINT(*) - // the address of load/store - void PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os); // NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target); // NOLINT(*) - - // overload visitor - void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*) - - private: - // whether enable fp16 and fp64 extension - bool enable_fp16_{false}; - bool enable_fp64_{false}; -}; - -} // namespace codegen -} // namespace TVM - -#endif // TVM_CODEGEN_CODEGEN_OPENCL_H_ diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.cc b/tvm/src/codegen/opencl/aocl/aocl_module.cc new file mode 100755 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.h b/tvm/src/codegen/opencl/aocl/aocl_module.h new file mode 100755 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index b964176c5..394b4dd3f 100755 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -1,58 +1,63 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -# include -# include -# include -# include -# include -# include "../../runtime/meta_data.h" -# include -# include "./codegen_sdaccel.h" -# include "./codegen_aocl.h" -# include "./codeanalys_openclc.h" -# include "../build_common.h" -// # include "./sdaccel/sdaccel_module.h" -// # include "./aocl/aocl_module.h" - - -namespace TVM { -namespace codegen { - - -template -std::string BuildOpenCL(Array 
funcs){ - using TVM::runtime::Registry; - CodeAnalysOpenCLC ca; - CodeGen cg; - for(LoweredFunc f: funcs){ - ca.AddFunction(f); - str2tupleMapmap_arg_type; - map_arg_type = ca.Finish(); - - cg.AddFunction(f, map_arg_type); - } - std::string code = cg.Finish(); - - if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { - code = (*f)(code).operator std::string(); - } - - LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; - return code; -} - - - -TVM_REGISTER_API("codegen.build_sdaccel") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildOpenCL(args[0]); - }); - -TVM_REGISTER_API("codegen.build_aocl") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildOpenCL(args[0]); - }); - +/* + Yang.Bai + yb269@cornell.edu +*/ + +# include +# include +# include +# include +# include +# include "../../runtime/meta_data.h" +# include +# include "./codegen_sdaccel.h" +# include "./codegen_aocl.h" +# include "./codeanalys_openclc.h" +# include "../build_common.h" +// # include "./sdaccel/sdaccel_module.h" +// # include "./aocl/aocl_module.h" + + +namespace TVM { +namespace codegen { + + + + + +template +std::string BuildOpenCL(Array funcs){ + using TVM::runtime::Registry; + CodeAnalysOpenCLC ca; + CodeGen cg; + for(LoweredFunc f: funcs){ + ca.AddFunction(f); + str2tupleMapmap_arg_type; + map_arg_type = ca.Finish(); + + cg.AddFunction(f, map_arg_type); + } + std::string code = cg.Finish(); + + if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { + code = (*f)(code).operator std::string(); + } + + LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; + return code; +} + + + + +TVM_REGISTER_API("codegen.build_sdaccel") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildOpenCL(args[0]); + }); + +TVM_REGISTER_API("codegen.build_aocl") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildOpenCL(args[0]); + }); +} +} diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc 
b/tvm/src/codegen/opencl/codeanalys_openclc.cc old mode 100644 new mode 100755 diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.h b/tvm/src/codegen/opencl/codeanalys_openclc.h old mode 100644 new mode 100755 diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index f3b302d33..4ae3015d9 100755 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -14,6 +14,7 @@ namespace TVM { namespace codegen { + void CodeGenAOCL::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Clear previous generated state @@ -68,39 +69,48 @@ void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) if (t.is_handle()) { os << "void*"; return; } - if (t.is_float()) { - if (t.bits() == 16) { - enable_fp16_ = true; - os << "half"; return; - } - if (t.bits() == 32) { - os << "float"; return; + + if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; } - if (t.bits() == 64) { - enable_fp64_ = true; - os << "double"; return; + else if ( t.is_int()) { + os << "ap_int<" << t.bits() << ">" << "intd_t"; } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; - // os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_int<" << t.bits() << ">" << "intd_t"; return; - // os << "int" << t.bits() << "_t"; return; - + else { + if (t.is_float()) { + if (t.bits() == 16) { + enable_fp16_ = true; + os << "half"; return; + } + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + enable_fp64_ = true; + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; + // os << "uint" << t.bits() << "_t"; return; 
+ } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_int<" << t.bits() << ">" << "intd_t"; return; + // os << "int" << t.bits() << "_t"; return; + } + } } } } } - void CodeGenAOCL::VisitStmt_(const For* op) { std::ostringstream os; if (op->for_type == ForType::Unrolled) { diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h index feb84b9bf..01070d24a 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -13,7 +13,7 @@ namespace codegen{ class CodeGenOpenCL : public CodeGenC{ public: // void AddFunction(LoweredFunc f); - CodeGenOpenCL(); + CodeGenOpenCL(); virtual void AddFunction(LoweredFunc f, str2tupleMap map_arg_type) = 0; std::string Finish(); void InitFuncState(LoweredFunc f) override; diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 5470a10f7..26a3c8bb5 100755 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -42,7 +42,8 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, } else { auto arg = map_arg_type[vid]; - this->stream << "__global "; + // this->stream << "__global "; + this->stream << "__local "; // this->stream << "global "; PrintType(std::get<1>(arg), this->stream); if (v.type().is_handle()) diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc new file mode 100755 index 000000000..8a640e556 --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc @@ -0,0 +1,336 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-30 15:15:28 + * @LastEditTime: 2019-08-14 16:16:03 + * @LastEditors: Please set LastEditors + */ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include "./sdaccel_module.h" +# include +# include +# include +# include +# include + +namespace TVM { +namespace runtime 
{ + +namespace { + +void PrintIndent(std::ofstream& stream, int indent) { + for (int i = 0;i < indent; i++ ) { + stream << ' '; + } +} + +inline size_t GetTypeSize(TVMType t) { + size_t byte = (t.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + return byte; +} + +inline size_t GetDataSize(TVMArray* arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= arr->shape[i]; + } + size_t byte = (arr->dtype.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + size *= (byte * 8 * arr->dtype.lanes + 7) / 8; + return size; +} + +inline TVMType Type2TVMType(Type t) { + TVMType tt; + if (t.is_int()) tt.code = kDLInt; + else if (t.is_uint()) tt.code = kDLUInt; + else if (t.is_float()) tt.code = kDLFloat; + else LOG(FATAL) << "Unacceptable type: " << t; + tt.bits = static_cast(t.bits()); + tt.fracs = static_cast(t.fracs()); + return tt; +} + +inline std::string Type2Str(TVMType t) { + +} + +inline std::string Tpye2ExtStr(TVMType t) { + +} + + + + + +inline std::string Type2Byte(TVMType t) { + std::string str = ""; + if (t.code == kDLFloat) { + str += "float"; + } else if (t.code == kDLInt || t.code == kDLUInt) { + if (t.code == kDLUInt) str += "u"; + str += "int"; + if (t.bits <= 8) str += "8"; + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + str += "_t"; + } + return str; +} + +void CollectArgInfo(TVMArgs& args, + LoweredFunc func, + std::vector& arg_sizes, + std::vector& arg_types) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + arg_sizes.push_back(GetDataSize(arr)); + arg_types.push_back(arr->dtype); + } else { + const Variable* var = func->api_args[i].as(); + TVMType t = Type2TVMType(var->type); + arg_sizes.push_back(GetTypeSize(t)); + arg_types.push_back(t); + } + } +} + +void GenSharedMem(TVMArgs& args, 
+ std::vector& shmids, + std::vector& arg_sizes) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + // generate shared memory key and id + // TODO: maybe get the current path?? + key_t key = ftok("/", i+1); + int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); + shmids.push_back(shmid); + // copy mem from TVM args to the shared memory + void* mem = shmat(shmid, nullptr, 0); + memcpy(mem, arr->data, arg_sizes[i]); + } else { + shmids.push_back(0); + } + } +} + +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes) { + for (size_t i = 0; i < shmids.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + int shmid = shmids[i]; + void* mem = shmat(shmid, nullptr, 0); + memcpy(arr->data, mem, arg_sizes[i]); + shmdt(mem); + shmctl(shmid, IPC_RMID, nullptr); + } + } +} + +// copy values from the shared mem to local mem +void PrintCopy() + + + + +// copy values from local mem back to shared mem +void PrintCopyBack() + + + +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func, + std::string test_file) { + int indent = 0; + std::ofstream stream; + stream.open("host.cpp"); + + // write the header files and macro commmands. 
+ stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# pragram once\n"; + stream << "# define LENGTH (1024)\n"; + stream << "# define NUM_WORKGROUPS (1)\n"; + stream << "# define WORKGROUP_SIZE (16)\n"; + stream << test_file; + stream << "int main(void) { \n"; + indent += 2; + + + // get the platform and devices + stream << "#if define(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; + PrintIndent(stream, indent); + stream << "# define STR_VALUE(arg) #arg\n"; + PrintIndent(stream, indent); + stream << "# define GET_STRING(name) STR_VALUE(name)\n"; + PrintIndent(stream, indent); + stream << "# define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n" + stream << "#endif"; + + + // get the xclbin filename . 
+ stream << "char * xclbinFilename = argv[1]\n"; + stream << "size_t \n"; + + // source memories + + + // create the test data and goldn data locally + + + + + // OpenCL HOST CODE AREA START + // get First Platform + stream << "std::vector platforms;\n"; + stream << "cl::Platform::get(&platforms)\n;"; + stream << "cl::Platform platform = platform[0];\n"; + stream << "std::cout << "" " + + // get accelerator devices and select 1st such device + + // create context and command queue for selected device + + + // load xcl binary into the buffer + + + // creat program from binary file + + // create kernel + + // create buffers inside device + + // copy input data to device buffer from host memory + + // run the kernel + + // copy device result data to host memory + // OpenCL HOST CODE AREA END + + + + // compare the results of the kernel to the simulation + + + + + for ( int i = 0;i < args.size(); i++ ) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared memory + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << "* "; + stream << Type2Byte(arg_types)[i] << "*"; + PrintIndent(stream, indent); + + + } + } + + // call the function + PrintIndent(stream, indent); + stream << func->name << "("; + for (int i = 0;i < args.size();i++) { + if (i != args.size()-1) { + stream << ", "; + } + } + stream << ");\n"; + + // copy to shared mem + for (int i = 0;i < args.size();i++ ) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + PrintCopyBack(arr, stream, indent, i); + PrintIndent(stream, indent); + } + } + stream << "}\n"; + stream.close(); +} +} // namespace + +class SDAccelModuleNode final : public ModuleNode { + public: + SDAccelModuleNode(LoweredFunc func, std::string test_file) + : func_(func), test_file_(test_file) {} + + const char* type_key() const { + return "sdaccel_sw_emu"; + } + + PackedFunc GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) final { + return 
PackedFunc([this](TVMArgs args, TVMRetValue* rv){ + if (args.size() != (int)func_->args.size()) + LOG(FATAL) << "The function should take in " << func_->args.size() + << " inputs but get " << args.size(); + std::vector arg_sizes; + std::vector arg_types; + std::vector shmids; + CollectArgInfo(args, func_, arg_sizes, arg_types); + GenSharedMem(args, shmids, arg_sizes); + GenHostCode(args, shmids, arg_types, func_, test_file_); + // TODO: find a better way to do the following + LOG(CLEAN) << "Compiling the generated SDAccel OpenCL code ..."; + LOG(CLEAN) << "Running SDAccel OpenCL simulation ..."; + system("make -f sdaccel.mk run_cpu_em"); + // system("./out"); + LOG(CLEAN) << "Finished SDAccel OpenCL simulation"; + system("make -f sdaccel.mk clean"); + FreeSharedMem(args, shmids, arg_sizes); + }); + } + + private: + LoweredFunc func_; + std::string test_file_; +}; + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code) { + + std::shared_ptr n = + std::make_shared(func, code); + + return Module(n); +} + + +} // namespace runtime +} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h new file mode 100755 index 000000000..25db653b9 --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h @@ -0,0 +1,30 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-30 15:15:15 + * @LastEditTime: 2019-07-30 15:15:15 + * @LastEditors: your name + */ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef SDACCEL_MODULE_H +#define SDACCEL_MODULE_H + +# include +# include +# include "../../build_common.h" + +namespace TVM { +namespace runtime { + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code); + +} // namespace runtime +} // namespace TVM + +#endif \ No newline at end of file From 6fe29d5981efd109a00d3fd75bbfc9655807594d Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Tue, 27 Aug 2019 18:04:28 -0400 
Subject: [PATCH 053/103] [delete] all of the code about opencl --- tvm/src/codegen/opencl/aocl/aocl_module.cc | 0 tvm/src/codegen/opencl/aocl/aocl_module.h | 0 tvm/src/codegen/opencl/build_opencl.cc | 63 -- tvm/src/codegen/opencl/codeanalys_openclc.cc | 919 ------------------ tvm/src/codegen/opencl/codeanalys_openclc.h | 202 ---- tvm/src/codegen/opencl/codegen_aocl.cc | 156 --- tvm/src/codegen/opencl/codegen_aocl.h | 34 - tvm/src/codegen/opencl/codegen_opencl.cc | 246 ----- tvm/src/codegen/opencl/codegen_opencl.h | 53 - tvm/src/codegen/opencl/codegen_sdaccel.cc | 197 ---- tvm/src/codegen/opencl/codegen_sdaccel.h | 36 - .../codegen/opencl/sdaccel/sdaccel_module.cc | 336 ------- .../codegen/opencl/sdaccel/sdaccel_module.h | 30 - 13 files changed, 2272 deletions(-) delete mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.cc delete mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.h delete mode 100755 tvm/src/codegen/opencl/build_opencl.cc delete mode 100755 tvm/src/codegen/opencl/codeanalys_openclc.cc delete mode 100755 tvm/src/codegen/opencl/codeanalys_openclc.h delete mode 100755 tvm/src/codegen/opencl/codegen_aocl.cc delete mode 100755 tvm/src/codegen/opencl/codegen_aocl.h delete mode 100755 tvm/src/codegen/opencl/codegen_opencl.cc delete mode 100755 tvm/src/codegen/opencl/codegen_opencl.h delete mode 100755 tvm/src/codegen/opencl/codegen_sdaccel.cc delete mode 100755 tvm/src/codegen/opencl/codegen_sdaccel.h delete mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc delete mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.h diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.cc b/tvm/src/codegen/opencl/aocl/aocl_module.cc deleted file mode 100755 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.h b/tvm/src/codegen/opencl/aocl/aocl_module.h deleted file mode 100755 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc deleted file mode 100755 index 
394b4dd3f..000000000 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -# include -# include -# include -# include -# include -# include "../../runtime/meta_data.h" -# include -# include "./codegen_sdaccel.h" -# include "./codegen_aocl.h" -# include "./codeanalys_openclc.h" -# include "../build_common.h" -// # include "./sdaccel/sdaccel_module.h" -// # include "./aocl/aocl_module.h" - - -namespace TVM { -namespace codegen { - - - - - -template -std::string BuildOpenCL(Array funcs){ - using TVM::runtime::Registry; - CodeAnalysOpenCLC ca; - CodeGen cg; - for(LoweredFunc f: funcs){ - ca.AddFunction(f); - str2tupleMapmap_arg_type; - map_arg_type = ca.Finish(); - - cg.AddFunction(f, map_arg_type); - } - std::string code = cg.Finish(); - - if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { - code = (*f)(code).operator std::string(); - } - - LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; - return code; -} - - - - -TVM_REGISTER_API("codegen.build_sdaccel") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildOpenCL(args[0]); - }); - -TVM_REGISTER_API("codegen.build_aocl") -.set_body([]( TVMArgs args, TVMRetValue * rv ) { - * rv = BuildOpenCL(args[0]); - }); -} -} diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc deleted file mode 100755 index 030453a94..000000000 --- a/tvm/src/codegen/opencl/codeanalys_openclc.cc +++ /dev/null @@ -1,919 +0,0 @@ -/*! 
- * Copyright (c) 2017 by Contributors - * \file tvm/src/codegen/hlsc/codegen_hlsc.cc - */ -#include -#include -#include -#include "./codeanalys_openclc.h" -#include "../codegen_common.h" -#include "../../arithmetic/compute_expr.h" - -namespace TVM { -namespace codegen { - -using namespace ir; - -void CodeAnalysOpenCLC::Init() { - ; -} - -void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { - alloc_storage_scope_.clear(); - handle_data_type_.clear(); - map_arg_type_.clear(); - CodeGenSourceBase::ClearFuncState(); -} -void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { - // Clear previous generated state. - this->InitFuncState(f); - - // Add to alloc buffer type. - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - // Record the arguments for analyzing the type - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - this->map_arg_type_[vid]; - } - int func_scope = this->BeginScope(); - VisitStmt(f->body); - this->EndScope(func_scope); -} - -str2tupleMap CodeAnalysOpenCLC::Finish() { - return this->map_arg_type_; -} - -void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) - VisitExpr(n, os); -} - -void CodeAnalysOpenCLC::PrintSSAAssign( - const std::string& target, const std::string& src, Type t) { - PrintType(t, stream); - stream << ' ' << target << " = "; - if (src.length() > 3 && - src[0] == '(' && src[src.length() - 1] == ')') { - stream << src.substr(1, src.length() - 2); - } else { - stream << src; - } - stream << ";\n"; -} - -// Print a reference expression to a buffer. 
-std::string CodeAnalysOpenCLC::GetBufferRef( - Type t, const Variable* buffer, Expr index) { - std::ostringstream os; - std::string vid = GetVarID(buffer); - std::string scope; - if (alloc_storage_scope_.count(buffer)) { - scope = alloc_storage_scope_.at(buffer); - } - bool is_vol = volatile_buf_.count(buffer) != 0; - if (t.lanes() == 1) { - if (!HandleTypeMatch(buffer, t) || is_vol) { - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)" << vid << ')'; - } else { - os << vid; - } - os << '['; - PrintExpr(index, os); - os << ']'; - } else { - // Buffer declared as vector type. - // optimize for case where it is in register, - if (HandleTypeMatch(buffer, t) && !is_vol) { - // optimize for constant access - int offset; - if (arith::GetConstInt(index, &offset)) { - CHECK_EQ(offset % t.lanes(), 0) - << "Find unaligned vector load to a vector type"; - os << vid << '[' << (offset / t.lanes()) << ']'; - return os.str(); - } - } - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)("; - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << vid << " + "; - PrintExpr(index, os); - os << "))[0]"; - } - return os.str(); -} - -// Print a reference expression to a buffer. -std::string CodeAnalysOpenCLC::GetStructRef( - Type t, const Expr& buffer, const Expr& index, int kind) { - if (kind < intrinsic::kArrKindBound_) { - std::ostringstream os; - os << "(((TVMArray*)"; - this->PrintExpr(buffer, os); - os << ")"; - if (kind == intrinsic::kArrAddr) { - os << " + "; - this->PrintExpr(index, os); - os << ")"; - return os.str(); - } - os << '['; - this->PrintExpr(index, os); - os << "]."; - // other case: get fields. 
- switch (kind) { - case intrinsic::kArrData: os << "data"; break; - case intrinsic::kArrShape: os << "shape"; break; - case intrinsic::kArrStrides: os << "strides"; break; - case intrinsic::kArrNDim: os << "ndim"; break; - case intrinsic::kArrTypeCode: os << "dtype.code"; break; - case intrinsic::kArrTypeBits: os << "dtype.bits"; break; - case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; - case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; - case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; - case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; - default: os << "unknown_field_code_" << kind; - } - os << ')'; - return os.str(); - } else { - CHECK_LT(kind, intrinsic::kTVMValueKindBound_); - std::ostringstream os; - os << "(((TVMValue*)"; - this->PrintExpr(buffer, os); - os << ")[" << index << "]."; - if (t.is_handle()) { - os << "v_handle"; - } else if (t.is_float()) { - os << "v_float64"; - } else if (t.is_int()) { - os << "v_int64"; - } else { - os << t; - } - os << ")"; - return os.str(); - } -} - - -bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) return false; - return it->second == t; -} - -void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) { - handle_data_type_[buf_var] = t; - } else { - CHECK(it->second == t) - << "conflicting buf var type"; - } -} - -void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, - Type t, int i, - std::ostream& os) { // NOLINT(*) - os << vec << ".s" << std::hex << i << std::dec; -} - -void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, - Type t, int i, - const std::string& value) { - this->PrintIndent(); - stream << vec << ".s" << std::hex << i - << " = " << value << ";\n" << std::dec; -} - -std::string CodeAnalysOpenCLC::GetVecLoad( - Type t, const 
Variable* buffer, Expr base) { - return GetBufferRef(t, buffer, base); -} - -void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - std::string ref = GetBufferRef(t, buffer, base); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; -} - -std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - return os.str(); -} - -void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { - LOG(FATAL) << "not implemented"; -} - -void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) -} - -void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) - CHECK_EQ(scope, "global"); -} - -std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) - std::ostringstream os; - PrintType(t, os); - return os.str(); -} - -void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - if (t.is_float()) { - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - os << "double"; return; - } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "int" << t.bits() << "_t"; return; - } - } - } - os << t; -} - - -inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == Int(32)) { - std::ostringstream temp; - temp << op->value; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == UInt(32)) { - std::ostringstream temp; - temp << op->value << "U"; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - switch (op->type.bits()) { - case 64: case 32: { - std::ostringstream temp; - temp << std::scientific << op->value; - if (op->type.bits() == 32) temp << 'f'; - p->MarkConst(temp.str()); - os << temp.str(); - break; - } - case 16: { - os << '('; - p->PrintType(op->type, os); - os << ')' << std::scientific <value << 'f'; - break; - } - default: os << op << "\n"; - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) - os << "\"" << op->value << "\""; -} - -template -inline void PrintBinaryExpr(const T* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - if (isalpha(opstr[0])) { - os << opstr << '('; - p->PrintExpr(op->a, os); - os << ", "; - p->PrintExpr(op->b, os); - os << ')'; - } else { - os << '('; - p->PrintExpr(op->a, os); - os << ' ' << opstr << ' '; - p->PrintExpr(op->b, os); - os << ')'; - } - } else { - p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); - } -} - -inline void PrintBinaryIntrinsitc(const Call* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - CHECK_EQ(op->args.size(), 2U); - os << '('; - p->PrintExpr(op->args[0], 
os); - os << opstr; - p->PrintExpr(op->args[1], os); - os << ')'; - } else { - p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); - } -} -void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) - std::stringstream value; - this->PrintExpr(op->value, value); - os << CastFromTo(value.str(), op->value.type(), op->type); -} -void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) - os << GetVarID(op); -} -void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "+", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "-", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "*", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "/", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "%", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "min", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "max", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "==", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "!=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">", os, this); -} -void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "&&", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "||", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) - os << '!'; - PrintExpr(op->a, os); -} - -void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) - if (op->call_type == Call::Extern || - op->call_type == Call::PureExtern) { - os << op->name << "("; - for (size_t i = 0; i < op->args.size(); i++) { - this->PrintExpr(op->args[i], os); - if (i < op->args.size() - 1) { - os << ", "; - } - } - os << ")"; - } else if (op->is_intrinsic(Call::bitwise_and)) { - PrintBinaryIntrinsitc(op, " & ", os, this); - } else if (op->is_intrinsic(Call::bitwise_xor)) { - PrintBinaryIntrinsitc(op, " ^ ", os, this); - } else if (op->is_intrinsic(Call::bitwise_or)) { - PrintBinaryIntrinsitc(op, " | ", os, this); - } else if (op->is_intrinsic(Call::bitwise_not)) { - CHECK_EQ(op->args.size(), 1U); - os << "(~"; - this->PrintExpr(op->args[0], os); - os << ')'; - } else if (op->is_intrinsic(Call::shift_left)) { - PrintBinaryIntrinsitc(op, " << ", os, this); - } else if (op->is_intrinsic(Call::shift_right)) { - PrintBinaryIntrinsitc(op, " >> ", os, this); - } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintExpr(op->args[0], os); - os << " ? 
"; - PrintExpr(op->args[1], os); - os << " : "; - PrintExpr(op->args[2], os); - os << ")"; - } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { - const Load *l = op->args[0].as(); - CHECK(op->args.size() == 1 && l); - os << "(("; - this->PrintType(l->type.element_of(), os); - os << " *)" << this->GetVarID(l->buffer_var.get()) - << " + "; - this->PrintExpr(l->index, os); - os << ')'; - } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { - CHECK_EQ(op->args.size(), 3U); - os << GetStructRef( - op->type, op->args[0], op->args[1], - op->args[2].as()->value); - } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { - CHECK_EQ(op->args.size(), 1U); - os << "("; - this->PrintExpr(op->args[0], os); - os << " == NULL)"; - } else - os << op->name << "()"; -} - -void CodeAnalysOpenCLC::PrintVecBinaryOp( - const std::string& op, Type t, - Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) - if (isalpha(op[0])) { - os << op << "("; - this->PrintExpr(lhs, os); - os << ", "; - this->PrintExpr(rhs, os); - os << ")"; - } else { - os <<"("; - this->PrintExpr(lhs, os); - os << ' ' << op << ' '; - this->PrintExpr(rhs, os); - os << ")"; - } -} - -inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { - const Ramp* r = index.as(); - if (!r) return false; - if (!is_one(r->stride)) return false; - CHECK_EQ(r->lanes, lanes); - *base = r->base; - return true; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) - int lanes = op->type.lanes(); - // delcare type. 
- if (op->type.lanes() == 1) { - std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); - os << ref; - } else { - CHECK(is_one(op->predicate)) - << "predicated load is not supported"; - Expr base; - if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { - std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); - os << ref; - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // load seperately. - std::string svalue = GetUniqueName("_"); - this->PrintIndent(); - this->PrintType(op->type, stream); - stream << ' ' << svalue << ";\n"; - std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string vid = GetVarID(op->buffer_var.get()); - Type elem_type = op->type.element_of(); - for (int i = 0; i < lanes; ++i) { - std::ostringstream value_temp; - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - value_temp << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, value_temp); - value_temp << ' '; - } - } - PrintType(elem_type, value_temp); - value_temp << "*)" << vid << ')'; - } else { - value_temp << vid; - } - value_temp << '['; - PrintVecElemLoad(sindex, op->index.type(), i, value_temp); - value_temp << ']'; - PrintVecElemStore(svalue, op->type, i, value_temp.str()); - } - os << svalue; - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { - Type t = op->value.type(); - if (t.lanes() == 1) { - std::string value = this->PrintExpr(op->value); - std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; - } else { - CHECK(is_one(op->predicate)) - << "Predicated store is not supported"; - Expr base; - if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { - std::string value = this->PrintExpr(op->value); - this->PrintVecStore(op->buffer_var.get(), t, base, value); - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // store elements seperately - std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); - std::string vid = GetVarID(op->buffer_var.get()); - for (int i = 0; i < t.lanes(); ++i) { - this->PrintIndent(); - Type elem_type = t.element_of(); - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - stream << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, stream); - stream << ' '; - } - } - PrintType(elem_type, stream); - stream << "*)" << vid << ')'; - } else { - stream << vid; - } - stream << '['; - PrintVecElemLoad(index, op->index.type(), i, stream); - stream << "] = "; - PrintVecElemLoad(value, op->value.type(), i, stream); - stream << ";\n"; - } - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) - std::string value = PrintExpr(op->value); - CHECK(!var_idmap_.count(op->var.get())); - var_idmap_[op->var.get()] = value; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) - // constraint of current logic - CHECK_EQ(op->base.type(), Int(32)); - os << "((int" << op->lanes << ")("; - for (int i = 0; i < op->lanes; i++) { - os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; - if (i != op->lanes - 1) - os << ", "; - } - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << 
"Broadcast: not supported "; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->condition, os); - os << " ? "; - PrintExpr(op->true_value, os); - os << " : "; - PrintExpr(op->false_value, os); - os << ")"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " & (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. a' = SHR a for Idx_R bits - // 2. mask: 1.(length).1 - // (1 << (L - R + 1)) - 1 - // 3. a' & mask - - os << "(("; - PrintExpr(op->a, os); - os << " >> "; - PrintExpr(op->index_right, os); - os << ") & ((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " | (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. mask: 0.(Idx L).01..10.(Idx R).0 - // ((1 << (L - R + 1)) - 1) << R - // 2. 
a & mask - - os << "("; - PrintExpr(op->a, os); - os << " & (((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1) << "; - PrintExpr(op->index_right, os); - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "Quantize is not yet support"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "KernelExpr is not yet support"; -} - - -void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { - // TODO comaniac - //std::vector vec_var = GetNodesByType(op->value); - - std::string arg_vid = "unknown"; - std::string str = PrintExpr(op->value); - if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { - size_t pos_arg = str.find("arg"); - size_t pos_data = str.find("data"); - arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); - } - else if (std::regex_match(str, std::regex("arg(.+)"))) - arg_vid = str; - - std::string vid = AllocVarID(op->var.get()); - if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { - if ("unknown" != arg_vid) - LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; - } else { - Type type = op->var.type(); - if (op->var.type() == Handle() && - handle_data_type_.count(op->var.get())) - type = handle_data_type_.at(op->var.get()); - this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); - } - VisitStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { - CHECK(!is_zero(op->condition)); - std::string vid = AllocVarID(op->buffer_var.get()); - if (op->new_expr.defined()) { - // Prefer global static allocation for the program - CHECK_EQ(op->free_function, "nop"); - std::string new_data = PrintExpr(op->new_expr); - this->PrintIndent(); - PrintType(op->type, stream); - stream << "* "<< vid << '=' << new_data << ";\n"; - } else { - this->PrintIndent(); - int32_t constant_size = 
op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); - stream << ' '; - PrintType(op->type, stream); - stream << ' '<< vid << '[' - << constant_size << "];\n"; - } - RegisterHandleType(op->buffer_var.get(), op->type); - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { - if (op->attr_key == ir::attr::thread_extent) { - IterVar iv(op->node.node_); - if (iv->thread_tag.length() != 0) { - if (!var_idmap_.count(iv->var.get())) { - BindThreadIndex(iv); - } - } - } else if (op->attr_key == ir::attr::storage_scope) { - const Variable* v = op->node.as(); - CHECK(v); - alloc_storage_scope_[v] = op->value.as()->value; - } else if (op->attr_key == ir::attr::volatile_scope) { - const Variable* v = op->node.as(); - CHECK(v); - volatile_buf_.insert(v); - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (op->message.as()) { - // GLOG style check - stream << "CHECK(" << cond << ") << \"" - << op->message.as()->value << "\";\n"; - } else { - stream << "assert(" << cond << ");\n"; - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const For* op) { - std::string extent = PrintExpr(op->extent); - PrintIndent(); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { - PrintStmt(op->first); - if (op->rest.defined()) PrintStmt(op->rest); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { - if (is_const(op->value)) return; - const Call* call = op->value.as(); - if (call) { - if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { - this->PrintStorageSync(call); return; - } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { - CHECK_EQ(call->args.size(), 4); - std::string value = PrintExpr(call->args[3]); - std::string ref = GetStructRef( - call->args[3].type(), - call->args[0], - call->args[1], - call->args[2].as()->value); - this->PrintIndent(); - this->stream << ref << " = " << value << ";\n"; - return; - } - } - std::string vid = this->PrintExpr(op->value); - this->PrintIndent(); - this->stream << "(void)" << vid << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { - PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { - LOG(FATAL) << "KernelDef is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { - LOG(FATAL) << "KernelStmt is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { - this->stream << "return "; - PrintExpr(op->value); - this->stream << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { - // TODO: Check if the break statement is used correctly - this->stream << "break;\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const While *op) { - std::string condition = PrintExpr(op->condition); 
- PrintIndent(); - stream << "while (" << condition << ") {\n"; - int while_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(while_scope); - PrintIndent(); - stream << "}\n"; -} - - -void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} - - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.h b/tvm/src/codegen/opencl/codeanalys_openclc.h deleted file mode 100755 index 8aaeedb39..000000000 --- a/tvm/src/codegen/opencl/codeanalys_openclc.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -#ifndef TVM_CODEGEN_CODEANALYS_OPENCLC_H_ -#define TVM_CODEGEN_CODEANALYS_OPENCLC_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../codegen_source_base.h" - -namespace TVM { -namespace codegen { - -using namespace ir; - -template -using str2tupleMap = std::unordered_map>; - -/*! - * \brief A class to analyze the IR AST for MerlinC generation. - * - */ -class CodeAnalysOpenCLC : - public ExprFunctor, - public StmtFunctor, - public CodeGenSourceBase { - public: - /*! - * \brief Initialize the code generator. - * \param output_ssa Whether output SSA. - */ - void Init(); - /*! - * \brief Add the function to the generated module. - * \param f The function to be compiled. - */ - void AddFunction(LoweredFunc f); - /*! - * \brief Finalize the compilation and return the code. - * \return The code. - */ - str2tupleMap Finish(); - /*! - * \brief Print the Stmt n to CodeAnalysMerlinC->stream - * \param n The statement to be printed. - */ - void PrintStmt(const Stmt& n) { - VisitStmt(n); - } - /*! - * \brief Print the expression n(or its ssa id if in ssa mode) into os - * \param n The expression to be printed. - * \param os The output stream - */ - void PrintExpr(const Expr& n, std::ostream& os); - /*! - * \brief Same as PrintExpr, but simply returns result string - * \param n The expression to be printed. 
- */ - std::string PrintExpr(const Expr& n) { - std::ostringstream os; - PrintExpr(n, os); - return os.str(); - } - // The following parts are overloadable print operations. - /*! - * \brief Initialize codegen state for generating f. - * \param f The function to be compiled. - */ - virtual void InitFuncState(LoweredFunc f); - // expression - void VisitExpr_(const Variable* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Load* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Let* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Call* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Add* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Sub* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Mul* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Div* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Mod* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Min* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Max* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const EQ* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const NE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const LT* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const LE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GT* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const And* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Or* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Cast* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Not* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Select* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const 
Ramp* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Broadcast* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const IntImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const UIntImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const FloatImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const StringImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GetBit* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GetSlice* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const SetBit* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) - // statment - void VisitStmt_(const LetStmt* op) override; - void VisitStmt_(const Store* op) override; - void VisitStmt_(const For* op) override; - void VisitStmt_(const IfThenElse* op) override; - void VisitStmt_(const Allocate* op) override; - void VisitStmt_(const AttrStmt* op) override; - void VisitStmt_(const AssertStmt* op) override; - void VisitStmt_(const Evaluate* op) override; - void VisitStmt_(const Block* op) override; - void VisitStmt_(const ProducerConsumer* op) override; - void VisitStmt_(const KernelDef* op) override; - void VisitStmt_(const KernelStmt* op) override; - void VisitStmt_(const Return* op) override; - void VisitStmt_(const Break* op) override; - void VisitStmt_(const While* op) override; - void VisitStmt_(const Partition* op) override; - /*! - * Print Type represetnation of type t. - * \param t The type representation. - * \param os The stream to print the ctype into - */ - void PrintType(Type t, std::ostream& os); // NOLINT(*) - std::string GetType(Type t); // NOLINT(*) - /*! 
- * \brief Print expr representing the thread tag - * \param IterVar iv The thread index to be binded; - */ - void BindThreadIndex(const IterVar& iv); // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os); // NOLINT(*) - void PrintStorageSync(const Call* op); // NOLINT(*) - // Binary vector op. - void PrintVecBinaryOp( - const std::string&op, Type op_type, - Expr lhs, Expr rhs, std::ostream& os); // NOLINT(*) - // print vector load - std::string GetVecLoad(Type t, const Variable* buffer, Expr base); - // print vector store - void PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value); // NOLINT(*) - // print load of single element - void PrintVecElemLoad( - const std::string& vec, Type t, int i, std::ostream& os); // NOLINT(*) - // print store of single element. - void PrintVecElemStore( - const std::string& vec, Type t, int i, const std::string& value); - // Get a cast type from to - std::string CastFromTo(std::string value, Type from, Type target); - - protected: - // Print reference to struct location - std::string GetStructRef( - Type t, const Expr& buffer, const Expr& index, int kind); - // print reference to a buffer as type t in index. - virtual std::string GetBufferRef( - Type t, const Variable* buffer, Expr index); - /*! - * \brief If buffer is allocated as type t. - * \param buf_var The buffer variable. - * \param t The type to be checked. - */ - bool HandleTypeMatch(const Variable* buf_var, Type t) const; - /*! - * \brief Register the data type of buf_var - * \param buf_var The buffer variable. - * \param t The type to be checked. - */ - void RegisterHandleType(const Variable* buf_var, Type t); - // override - void PrintSSAAssign( - const std::string& target, const std::string& src, Type t) final; - /*! \brief restrict keyword */ - std::string restrict_keyword_{""}; - /*! \brief the storage scope of allocation */ - std::unordered_map alloc_storage_scope_; - /*! 
\brief the data type of allocated buffers */ - std::unordered_map handle_data_type_; - - private: - /*! \brief set of volatile buf access */ - std::unordered_set volatile_buf_; - /*! \brief map of function arguments to their types */ - str2tupleMap map_arg_type_; -}; - -} // namespace codegen -} // namespace TVM -#endif // TVM_CODEGEN_CODEGEN_C_H_ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc deleted file mode 100755 index 4ae3015d9..000000000 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include -# include -# include -# include -# include -# include -# include "./codegen_aocl.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - - -void CodeGenAOCL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - this->InitFuncState(f); - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - - this->stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" << "\n"; - this->stream << "__kernel " << "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - this->stream << "__global "; - // this->stream << "global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - 
this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - - - -void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - - if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; - } - else if ( t.is_int()) { - os << "ap_int<" << t.bits() << ">" << "intd_t"; - } - else { - if (t.is_float()) { - if (t.bits() == 16) { - enable_fp16_ = true; - os << "half"; return; - } - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - enable_fp64_ = true; - os << "double"; return; - } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; - // os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_int<" << t.bits() << ">" << "intd_t"; return; - // os << "int" << t.bits() << "_t"; return; - } - } - } - } - } -} - - -void CodeGenAOCL::VisitStmt_(const For* op) { - std::ostringstream os; - if (op->for_type == ForType::Unrolled) { - int unroll_factor = 0, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto factor = op->annotate_values[i].as(); - if (str->value == "factor" && factor != nullptr && factor->value > 1) { - unroll_factor = factor->value; - break; - } - } - i++; - } - os << "#pragma unroll"; - if (unroll_factor > 0) os << " " << unroll_factor << "\n"; - else os << "\n"; - } - else if (op->for_type == ForType::Pipelined) { - int II = 1, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto initiation_interval = op->annotate_values[i].as(); - if (str->value == "initiation_interval" && - 
initiation_interval != nullptr && - initiation_interval->value > 1) { - II = initiation_interval->value; - break; - } - } - i++; - } - os << "#pragma"; - os << " ii " << II << "\n"; - } - CodeGenAOCL::GenForStmt(op, os.str(), true); -} - - - - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h deleted file mode 100755 index f3e3a0c75..000000000 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -#ifndef TVM_CODEGEN_CODEGEN_AOCL_H_ -#define TVM_CODEGEN_CODEGEN_AOCL_H_ - -# include -# include -# include -# include "./codeanalys_openclc.h" -# include "./codegen_opencl.h" - - -namespace TVM { -namespace codegen { - - -class CodeGenAOCL : public CodeGenOpenCL { - public: - CodeGenAOCL(); - // void AddFunction(LoweredFunc f); - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - - void VisitStmt_(const For* op) override; - -}; -} // namespace codegen -} // namespace TVM - -#endif // TVM_CODEGEN_CODEGEN_AOCL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc deleted file mode 100755 index 6f2a43d39..000000000 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ /dev/null @@ -1,246 +0,0 @@ - -# include -# include -# include -# include -# include -# include -# include "./codegen_opencl.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM{ -namespace codegen{ - -CodeGenOpenCL::CodeGenOpenCL(){ - restrict_keyword_ = "restrict"; -} - -std::string CodeGenOpenCL::Finish() { - // inject extension enable pragma for fp16 and fp64 - if (enable_fp16_) { - decl_stream - << "#ifdef cl_khr_fp16\n" - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "#elif defined(cl_amd_fp16)\n" - "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" - "#else\n" - "#error \"Half precision 
floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - if (enable_fp64_) { - decl_stream - << "#ifdef cl_khr_fp64\n" - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "#elif defined(cl_amd_fp64)\n" - "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" - "#else\n" - "#error \"Double precision floating point not supported" - "by OpenCL implementation on your device.\" \n" - "#endif\n\n"; - } - - return CodeGenC::Finish(); -} - -void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { - CHECK(!var_idmap_.count(iv->var.get())); - runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); - std::ostringstream os; - if (ts.rank == 1) { - os << "get_local_id(" << ts.dim_index << ")"; - } else { - os << "get_group_id(" << ts.dim_index << ")"; - } - var_idmap_[iv->var.get()] = - CastFromTo(os.str(), UInt(64), iv->var.type()); -} - - - - -void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, - Expr base, std::ostream& os) { // NOLINT(*) - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - auto it = alloc_storage_scope_.find(buffer); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << GetVarID(buffer) << " + "; - PrintExpr(base, os); -} -std::string CodeGenOpenCL::GetVecLoad( - Type t, const Variable* buffer, Expr base) { - std::ostringstream os; - os << "vload" << t.lanes() << "(0, "; - PrintVecAddr(buffer, t, base, os); - os << ")"; - return os.str(); -} - -void CodeGenOpenCL::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - this->PrintIndent(); - stream << "vstore" << t.lanes() << "(" << value << ", 0, "; - PrintVecAddr(buffer, t, base, stream); - stream << ");\n"; -} - -void CodeGenOpenCL::PrintStorageSync(const Call* op) { - const std::string& sync = op->args[0].as()->value; - if (sync == "warp") { - LOG(FATAL) << "warp sync not supported in 
opencl"; - } else if (sync == "shared") { - this->PrintIndent(); - this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; - } else if (sync == "global") { - LOG(FATAL) << "not supported"; - } -} - - - -void CodeGenOpenCL::PrintStorageScope( - const std::string& scope, std::ostream& os) { // NOLINT(*) - if (scope == "global") { - os << "global "; - } else if (scope == "shared") { - os << "local "; - } -} - - -std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - if (target.lanes() == 1) { - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - } else { // convert vector type - os << "("; - os << "convert_"; - this->PrintType(target, os); - os << "(" << value << "))"; - } - return os.str(); -} - -void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - std::string v = PrintExpr(op->value); - os << "(("; - PrintType(op->type, os); - os << ")("; - for (int i = 0; i < op->lanes; ++i) { - if (i != 0) os << ", "; - os << v; - } - os << "))"; -} - -void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) - if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintType(op->args[2].type(), os); - os << ")"; - } - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); -} - - -void CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) - if (std::isinf(op->value)) { - if ( op->value < 0) { - os << "-"; - } - os << "INFINITY"; - } else if 
(std::isnan(op->value)) { - os << "NAN"; - } else { - CodeGenC::VisitExpr_(op, os); - } -} - -void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) - os << "("; - PrintType(op->true_value.type(), os); - os << ")"; - CodeGenC::VisitExpr_(op, os); -} - -void CodeGenOpenCL::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - // Skip the buffer data checking - if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) - return ; - PrintIndent(); - if (cond[0] == '(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeGenOpenCL::GenForStmt(const For* op, std::string pragma, bool before) { - std::string extent = PrintExpr(op->extent); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - if (before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - PrintIndent(); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - if (!before && pragma.length() > 0) { - PrintIndent(); - stream << pragma; - } - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h deleted file mode 100755 index 01070d24a..000000000 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ -#define TVM_CODEGEN_CODEGEN_OPENCL_H_ - -# include -# 
include -# include -# include "./codeanalys_openclc.h" -# include "../codegen_c.h" - -namespace TVM{ -namespace codegen{ - -class CodeGenOpenCL : public CodeGenC{ - public: - // void AddFunction(LoweredFunc f); - CodeGenOpenCL(); - virtual void AddFunction(LoweredFunc f, str2tupleMap map_arg_type) = 0; - std::string Finish(); - void InitFuncState(LoweredFunc f) override; - void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) - void PrintStorageSync(const Call* op) override; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - virtual void PrintType(Type t, std::ostream& os) = 0; //NOLINT - std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) override; // NOLINT(*) - void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) override; //NOLINT(*) - void PrintVecAddr(const Variable * buffer, Type t, - Expr base, std::ostream& os); //NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) - - //overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) - void VisitStmt_(const LetStmt* op) override; // NOLINT - void GenForStmt(const For* op, std::string pragma, bool before); - virtual void VisitStmt_(const For* op) = 0; - - protected: - // whether enable fp16 and fp64 extension - bool enable_fp16_{false}; - bool enable_fp64_{false}; -}; - - -} // namespace codegen -} // namespace TVM - -#endif \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc deleted 
file mode 100755 index 26a3c8bb5..000000000 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ /dev/null @@ -1,197 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include -# include -# include -# include -# include -# include -# include "./codegen_sdaccel.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - -void CodeGenSDACCEL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - this->InitFuncState(f); - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - - this->stream << "__kernel " << "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - // this->stream << "__global "; - this->stream << "__local "; - // this->stream << "global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - - -void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - int lanes = t.lanes(); - if (t.is_handle()) { - CHECK_EQ(lanes, 1) - << "do not yet support vector types"; - os << "void*"; return; - } - if ( t== Bool() ) { - os << "bool"; return; - } - bool fail = false; - if (t.is_float()) { - switch (t.bits()) { - case 16: - os << "half"; - 
enable_fp16_ = true; - break; - case 32: - os << "float"; - break; - case 64: - os << "double"; - enable_fp64_ = true; - break; - default: - fail = true; - break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } else if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << 'u'; - } - if (t.bits() == 8 && t.lanes() == 4) { - // directly 4 8 bit int in integer. - os << "int"; return; - } - switch (t.bits()) { - case 8: os << "char"; break; - case 16: os << "short"; break; - case 32: os << "int"; break; - case 64: os << "long"; break; - case 1: os << "int"; break; - default: fail = true; break; - } - if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } - } - LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; -} - - - - -void CodeGenSDACCEL::VisitStmt_(const For* op) { - std::ostringstream os; - if (op->for_type == ForType::Unrolled) { - int unroll_factor = 0, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto factor = op->annotate_values[i].as(); - if (str->value == "factor" && factor != nullptr && factor->value > 1) { - unroll_factor = factor->value; - break; - } - } - i++; - } - os << "__attribute__((opencl_unroll_hint("; - if (unroll_factor > 0) os << unroll_factor << ")))\n"; - else - os << "\n"; - - } - else if (op->for_type == ForType::Pipelined) { - int II = 1, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto initiation_interval = op->annotate_values[i].as(); - if (str->value == "initiation_interval" && - initiation_interval != nullptr && - initiation_interval->value > 1) { - II = initiation_interval->value; - break; - } - } - i++; - } - os << "__attribute__((xcl_pipeline_loop("; - os << II << ")))\n"; - } - CodeGenSDACCEL::GenForStmt(op, os.str(), true); -} - - - -void CodeGenSDACCEL::VisitStmt_(const Partition* op) { - std::string vid = 
GetVarID(op->buffer_var.get()); - stream << vid << " "; - if (op->partition_type != PartitionType::Complete) { - stream << "__attribute__((xcl_array_partition("; - switch (op->partition_type) { - // case PartitionType::Complete: - // stream << "complete,"; - // break; - case PartitionType::Block: - stream << "block,"; - break; - case PartitionType::Cyclic: - stream << "cyclic,"; - break; - } - stream << op->factor << ","; - stream << op->dim << ")))\n"; - }else { - if (op->dim == 0) { - stream << "__attribute__((xcl_array_partition))\n"; - } else { - stream << "__attribute__((xcl_array_partition("; - stream << "complete,"; - stream << op->factor << ","; - stream << op->dim << ")))\n"; - } - } -} - - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h deleted file mode 100755 index 5bf156db4..000000000 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ -#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ - -# include -# include -# include -# include "./codeanalys_openclc.h" -# include "./codegen_opencl.h" - -namespace TVM { -namespace codegen { - - -class CodeGenSDACCEL : public CodeGenOpenCL { - public: - CodeGenSDACCEL(); - // void AddFunction(LoweredFunc f); - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - - void VisitStmt_(const For* op) override; - void VisitStmt_(const Partition* op) override; - - - -}; -} // namespace codegen -} // namespace TVM - -#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc deleted file mode 100755 index 8a640e556..000000000 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc +++ /dev/null @@ -1,336 +0,0 @@ -/* - * @Description: In User 
Settings Edit - * @Author: your name - * @Date: 2019-07-30 15:15:28 - * @LastEditTime: 2019-08-14 16:16:03 - * @LastEditors: Please set LastEditors - */ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include "./sdaccel_module.h" -# include -# include -# include -# include -# include - -namespace TVM { -namespace runtime { - -namespace { - -void PrintIndent(std::ofstream& stream, int indent) { - for (int i = 0;i < indent; i++ ) { - stream << ' '; - } -} - -inline size_t GetTypeSize(TVMType t) { - size_t byte = (t.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - return byte; -} - -inline size_t GetDataSize(TVMArray* arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= arr->shape[i]; - } - size_t byte = (arr->dtype.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - size *= (byte * 8 * arr->dtype.lanes + 7) / 8; - return size; -} - -inline TVMType Type2TVMType(Type t) { - TVMType tt; - if (t.is_int()) tt.code = kDLInt; - else if (t.is_uint()) tt.code = kDLUInt; - else if (t.is_float()) tt.code = kDLFloat; - else LOG(FATAL) << "Unacceptable type: " << t; - tt.bits = static_cast(t.bits()); - tt.fracs = static_cast(t.fracs()); - return tt; -} - -inline std::string Type2Str(TVMType t) { - -} - -inline std::string Tpye2ExtStr(TVMType t) { - -} - - - - - -inline std::string Type2Byte(TVMType t) { - std::string str = ""; - if (t.code == kDLFloat) { - str += "float"; - } else if (t.code == kDLInt || t.code == kDLUInt) { - if (t.code == kDLUInt) str += "u"; - str += "int"; - if (t.bits <= 8) str += "8"; - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - str += "_t"; - } - return str; -} - -void CollectArgInfo(TVMArgs& args, - LoweredFunc func, - std::vector& arg_sizes, - std::vector& arg_types) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == 
kArrayHandle) { - TVMArray* arr = args[i]; - arg_sizes.push_back(GetDataSize(arr)); - arg_types.push_back(arr->dtype); - } else { - const Variable* var = func->api_args[i].as(); - TVMType t = Type2TVMType(var->type); - arg_sizes.push_back(GetTypeSize(t)); - arg_types.push_back(t); - } - } -} - -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - // generate shared memory key and id - // TODO: maybe get the current path?? - key_t key = ftok("/", i+1); - int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); - shmids.push_back(shmid); - // copy mem from TVM args to the shared memory - void* mem = shmat(shmid, nullptr, 0); - memcpy(mem, arr->data, arg_sizes[i]); - } else { - shmids.push_back(0); - } - } -} - -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes) { - for (size_t i = 0; i < shmids.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - int shmid = shmids[i]; - void* mem = shmat(shmid, nullptr, 0); - memcpy(arr->data, mem, arg_sizes[i]); - shmdt(mem); - shmctl(shmid, IPC_RMID, nullptr); - } - } -} - -// copy values from the shared mem to local mem -void PrintCopy() - - - - -// copy values from local mem back to shared mem -void PrintCopyBack() - - - -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc func, - std::string test_file) { - int indent = 0; - std::ofstream stream; - stream.open("host.cpp"); - - // write the header files and macro commmands. 
- stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# pragram once\n"; - stream << "# define LENGTH (1024)\n"; - stream << "# define NUM_WORKGROUPS (1)\n"; - stream << "# define WORKGROUP_SIZE (16)\n"; - stream << test_file; - stream << "int main(void) { \n"; - indent += 2; - - - // get the platform and devices - stream << "#if define(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; - PrintIndent(stream, indent); - stream << "# define STR_VALUE(arg) #arg\n"; - PrintIndent(stream, indent); - stream << "# define GET_STRING(name) STR_VALUE(name)\n"; - PrintIndent(stream, indent); - stream << "# define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n" - stream << "#endif"; - - - // get the xclbin filename . 
- stream << "char * xclbinFilename = argv[1]\n"; - stream << "size_t \n"; - - // source memories - - - // create the test data and goldn data locally - - - - - // OpenCL HOST CODE AREA START - // get First Platform - stream << "std::vector platforms;\n"; - stream << "cl::Platform::get(&platforms)\n;"; - stream << "cl::Platform platform = platform[0];\n"; - stream << "std::cout << "" " - - // get accelerator devices and select 1st such device - - // create context and command queue for selected device - - - // load xcl binary into the buffer - - - // creat program from binary file - - // create kernel - - // create buffers inside device - - // copy input data to device buffer from host memory - - // run the kernel - - // copy device result data to host memory - // OpenCL HOST CODE AREA END - - - - // compare the results of the kernel to the simulation - - - - - for ( int i = 0;i < args.size(); i++ ) { - if (args[i].type_code() == kArrayHandle) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << "* "; - stream << Type2Byte(arg_types)[i] << "*"; - PrintIndent(stream, indent); - - - } - } - - // call the function - PrintIndent(stream, indent); - stream << func->name << "("; - for (int i = 0;i < args.size();i++) { - if (i != args.size()-1) { - stream << ", "; - } - } - stream << ");\n"; - - // copy to shared mem - for (int i = 0;i < args.size();i++ ) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - PrintCopyBack(arr, stream, indent, i); - PrintIndent(stream, indent); - } - } - stream << "}\n"; - stream.close(); -} -} // namespace - -class SDAccelModuleNode final : public ModuleNode { - public: - SDAccelModuleNode(LoweredFunc func, std::string test_file) - : func_(func), test_file_(test_file) {} - - const char* type_key() const { - return "sdaccel_sw_emu"; - } - - PackedFunc GetFunction( - const std::string& name, - const std::shared_ptr& sptr_to_self) final { - return 
PackedFunc([this](TVMArgs args, TVMRetValue* rv){ - if (args.size() != (int)func_->args.size()) - LOG(FATAL) << "The function should take in " << func_->args.size() - << " inputs but get " << args.size(); - std::vector arg_sizes; - std::vector arg_types; - std::vector shmids; - CollectArgInfo(args, func_, arg_sizes, arg_types); - GenSharedMem(args, shmids, arg_sizes); - GenHostCode(args, shmids, arg_types, func_, test_file_); - // TODO: find a better way to do the following - LOG(CLEAN) << "Compiling the generated SDAccel OpenCL code ..."; - LOG(CLEAN) << "Running SDAccel OpenCL simulation ..."; - system("make -f sdaccel.mk run_cpu_em"); - // system("./out"); - LOG(CLEAN) << "Finished SDAccel OpenCL simulation"; - system("make -f sdaccel.mk clean"); - FreeSharedMem(args, shmids, arg_sizes); - }); - } - - private: - LoweredFunc func_; - std::string test_file_; -}; - -Module CreateSDAccelModule( - LoweredFunc func, - std::string code) { - - std::shared_ptr n = - std::make_shared(func, code); - - return Module(n); -} - - -} // namespace runtime -} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h deleted file mode 100755 index 25db653b9..000000000 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-30 15:15:15 - * @LastEditTime: 2019-07-30 15:15:15 - * @LastEditors: your name - */ -/* - Yang.Bai - yb269@cornell.edu -*/ - -#ifndef SDACCEL_MODULE_H -#define SDACCEL_MODULE_H - -# include -# include -# include "../../build_common.h" - -namespace TVM { -namespace runtime { - -Module CreateSDAccelModule( - LoweredFunc func, - std::string code); - -} // namespace runtime -} // namespace TVM - -#endif \ No newline at end of file From a0611c6d9f4656ad6d9cc09a2afe4ceb97e11145 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Tue, 27 Aug 2019 18:07:29 -0400 
Subject: [PATCH 054/103] [ADD] new opencl back-end including xilinx & intel --- tvm/src/codegen/opencl/aocl/aocl_module.cc | 0 tvm/src/codegen/opencl/aocl/aocl_module.h | 0 tvm/src/codegen/opencl/build_opencl.cc | 65 ++ tvm/src/codegen/opencl/codeanalys_openclc.cc | 919 ++++++++++++++++++ tvm/src/codegen/opencl/codeanalys_openclc.h | 202 ++++ tvm/src/codegen/opencl/codegen_aocl.cc | 156 +++ tvm/src/codegen/opencl/codegen_aocl.h | 34 + tvm/src/codegen/opencl/codegen_opencl.cc | 256 +++++ tvm/src/codegen/opencl/codegen_opencl.h | 53 + tvm/src/codegen/opencl/codegen_sdaccel.cc | 196 ++++ tvm/src/codegen/opencl/codegen_sdaccel.h | 36 + .../codegen/opencl/sdaccel/sdaccel_module.cc | 336 +++++++ .../codegen/opencl/sdaccel/sdaccel_module.h | 30 + 13 files changed, 2283 insertions(+) create mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.cc create mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.h create mode 100755 tvm/src/codegen/opencl/build_opencl.cc create mode 100755 tvm/src/codegen/opencl/codeanalys_openclc.cc create mode 100755 tvm/src/codegen/opencl/codeanalys_openclc.h create mode 100755 tvm/src/codegen/opencl/codegen_aocl.cc create mode 100755 tvm/src/codegen/opencl/codegen_aocl.h create mode 100755 tvm/src/codegen/opencl/codegen_opencl.cc create mode 100755 tvm/src/codegen/opencl/codegen_opencl.h create mode 100755 tvm/src/codegen/opencl/codegen_sdaccel.cc create mode 100755 tvm/src/codegen/opencl/codegen_sdaccel.h create mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc create mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.h diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.cc b/tvm/src/codegen/opencl/aocl/aocl_module.cc new file mode 100755 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.h b/tvm/src/codegen/opencl/aocl/aocl_module.h new file mode 100755 index 000000000..e69de29bb diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc new file mode 100755 
index 000000000..251878db5 --- /dev/null +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -0,0 +1,65 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ + +# include +# include +# include +# include +# include +# include "../../runtime/meta_data.h" +# include +# include "./codegen_sdaccel.h" +# include "./codegen_aocl.h" +# include "./codeanalys_openclc.h" +# include "../build_common.h" +// # include "./sdaccel/sdaccel_module.h" +// # include "./aocl/aocl_module.h" + + + + +namespace TVM { +namespace codegen { + + + + + +template +std::string BuildOpenCL(Array funcs){ + using TVM::runtime::Registry; + CodeAnalysOpenCLC ca; + CodeGen cg; + for(LoweredFunc f: funcs){ + ca.AddFunction(f); + str2tupleMapmap_arg_type; + map_arg_type = ca.Finish(); + + cg.AddFunction(f, map_arg_type); + } + std::string code = cg.Finish(); + + if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { + code = (*f)(code).operator std::string(); + } + + LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; + return code; +} + + + + +TVM_REGISTER_API("codegen.build_sdaccel") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildOpenCL(args[0]); + }); + +TVM_REGISTER_API("codegen.build_aocl") +.set_body([]( TVMArgs args, TVMRetValue * rv ) { + * rv = BuildOpenCL(args[0]); + }); +} +} diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc new file mode 100755 index 000000000..030453a94 --- /dev/null +++ b/tvm/src/codegen/opencl/codeanalys_openclc.cc @@ -0,0 +1,919 @@ +/*! 
+ * Copyright (c) 2017 by Contributors + * \file tvm/src/codegen/hlsc/codegen_hlsc.cc + */ +#include +#include +#include +#include "./codeanalys_openclc.h" +#include "../codegen_common.h" +#include "../../arithmetic/compute_expr.h" + +namespace TVM { +namespace codegen { + +using namespace ir; + +void CodeAnalysOpenCLC::Init() { + ; +} + +void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { + alloc_storage_scope_.clear(); + handle_data_type_.clear(); + map_arg_type_.clear(); + CodeGenSourceBase::ClearFuncState(); +} +void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { + // Clear previous generated state. + this->InitFuncState(f); + + // Add to alloc buffer type. + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Record the arguments for analyzing the type + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + this->map_arg_type_[vid]; + } + int func_scope = this->BeginScope(); + VisitStmt(f->body); + this->EndScope(func_scope); +} + +str2tupleMap CodeAnalysOpenCLC::Finish() { + return this->map_arg_type_; +} + +void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) + VisitExpr(n, os); +} + +void CodeAnalysOpenCLC::PrintSSAAssign( + const std::string& target, const std::string& src, Type t) { + PrintType(t, stream); + stream << ' ' << target << " = "; + if (src.length() > 3 && + src[0] == '(' && src[src.length() - 1] == ')') { + stream << src.substr(1, src.length() - 2); + } else { + stream << src; + } + stream << ";\n"; +} + +// Print a reference expression to a buffer. 
+std::string CodeAnalysOpenCLC::GetBufferRef( + Type t, const Variable* buffer, Expr index) { + std::ostringstream os; + std::string vid = GetVarID(buffer); + std::string scope; + if (alloc_storage_scope_.count(buffer)) { + scope = alloc_storage_scope_.at(buffer); + } + bool is_vol = volatile_buf_.count(buffer) != 0; + if (t.lanes() == 1) { + if (!HandleTypeMatch(buffer, t) || is_vol) { + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)" << vid << ')'; + } else { + os << vid; + } + os << '['; + PrintExpr(index, os); + os << ']'; + } else { + // Buffer declared as vector type. + // optimize for case where it is in register, + if (HandleTypeMatch(buffer, t) && !is_vol) { + // optimize for constant access + int offset; + if (arith::GetConstInt(index, &offset)) { + CHECK_EQ(offset % t.lanes(), 0) + << "Find unaligned vector load to a vector type"; + os << vid << '[' << (offset / t.lanes()) << ']'; + return os.str(); + } + } + os << "(("; + if (is_vol) { + os << "volatile "; + } + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t, os); + os << "*)("; + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + if (scope.length() != 0) { + PrintStorageScope(scope, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << vid << " + "; + PrintExpr(index, os); + os << "))[0]"; + } + return os.str(); +} + +// Print a reference expression to a buffer. +std::string CodeAnalysOpenCLC::GetStructRef( + Type t, const Expr& buffer, const Expr& index, int kind) { + if (kind < intrinsic::kArrKindBound_) { + std::ostringstream os; + os << "(((TVMArray*)"; + this->PrintExpr(buffer, os); + os << ")"; + if (kind == intrinsic::kArrAddr) { + os << " + "; + this->PrintExpr(index, os); + os << ")"; + return os.str(); + } + os << '['; + this->PrintExpr(index, os); + os << "]."; + // other case: get fields. 
+ switch (kind) { + case intrinsic::kArrData: os << "data"; break; + case intrinsic::kArrShape: os << "shape"; break; + case intrinsic::kArrStrides: os << "strides"; break; + case intrinsic::kArrNDim: os << "ndim"; break; + case intrinsic::kArrTypeCode: os << "dtype.code"; break; + case intrinsic::kArrTypeBits: os << "dtype.bits"; break; + case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; + case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; + case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; + case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; + default: os << "unknown_field_code_" << kind; + } + os << ')'; + return os.str(); + } else { + CHECK_LT(kind, intrinsic::kTVMValueKindBound_); + std::ostringstream os; + os << "(((TVMValue*)"; + this->PrintExpr(buffer, os); + os << ")[" << index << "]."; + if (t.is_handle()) { + os << "v_handle"; + } else if (t.is_float()) { + os << "v_float64"; + } else if (t.is_int()) { + os << "v_int64"; + } else { + os << t; + } + os << ")"; + return os.str(); + } +} + + +bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) return false; + return it->second == t; +} + +void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { + auto it = handle_data_type_.find(buf_var); + if (it == handle_data_type_.end()) { + handle_data_type_[buf_var] = t; + } else { + CHECK(it->second == t) + << "conflicting buf var type"; + } +} + +void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, + Type t, int i, + std::ostream& os) { // NOLINT(*) + os << vec << ".s" << std::hex << i << std::dec; +} + +void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, + Type t, int i, + const std::string& value) { + this->PrintIndent(); + stream << vec << ".s" << std::hex << i + << " = " << value << ";\n" << std::dec; +} + +std::string CodeAnalysOpenCLC::GetVecLoad( + Type t, const 
Variable* buffer, Expr base) { + return GetBufferRef(t, buffer, base); +} + +void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + std::string ref = GetBufferRef(t, buffer, base); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; +} + +std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + return os.str(); +} + +void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { + LOG(FATAL) << "not implemented"; +} + +void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) +} + +void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) + CHECK_EQ(scope, "global"); +} + +std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) + std::ostringstream os; + PrintType(t, os); + return os.str(); +} + +void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os << "void*"; return; + } + if (t.is_float()) { + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "int" << t.bits() << "_t"; return; + } + } + } + os << t; +} + + +inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == Int(32)) { + std::ostringstream temp; + temp << op->value; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + if (op->type == UInt(32)) { + std::ostringstream temp; + temp << op->value << "U"; + p->MarkConst(temp.str()); + os << temp.str(); + } else { + os << "("; + p->PrintType(op->type, os); + os << ")" << op->value; + } +} + +inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) + switch (op->type.bits()) { + case 64: case 32: { + std::ostringstream temp; + temp << std::scientific << op->value; + if (op->type.bits() == 32) temp << 'f'; + p->MarkConst(temp.str()); + os << temp.str(); + break; + } + case 16: { + os << '('; + p->PrintType(op->type, os); + os << ')' << std::scientific <value << 'f'; + break; + } + default: os << op << "\n"; + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) + PrintConst(op, os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) + os << "\"" << op->value << "\""; +} + +template +inline void PrintBinaryExpr(const T* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + if (isalpha(opstr[0])) { + os << opstr << '('; + p->PrintExpr(op->a, os); + os << ", "; + p->PrintExpr(op->b, os); + os << ')'; + } else { + os << '('; + p->PrintExpr(op->a, os); + os << ' ' << opstr << ' '; + p->PrintExpr(op->b, os); + os << ')'; + } + } else { + p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); + } +} + +inline void PrintBinaryIntrinsitc(const Call* op, + const char *opstr, + std::ostream& os, // NOLINT(*) + CodeAnalysOpenCLC* p) { + if (op->type.lanes() == 1) { + CHECK_EQ(op->args.size(), 2U); + os << '('; + p->PrintExpr(op->args[0], 
os); + os << opstr; + p->PrintExpr(op->args[1], os); + os << ')'; + } else { + p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); + } +} +void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) + std::stringstream value; + this->PrintExpr(op->value, value); + os << CastFromTo(value.str(), op->value.type(), op->type); +} +void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) + os << GetVarID(op); +} +void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "+", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "-", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "*", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "/", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "%", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "min", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "max", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "==", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "!=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "<=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">", os, this); +} +void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, ">=", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "&&", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) + PrintBinaryExpr(op, "||", os, this); +} +void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) + os << '!'; + PrintExpr(op->a, os); +} + +void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) + if (op->call_type == Call::Extern || + op->call_type == Call::PureExtern) { + os << op->name << "("; + for (size_t i = 0; i < op->args.size(); i++) { + this->PrintExpr(op->args[i], os); + if (i < op->args.size() - 1) { + os << ", "; + } + } + os << ")"; + } else if (op->is_intrinsic(Call::bitwise_and)) { + PrintBinaryIntrinsitc(op, " & ", os, this); + } else if (op->is_intrinsic(Call::bitwise_xor)) { + PrintBinaryIntrinsitc(op, " ^ ", os, this); + } else if (op->is_intrinsic(Call::bitwise_or)) { + PrintBinaryIntrinsitc(op, " | ", os, this); + } else if (op->is_intrinsic(Call::bitwise_not)) { + CHECK_EQ(op->args.size(), 1U); + os << "(~"; + this->PrintExpr(op->args[0], os); + os << ')'; + } else if (op->is_intrinsic(Call::shift_left)) { + PrintBinaryIntrinsitc(op, " << ", os, this); + } else if (op->is_intrinsic(Call::shift_right)) { + PrintBinaryIntrinsitc(op, " >> ", os, this); + } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintExpr(op->args[0], os); + os << " ? 
"; + PrintExpr(op->args[1], os); + os << " : "; + PrintExpr(op->args[2], os); + os << ")"; + } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { + const Load *l = op->args[0].as(); + CHECK(op->args.size() == 1 && l); + os << "(("; + this->PrintType(l->type.element_of(), os); + os << " *)" << this->GetVarID(l->buffer_var.get()) + << " + "; + this->PrintExpr(l->index, os); + os << ')'; + } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { + CHECK_EQ(op->args.size(), 3U); + os << GetStructRef( + op->type, op->args[0], op->args[1], + op->args[2].as()->value); + } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { + CHECK_EQ(op->args.size(), 1U); + os << "("; + this->PrintExpr(op->args[0], os); + os << " == NULL)"; + } else + os << op->name << "()"; +} + +void CodeAnalysOpenCLC::PrintVecBinaryOp( + const std::string& op, Type t, + Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) + if (isalpha(op[0])) { + os << op << "("; + this->PrintExpr(lhs, os); + os << ", "; + this->PrintExpr(rhs, os); + os << ")"; + } else { + os <<"("; + this->PrintExpr(lhs, os); + os << ' ' << op << ' '; + this->PrintExpr(rhs, os); + os << ")"; + } +} + +inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { + const Ramp* r = index.as(); + if (!r) return false; + if (!is_one(r->stride)) return false; + CHECK_EQ(r->lanes, lanes); + *base = r->base; + return true; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) + int lanes = op->type.lanes(); + // delcare type. 
+ if (op->type.lanes() == 1) { + std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); + os << ref; + } else { + CHECK(is_one(op->predicate)) + << "predicated load is not supported"; + Expr base; + if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { + std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); + os << ref; + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // load seperately. + std::string svalue = GetUniqueName("_"); + this->PrintIndent(); + this->PrintType(op->type, stream); + stream << ' ' << svalue << ";\n"; + std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string vid = GetVarID(op->buffer_var.get()); + Type elem_type = op->type.element_of(); + for (int i = 0; i < lanes; ++i) { + std::ostringstream value_temp; + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + value_temp << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, value_temp); + value_temp << ' '; + } + } + PrintType(elem_type, value_temp); + value_temp << "*)" << vid << ')'; + } else { + value_temp << vid; + } + value_temp << '['; + PrintVecElemLoad(sindex, op->index.type(), i, value_temp); + value_temp << ']'; + PrintVecElemStore(svalue, op->type, i, value_temp.str()); + } + os << svalue; + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { + Type t = op->value.type(); + if (t.lanes() == 1) { + std::string value = this->PrintExpr(op->value); + std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); + this->PrintIndent(); + stream << ref << " = " << value << ";\n"; + } else { + CHECK(is_one(op->predicate)) + << "Predicated store is not supported"; + Expr base; + if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { + std::string value = this->PrintExpr(op->value); + this->PrintVecStore(op->buffer_var.get(), t, base, value); + } else { + // The assignment below introduces side-effect, and the resulting value cannot + // be reused across multiple expression, thus a new scope is needed + int vec_scope = BeginScope(); + + // store elements seperately + std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); + std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); + std::string vid = GetVarID(op->buffer_var.get()); + for (int i = 0; i < t.lanes(); ++i) { + this->PrintIndent(); + Type elem_type = t.element_of(); + if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { + stream << "(("; + if (op->buffer_var.get()->type.is_handle()) { + auto it = alloc_storage_scope_.find(op->buffer_var.get()); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, stream); + stream << ' '; + } + } + PrintType(elem_type, stream); + stream << "*)" << vid << ')'; + } else { + stream << vid; + } + stream << '['; + PrintVecElemLoad(index, op->index.type(), i, stream); + stream << "] = "; + PrintVecElemLoad(value, op->value.type(), i, stream); + stream << ";\n"; + } + EndScope(vec_scope); + } + } +} + +void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) + std::string value = PrintExpr(op->value); + CHECK(!var_idmap_.count(op->var.get())); + var_idmap_[op->var.get()] = value; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) + // constraint of current logic + CHECK_EQ(op->base.type(), Int(32)); + os << "((int" << op->lanes << ")("; + for (int i = 0; i < op->lanes; i++) { + os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; + if (i != op->lanes - 1) + os << ", "; + } + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << 
"Broadcast: not supported "; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->condition, os); + os << " ? "; + PrintExpr(op->true_value, os); + os << " : "; + PrintExpr(op->false_value, os); + os << ")"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " & (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. a' = SHR a for Idx_R bits + // 2. mask: 1.(length).1 + // (1 << (L - R + 1)) - 1 + // 3. a' & mask + + os << "(("; + PrintExpr(op->a, os); + os << " >> "; + PrintExpr(op->index_right, os); + os << ") & ((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) + os << "("; + PrintExpr(op->a, os); + os << " | (1 << ("; + PrintExpr(op->index, os); + os << " - 1)))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) + // 1. mask: 0.(Idx L).01..10.(Idx R).0 + // ((1 << (L - R + 1)) - 1) << R + // 2. 
a & mask + + os << "("; + PrintExpr(op->a, os); + os << " & (((1 << ("; + PrintExpr(op->index_left, os); + os << " - "; + PrintExpr(op->index_right, os); + os << " + 1)) - 1) << "; + PrintExpr(op->index_right, os); + os << "))"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "Quantize is not yet support"; +} + +void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "KernelExpr is not yet support"; +} + + +void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { + // TODO comaniac + //std::vector vec_var = GetNodesByType(op->value); + + std::string arg_vid = "unknown"; + std::string str = PrintExpr(op->value); + if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { + size_t pos_arg = str.find("arg"); + size_t pos_data = str.find("data"); + arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); + } + else if (std::regex_match(str, std::regex("arg(.+)"))) + arg_vid = str; + + std::string vid = AllocVarID(op->var.get()); + if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { + if ("unknown" != arg_vid) + LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; + } else { + Type type = op->var.type(); + if (op->var.type() == Handle() && + handle_data_type_.count(op->var.get())) + type = handle_data_type_.at(op->var.get()); + this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); + } + VisitStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { + CHECK(!is_zero(op->condition)); + std::string vid = AllocVarID(op->buffer_var.get()); + if (op->new_expr.defined()) { + // Prefer global static allocation for the program + CHECK_EQ(op->free_function, "nop"); + std::string new_data = PrintExpr(op->new_expr); + this->PrintIndent(); + PrintType(op->type, stream); + stream << "* "<< vid << '=' << new_data << ";\n"; + } else { + this->PrintIndent(); + int32_t constant_size = 
op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); + stream << ' '; + PrintType(op->type, stream); + stream << ' '<< vid << '[' + << constant_size << "];\n"; + } + RegisterHandleType(op->buffer_var.get(), op->type); + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { + if (op->attr_key == ir::attr::thread_extent) { + IterVar iv(op->node.node_); + if (iv->thread_tag.length() != 0) { + if (!var_idmap_.count(iv->var.get())) { + BindThreadIndex(iv); + } + } + } else if (op->attr_key == ir::attr::storage_scope) { + const Variable* v = op->node.as(); + CHECK(v); + alloc_storage_scope_[v] = op->value.as()->value; + } else if (op->attr_key == ir::attr::volatile_scope) { + const Variable* v = op->node.as(); + CHECK(v); + volatile_buf_.insert(v); + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (op->message.as()) { + // GLOG style check + stream << "CHECK(" << cond << ") << \"" + << op->message.as()->value << "\";\n"; + } else { + stream << "assert(" << cond << ");\n"; + } + this->PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const For* op) { + std::string extent = PrintExpr(op->extent); + PrintIndent(); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { + PrintStmt(op->first); + if (op->rest.defined()) PrintStmt(op->rest); +} + +void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { + if (is_const(op->value)) return; + const Call* call = op->value.as(); + if (call) { + if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { + this->PrintStorageSync(call); return; + } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { + CHECK_EQ(call->args.size(), 4); + std::string value = PrintExpr(call->args[3]); + std::string ref = GetStructRef( + call->args[3].type(), + call->args[0], + call->args[1], + call->args[2].as()->value); + this->PrintIndent(); + this->stream << ref << " = " << value << ";\n"; + return; + } + } + std::string vid = this->PrintExpr(op->value); + this->PrintIndent(); + this->stream << "(void)" << vid << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { + PrintStmt(op->body); +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { + LOG(FATAL) << "KernelDef is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { + LOG(FATAL) << "KernelStmt is not yet support"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { + this->stream << "return "; + PrintExpr(op->value); + this->stream << ";\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { + // TODO: Check if the break statement is used correctly + this->stream << "break;\n"; +} + +void CodeAnalysOpenCLC::VisitStmt_(const While *op) { + std::string condition = PrintExpr(op->condition); 
+ PrintIndent(); + stream << "while (" << condition << ") {\n"; + int while_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(while_scope); + PrintIndent(); + stream << "}\n"; +} + + +void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.h b/tvm/src/codegen/opencl/codeanalys_openclc.h new file mode 100755 index 000000000..8aaeedb39 --- /dev/null +++ b/tvm/src/codegen/opencl/codeanalys_openclc.h @@ -0,0 +1,202 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +#ifndef TVM_CODEGEN_CODEANALYS_OPENCLC_H_ +#define TVM_CODEGEN_CODEANALYS_OPENCLC_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../codegen_source_base.h" + +namespace TVM { +namespace codegen { + +using namespace ir; + +template +using str2tupleMap = std::unordered_map>; + +/*! + * \brief A class to analyze the IR AST for MerlinC generation. + * + */ +class CodeAnalysOpenCLC : + public ExprFunctor, + public StmtFunctor, + public CodeGenSourceBase { + public: + /*! + * \brief Initialize the code generator. + * \param output_ssa Whether output SSA. + */ + void Init(); + /*! + * \brief Add the function to the generated module. + * \param f The function to be compiled. + */ + void AddFunction(LoweredFunc f); + /*! + * \brief Finalize the compilation and return the code. + * \return The code. + */ + str2tupleMap Finish(); + /*! + * \brief Print the Stmt n to CodeAnalysMerlinC->stream + * \param n The statement to be printed. + */ + void PrintStmt(const Stmt& n) { + VisitStmt(n); + } + /*! + * \brief Print the expression n(or its ssa id if in ssa mode) into os + * \param n The expression to be printed. + * \param os The output stream + */ + void PrintExpr(const Expr& n, std::ostream& os); + /*! + * \brief Same as PrintExpr, but simply returns result string + * \param n The expression to be printed. 
+ */ + std::string PrintExpr(const Expr& n) { + std::ostringstream os; + PrintExpr(n, os); + return os.str(); + } + // The following parts are overloadable print operations. + /*! + * \brief Initialize codegen state for generating f. + * \param f The function to be compiled. + */ + virtual void InitFuncState(LoweredFunc f); + // expression + void VisitExpr_(const Variable* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Load* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Let* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Call* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Add* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Sub* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Mul* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Div* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Mod* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Min* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Max* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const EQ* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const NE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const LT* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const LE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GT* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GE* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const And* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Or* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Cast* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Not* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Select* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const 
Ramp* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Broadcast* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const IntImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const UIntImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const FloatImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const StringImm* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GetBit* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const GetSlice* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const SetBit* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) + // statment + void VisitStmt_(const LetStmt* op) override; + void VisitStmt_(const Store* op) override; + void VisitStmt_(const For* op) override; + void VisitStmt_(const IfThenElse* op) override; + void VisitStmt_(const Allocate* op) override; + void VisitStmt_(const AttrStmt* op) override; + void VisitStmt_(const AssertStmt* op) override; + void VisitStmt_(const Evaluate* op) override; + void VisitStmt_(const Block* op) override; + void VisitStmt_(const ProducerConsumer* op) override; + void VisitStmt_(const KernelDef* op) override; + void VisitStmt_(const KernelStmt* op) override; + void VisitStmt_(const Return* op) override; + void VisitStmt_(const Break* op) override; + void VisitStmt_(const While* op) override; + void VisitStmt_(const Partition* op) override; + /*! + * Print Type represetnation of type t. + * \param t The type representation. + * \param os The stream to print the ctype into + */ + void PrintType(Type t, std::ostream& os); // NOLINT(*) + std::string GetType(Type t); // NOLINT(*) + /*! 
+ * \brief Print expr representing the thread tag + * \param IterVar iv The thread index to be binded; + */ + void BindThreadIndex(const IterVar& iv); // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os); // NOLINT(*) + void PrintStorageSync(const Call* op); // NOLINT(*) + // Binary vector op. + void PrintVecBinaryOp( + const std::string&op, Type op_type, + Expr lhs, Expr rhs, std::ostream& os); // NOLINT(*) + // print vector load + std::string GetVecLoad(Type t, const Variable* buffer, Expr base); + // print vector store + void PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value); // NOLINT(*) + // print load of single element + void PrintVecElemLoad( + const std::string& vec, Type t, int i, std::ostream& os); // NOLINT(*) + // print store of single element. + void PrintVecElemStore( + const std::string& vec, Type t, int i, const std::string& value); + // Get a cast type from to + std::string CastFromTo(std::string value, Type from, Type target); + + protected: + // Print reference to struct location + std::string GetStructRef( + Type t, const Expr& buffer, const Expr& index, int kind); + // print reference to a buffer as type t in index. + virtual std::string GetBufferRef( + Type t, const Variable* buffer, Expr index); + /*! + * \brief If buffer is allocated as type t. + * \param buf_var The buffer variable. + * \param t The type to be checked. + */ + bool HandleTypeMatch(const Variable* buf_var, Type t) const; + /*! + * \brief Register the data type of buf_var + * \param buf_var The buffer variable. + * \param t The type to be checked. + */ + void RegisterHandleType(const Variable* buf_var, Type t); + // override + void PrintSSAAssign( + const std::string& target, const std::string& src, Type t) final; + /*! \brief restrict keyword */ + std::string restrict_keyword_{""}; + /*! \brief the storage scope of allocation */ + std::unordered_map alloc_storage_scope_; + /*! 
\brief the data type of allocated buffers */ + std::unordered_map handle_data_type_; + + private: + /*! \brief set of volatile buf access */ + std::unordered_set volatile_buf_; + /*! \brief map of function arguments to their types */ + str2tupleMap map_arg_type_; +}; + +} // namespace codegen +} // namespace TVM +#endif // TVM_CODEGEN_CODEGEN_C_H_ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc new file mode 100755 index 000000000..4ae3015d9 --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -0,0 +1,156 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include +# include "./codegen_aocl.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + + +void CodeGenAOCL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + + this->stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" << "\n"; + this->stream << "__kernel " << "void " << f->name << "("; + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + // this->stream << "global "; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + 
this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; + this->stream << "}\n\n"; +} + + + +void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os << "void*"; return; + } + + if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; + } + else if ( t.is_int()) { + os << "ap_int<" << t.bits() << ">" << "intd_t"; + } + else { + if (t.is_float()) { + if (t.bits() == 16) { + enable_fp16_ = true; + os << "half"; return; + } + if (t.bits() == 32) { + os << "float"; return; + } + if (t.bits() == 64) { + enable_fp64_ = true; + os << "double"; return; + } + } else if (t.is_uint()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; + // os << "uint" << t.bits() << "_t"; return; + } + case 1: os << "int"; return; + } + } else if (t.is_int()) { + switch (t.bits()) { + case 8: case 16: case 32: case 64: { + os << "ap_int<" << t.bits() << ">" << "intd_t"; return; + // os << "int" << t.bits() << "_t"; return; + } + } + } + } + } +} + + +void CodeGenAOCL::VisitStmt_(const For* op) { + std::ostringstream os; + if (op->for_type == ForType::Unrolled) { + int unroll_factor = 0, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto factor = op->annotate_values[i].as(); + if (str->value == "factor" && factor != nullptr && factor->value > 1) { + unroll_factor = factor->value; + break; + } + } + i++; + } + os << "#pragma unroll"; + if (unroll_factor > 0) os << " " << unroll_factor << "\n"; + else os << "\n"; + } + else if (op->for_type == ForType::Pipelined) { + int II = 1, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto initiation_interval = op->annotate_values[i].as(); + if (str->value == "initiation_interval" && + 
initiation_interval != nullptr && + initiation_interval->value > 1) { + II = initiation_interval->value; + break; + } + } + i++; + } + os << "#pragma"; + os << " ii " << II << "\n"; + } + CodeGenAOCL::GenForStmt(op, os.str(), true); +} + + + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h new file mode 100755 index 000000000..0921770c8 --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -0,0 +1,34 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef TVM_CODEGEN_CODEGEN_AOCL_H_ +#define TVM_CODEGEN_CODEGEN_AOCL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "./codegen_opencl.h" + + +namespace TVM { +namespace codegen { + + +class CodeGenAOCL : public CodeGenOpenCL { + public: + CodeGenAOCL(){} + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + + void VisitStmt_(const For* op) override; + +}; +} // namespace codegen +} // namespace TVM + +#endif // TVM_CODEGEN_CODEGEN_AOCL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc new file mode 100755 index 000000000..ab14fc0bf --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -0,0 +1,256 @@ + +# include +# include +# include +# include +# include +# include +# include "./codegen_opencl.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM{ +namespace codegen{ + +CodeGenOpenCL::CodeGenOpenCL(){ + restrict_keyword_ = "restrict"; +} + +void CodeGenOpenCL::InitFuncState(LoweredFunc f) { + CodeGenC::InitFuncState(f); + for (Var arg: f->args) { + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } +} + + +std::string CodeGenOpenCL::Finish() { + // inject extension enable pragma for fp16 and fp64 + if (enable_fp16_) { + decl_stream + << "#ifdef 
cl_khr_fp16\n" + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "#elif defined(cl_amd_fp16)\n" + "#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n" + "#else\n" + "#error \"Half precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + if (enable_fp64_) { + decl_stream + << "#ifdef cl_khr_fp64\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#elif defined(cl_amd_fp64)\n" + "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" + "#else\n" + "#error \"Double precision floating point not supported" + "by OpenCL implementation on your device.\" \n" + "#endif\n\n"; + } + + return CodeGenC::Finish(); +} + +void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { + CHECK(!var_idmap_.count(iv->var.get())); + runtime::ThreadScope ts = runtime::ThreadScope::make(iv->thread_tag); + std::ostringstream os; + if (ts.rank == 1) { + os << "get_local_id(" << ts.dim_index << ")"; + } else { + os << "get_group_id(" << ts.dim_index << ")"; + } + var_idmap_[iv->var.get()] = + CastFromTo(os.str(), UInt(64), iv->var.type()); +} + + + + +void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, + Expr base, std::ostream& os) { // NOLINT(*) + if (!HandleTypeMatch(buffer, t.element_of())) { + os << '('; + auto it = alloc_storage_scope_.find(buffer); + if (it != alloc_storage_scope_.end()) { + PrintStorageScope(it->second, os); + } + os << ' '; + PrintType(t.element_of(), os); + os << "*)"; + } + os << GetVarID(buffer) << " + "; + PrintExpr(base, os); +} +std::string CodeGenOpenCL::GetVecLoad( + Type t, const Variable* buffer, Expr base) { + std::ostringstream os; + os << "vload" << t.lanes() << "(0, "; + PrintVecAddr(buffer, t, base, os); + os << ")"; + return os.str(); +} + +void CodeGenOpenCL::PrintVecStore(const Variable* buffer, + Type t, Expr base, + const std::string& value) { + this->PrintIndent(); + stream << "vstore" << t.lanes() << "(" << value << ", 0, "; + PrintVecAddr(buffer, t, base, stream); + stream << 
");\n"; +} + +void CodeGenOpenCL::PrintStorageSync(const Call* op) { + const std::string& sync = op->args[0].as()->value; + if (sync == "warp") { + LOG(FATAL) << "warp sync not supported in opencl"; + } else if (sync == "shared") { + this->PrintIndent(); + this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n"; + } else if (sync == "global") { + LOG(FATAL) << "not supported"; + } +} + + + +void CodeGenOpenCL::PrintStorageScope( + const std::string& scope, std::ostream& os) { // NOLINT(*) + if (scope == "global") { + os << "global "; + } else if (scope == "shared") { + os << "local "; + } +} + + +std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { + if (from == target) return value; + std::ostringstream os; + if (target.lanes() == 1) { + os << "(("; + this->PrintType(target, os); + os << ")" << value << ")"; + } else { // convert vector type + os << "("; + os << "convert_"; + this->PrintType(target, os); + os << "(" << value << "))"; + } + return os.str(); +} + +void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) + std::string v = PrintExpr(op->value); + os << "(("; + PrintType(op->type, os); + os << ")("; + for (int i = 0; i < op->lanes; ++i) { + if (i != 0) os << ", "; + os << v; + } + os << "))"; +} + +void CodeGenOpenCL::VisitExpr_(const Call * op, std::ostream& os) { // NOLINT(*) + if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { + os << "("; + PrintType(op->args[2].type(), os); + os << ")"; + } + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + } + PrintStmt(op->body); +} + + +void 
CodeGenOpenCL::VisitExpr_(const FloatImm * op, std::ostream& os) { // NOLINT(*) + if (std::isinf(op->value)) { + if ( op->value < 0) { + os << "-"; + } + os << "INFINITY"; + } else if (std::isnan(op->value)) { + os << "NAN"; + } else { + CodeGenC::VisitExpr_(op, os); + } +} + +void CodeGenOpenCL::VisitExpr_(const Select * op, std::ostream& os ) { // NOINT(*) + os << "("; + PrintType(op->true_value.type(), os); + os << ")"; + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenCL::VisitStmt_(const IfThenElse* op) { + std::string cond = PrintExpr(op->condition); + // Skip the buffer data checking + if (std::regex_match(cond, std::regex("!\\((arg)(.+)(== NULL)\\)"))) + return ; + PrintIndent(); + if (cond[0] == '(' && cond[cond.length() - 1] == ')') { + stream << "if " << cond << " {\n"; + } else { + stream << "if (" << cond << ") {\n"; + } + int then_scope = BeginScope(); + PrintStmt(op->then_case); + this->EndScope(then_scope); + if (op->else_case.defined()) { + PrintIndent(); + stream << "} else {\n"; + int else_scope = BeginScope(); + PrintStmt(op->else_case); + this->EndScope(else_scope); + } + PrintIndent(); + stream << "}\n"; +} + +void CodeGenOpenCL::GenForStmt(const For* op, std::string pragma, bool before) { + std::string extent = PrintExpr(op->extent); + std::string vid = AllocVarID(op->loop_var.get()); + CHECK(is_zero(op->min)); + if (before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + PrintIndent(); + stream << "for ("; + PrintType(op->loop_var.type(), stream); + stream << ' ' << vid << " = 0; " + << vid << " < " << extent + << "; ++" << vid << ") {\n"; + if (!before && pragma.length() > 0) { + PrintIndent(); + stream << pragma; + } + int for_scope = BeginScope(); + PrintStmt(op->body); + this->EndScope(for_scope); + PrintIndent(); + stream << "}\n"; +} + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h new file mode 100755 index 
000000000..8f9db613d --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -0,0 +1,53 @@ +#ifndef TVM_CODEGEN_CODEGEN_OPENCL_H_ +#define TVM_CODEGEN_CODEGEN_OPENCL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "../codegen_c.h" + +namespace TVM{ +namespace codegen{ + +class CodeGenOpenCL : public CodeGenC{ + public: + // void AddFunction(LoweredFunc f); + CodeGenOpenCL(); + virtual void AddFunction(LoweredFunc f, str2tupleMap map_arg_type) = 0; + std::string Finish(); + void InitFuncState(LoweredFunc f) override; + void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) + void PrintStorageSync(const Call* op) override; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + virtual void PrintType(Type t, std::ostream& os) = 0; //NOLINT + std::string GetVecLoad(Type t, const Variable * buffer, + Expr base) override; // NOLINT(*) + void PrintVecStore(const Variable * buffer, Type t, + Expr base, const std::string& value) override; //NOLINT(*) + void PrintVecAddr(const Variable * buffer, Type t, + Expr base, std::ostream& os); //NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) + + //overload visitor + void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) + void VisitStmt_(const LetStmt* op) override; // NOLINT + void GenForStmt(const For* op, std::string pragma, bool before); + virtual void VisitStmt_(const For* op) = 0; + + protected: + // whether enable fp16 and fp64 extension + bool enable_fp16_{false}; + bool enable_fp64_{false}; +}; + 
+ +} // namespace codegen +} // namespace TVM + +#endif \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc new file mode 100755 index 000000000..5470a10f7 --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -0,0 +1,196 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include +# include "./codegen_sdaccel.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +void CodeGenSDACCEL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + + this->stream << "__kernel " << "void " << f->name << "("; + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + // this->stream << "global "; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; + this->stream << "}\n\n"; +} + + +void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + int lanes = t.lanes(); + if (t.is_handle()) { + CHECK_EQ(lanes, 1) + << "do not yet support vector types"; + os << "void*"; return; + 
} + if ( t== Bool() ) { + os << "bool"; return; + } + bool fail = false; + if (t.is_float()) { + switch (t.bits()) { + case 16: + os << "half"; + enable_fp16_ = true; + break; + case 32: + os << "float"; + break; + case 64: + os << "double"; + enable_fp64_ = true; + break; + default: + fail = true; + break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } else if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << 'u'; + } + if (t.bits() == 8 && t.lanes() == 4) { + // directly 4 8 bit int in integer. + os << "int"; return; + } + switch (t.bits()) { + case 8: os << "char"; break; + case 16: os << "short"; break; + case 32: os << "int"; break; + case 64: os << "long"; break; + case 1: os << "int"; break; + default: fail = true; break; + } + if (!fail && lanes == 1) return; + if (!fail && (lanes >= 2 && lanes <= 16)) { + os << lanes; return; + } + } + LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; +} + + + + +void CodeGenSDACCEL::VisitStmt_(const For* op) { + std::ostringstream os; + if (op->for_type == ForType::Unrolled) { + int unroll_factor = 0, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto factor = op->annotate_values[i].as(); + if (str->value == "factor" && factor != nullptr && factor->value > 1) { + unroll_factor = factor->value; + break; + } + } + i++; + } + os << "__attribute__((opencl_unroll_hint("; + if (unroll_factor > 0) os << unroll_factor << ")))\n"; + else + os << "\n"; + + } + else if (op->for_type == ForType::Pipelined) { + int II = 1, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto initiation_interval = op->annotate_values[i].as(); + if (str->value == "initiation_interval" && + initiation_interval != nullptr && + initiation_interval->value > 1) { + II = initiation_interval->value; + break; + } + } + i++; + } + os << "__attribute__((xcl_pipeline_loop("; + os << II << ")))\n"; + } + 
CodeGenSDACCEL::GenForStmt(op, os.str(), true); +} + + + +void CodeGenSDACCEL::VisitStmt_(const Partition* op) { + std::string vid = GetVarID(op->buffer_var.get()); + stream << vid << " "; + if (op->partition_type != PartitionType::Complete) { + stream << "__attribute__((xcl_array_partition("; + switch (op->partition_type) { + // case PartitionType::Complete: + // stream << "complete,"; + // break; + case PartitionType::Block: + stream << "block,"; + break; + case PartitionType::Cyclic: + stream << "cyclic,"; + break; + } + stream << op->factor << ","; + stream << op->dim << ")))\n"; + }else { + if (op->dim == 0) { + stream << "__attribute__((xcl_array_partition))\n"; + } else { + stream << "__attribute__((xcl_array_partition("; + stream << "complete,"; + stream << op->factor << ","; + stream << op->dim << ")))\n"; + } + } +} + + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h new file mode 100755 index 000000000..29109d015 --- /dev/null +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -0,0 +1,36 @@ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ +#define TVM_CODEGEN_CODEGEN_SDACCEL_H_ + +# include +# include +# include +# include "./codeanalys_openclc.h" +# include "./codegen_opencl.h" + +namespace TVM { +namespace codegen { + + +class CodeGenSDACCEL : public CodeGenOpenCL { + public: + CodeGenSDACCEL(){} + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + + void VisitStmt_(const For* op) override; + void VisitStmt_(const Partition* op); + + + +}; +} // namespace codegen +} // namespace TVM + +#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc new file mode 100755 index 000000000..8a640e556 --- 
/dev/null +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc @@ -0,0 +1,336 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-30 15:15:28 + * @LastEditTime: 2019-08-14 16:16:03 + * @LastEditors: Please set LastEditors + */ +/* + Yang.Bai + yb269@cornell.edu +*/ +# include "./sdaccel_module.h" +# include +# include +# include +# include +# include + +namespace TVM { +namespace runtime { + +namespace { + +void PrintIndent(std::ofstream& stream, int indent) { + for (int i = 0;i < indent; i++ ) { + stream << ' '; + } +} + +inline size_t GetTypeSize(TVMType t) { + size_t byte = (t.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + return byte; +} + +inline size_t GetDataSize(TVMArray* arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= arr->shape[i]; + } + size_t byte = (arr->dtype.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + size *= (byte * 8 * arr->dtype.lanes + 7) / 8; + return size; +} + +inline TVMType Type2TVMType(Type t) { + TVMType tt; + if (t.is_int()) tt.code = kDLInt; + else if (t.is_uint()) tt.code = kDLUInt; + else if (t.is_float()) tt.code = kDLFloat; + else LOG(FATAL) << "Unacceptable type: " << t; + tt.bits = static_cast(t.bits()); + tt.fracs = static_cast(t.fracs()); + return tt; +} + +inline std::string Type2Str(TVMType t) { + +} + +inline std::string Tpye2ExtStr(TVMType t) { + +} + + + + + +inline std::string Type2Byte(TVMType t) { + std::string str = ""; + if (t.code == kDLFloat) { + str += "float"; + } else if (t.code == kDLInt || t.code == kDLUInt) { + if (t.code == kDLUInt) str += "u"; + str += "int"; + if (t.bits <= 8) str += "8"; + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + str += "_t"; + } + return str; +} + +void CollectArgInfo(TVMArgs& args, + LoweredFunc func, + std::vector& 
arg_sizes, + std::vector& arg_types) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + arg_sizes.push_back(GetDataSize(arr)); + arg_types.push_back(arr->dtype); + } else { + const Variable* var = func->api_args[i].as(); + TVMType t = Type2TVMType(var->type); + arg_sizes.push_back(GetTypeSize(t)); + arg_types.push_back(t); + } + } +} + +void GenSharedMem(TVMArgs& args, + std::vector& shmids, + std::vector& arg_sizes) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + // generate shared memory key and id + // TODO: maybe get the current path?? + key_t key = ftok("/", i+1); + int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); + shmids.push_back(shmid); + // copy mem from TVM args to the shared memory + void* mem = shmat(shmid, nullptr, 0); + memcpy(mem, arr->data, arg_sizes[i]); + } else { + shmids.push_back(0); + } + } +} + +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes) { + for (size_t i = 0; i < shmids.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + int shmid = shmids[i]; + void* mem = shmat(shmid, nullptr, 0); + memcpy(arr->data, mem, arg_sizes[i]); + shmdt(mem); + shmctl(shmid, IPC_RMID, nullptr); + } + } +} + +// copy values from the shared mem to local mem +void PrintCopy() + + + + +// copy values from local mem back to shared mem +void PrintCopyBack() + + + +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func, + std::string test_file) { + int indent = 0; + std::ofstream stream; + stream.open("host.cpp"); + + // write the header files and macro commmands. 
+ stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# include \n"; + stream << "# pragram once\n"; + stream << "# define LENGTH (1024)\n"; + stream << "# define NUM_WORKGROUPS (1)\n"; + stream << "# define WORKGROUP_SIZE (16)\n"; + stream << test_file; + stream << "int main(void) { \n"; + indent += 2; + + + // get the platform and devices + stream << "#if define(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; + PrintIndent(stream, indent); + stream << "# define STR_VALUE(arg) #arg\n"; + PrintIndent(stream, indent); + stream << "# define GET_STRING(name) STR_VALUE(name)\n"; + PrintIndent(stream, indent); + stream << "# define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n" + stream << "#endif"; + + + // get the xclbin filename . 
+ stream << "char * xclbinFilename = argv[1]\n"; + stream << "size_t \n"; + + // source memories + + + // create the test data and goldn data locally + + + + + // OpenCL HOST CODE AREA START + // get First Platform + stream << "std::vector platforms;\n"; + stream << "cl::Platform::get(&platforms)\n;"; + stream << "cl::Platform platform = platform[0];\n"; + stream << "std::cout << "" " + + // get accelerator devices and select 1st such device + + // create context and command queue for selected device + + + // load xcl binary into the buffer + + + // creat program from binary file + + // create kernel + + // create buffers inside device + + // copy input data to device buffer from host memory + + // run the kernel + + // copy device result data to host memory + // OpenCL HOST CODE AREA END + + + + // compare the results of the kernel to the simulation + + + + + for ( int i = 0;i < args.size(); i++ ) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared memory + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << "* "; + stream << Type2Byte(arg_types)[i] << "*"; + PrintIndent(stream, indent); + + + } + } + + // call the function + PrintIndent(stream, indent); + stream << func->name << "("; + for (int i = 0;i < args.size();i++) { + if (i != args.size()-1) { + stream << ", "; + } + } + stream << ");\n"; + + // copy to shared mem + for (int i = 0;i < args.size();i++ ) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + PrintCopyBack(arr, stream, indent, i); + PrintIndent(stream, indent); + } + } + stream << "}\n"; + stream.close(); +} +} // namespace + +class SDAccelModuleNode final : public ModuleNode { + public: + SDAccelModuleNode(LoweredFunc func, std::string test_file) + : func_(func), test_file_(test_file) {} + + const char* type_key() const { + return "sdaccel_sw_emu"; + } + + PackedFunc GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) final { + return 
PackedFunc([this](TVMArgs args, TVMRetValue* rv){ + if (args.size() != (int)func_->args.size()) + LOG(FATAL) << "The function should take in " << func_->args.size() + << " inputs but get " << args.size(); + std::vector arg_sizes; + std::vector arg_types; + std::vector shmids; + CollectArgInfo(args, func_, arg_sizes, arg_types); + GenSharedMem(args, shmids, arg_sizes); + GenHostCode(args, shmids, arg_types, func_, test_file_); + // TODO: find a better way to do the following + LOG(CLEAN) << "Compiling the generated SDAccel OpenCL code ..."; + LOG(CLEAN) << "Running SDAccel OpenCL simulation ..."; + system("make -f sdaccel.mk run_cpu_em"); + // system("./out"); + LOG(CLEAN) << "Finished SDAccel OpenCL simulation"; + system("make -f sdaccel.mk clean"); + FreeSharedMem(args, shmids, arg_sizes); + }); + } + + private: + LoweredFunc func_; + std::string test_file_; +}; + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code) { + + std::shared_ptr n = + std::make_shared(func, code); + + return Module(n); +} + + +} // namespace runtime +} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h new file mode 100755 index 000000000..25db653b9 --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h @@ -0,0 +1,30 @@ +/* + * @Description: In User Settings Edit + * @Author: your name + * @Date: 2019-07-30 15:15:15 + * @LastEditTime: 2019-07-30 15:15:15 + * @LastEditors: your name + */ +/* + Yang.Bai + yb269@cornell.edu +*/ + +#ifndef SDACCEL_MODULE_H +#define SDACCEL_MODULE_H + +# include +# include +# include "../../build_common.h" + +namespace TVM { +namespace runtime { + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code); + +} // namespace runtime +} // namespace TVM + +#endif \ No newline at end of file From 4852b98ffd068a440cfee0edf710d4f5475e7455 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 28 Aug 2019 16:18:38 -0400 
Subject: [PATCH 055/103] fixed __local --- tvm/src/codegen/opencl/codegen_sdaccel.cc | 18 ++++++++++++++++-- tvm/src/codegen/opencl/codegen_sdaccel.h | 7 +++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 5470a10f7..40a3034d0 100755 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -117,6 +117,18 @@ void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) } +void CodeGenSDACCEL::PrintStorageScope( + const std::string& scope, std::ostream& os) { // NOLINT(*) + if (scope == "global" || scope == "shared") { + os << "__local "; + } +} + + + + + + void CodeGenSDACCEL::VisitStmt_(const For* op) { @@ -133,8 +145,10 @@ void CodeGenSDACCEL::VisitStmt_(const For* op) { } i++; } - os << "__attribute__((opencl_unroll_hint("; - if (unroll_factor > 0) os << unroll_factor << ")))\n"; + if (unroll_factor > 0) { + os << "__attribute__((opencl_unroll_hint("; + os << unroll_factor << ")))\n"; + } else os << "\n"; diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h index 29109d015..064d3098a 100755 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -19,13 +19,12 @@ namespace codegen { class CodeGenSDACCEL : public CodeGenOpenCL { public: CodeGenSDACCEL(){} - // void AddFunction(LoweredFunc f); void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - - void VisitStmt_(const For* op) override; - void VisitStmt_(const Partition* op); + void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const For* op) override; //NOLINT(*) + void VisitStmt_(const Partition* op); //NOLINT(*) From ea0771f52aa54313449d04412bf76319dda98397 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 2 Sep 2019 13:44:55 -0400 Subject: 
[PATCH 056/103] fixed data_type for xilinx opencl --- samples/kmeans/kmeans_main.py | 11 ++ samples/kmeans/merlinc_code.cl | 52 ++++++ samples/kmeans/sdaccel_code.cl | 48 ++++++ samples/kmeans/vhls_code.cl | 52 ++++++ samples/lenet/lenet_main.py | 13 +- samples/lenet/lenet_main_withoutq.py | 124 ++++++++++++++ samples/lenet/merlinc_code.cl | 155 ++++++++++++++++++ samples/lenet/sdaccel_code.cl | 151 +++++++++++++++++ samples/lenet/vhls_code.cl | 155 ++++++++++++++++++ samples/smith_waterman/merlinc_code.cl | 146 +++++++++++++++++ samples/smith_waterman/sdaccel_code.cl | 141 ++++++++++++++++ samples/smith_waterman/smith_waterman_main.py | 31 +++- tvm/src/codegen/opencl/codegen_sdaccel.cc | 112 ++++++++++--- 13 files changed, 1157 insertions(+), 34 deletions(-) create mode 100644 samples/kmeans/merlinc_code.cl create mode 100644 samples/kmeans/sdaccel_code.cl create mode 100644 samples/kmeans/vhls_code.cl create mode 100644 samples/lenet/lenet_main_withoutq.py create mode 100644 samples/lenet/merlinc_code.cl create mode 100644 samples/lenet/sdaccel_code.cl create mode 100644 samples/lenet/vhls_code.cl create mode 100644 samples/smith_waterman/merlinc_code.cl create mode 100644 samples/smith_waterman/sdaccel_code.cl diff --git a/samples/kmeans/kmeans_main.py b/samples/kmeans/kmeans_main.py index 6e7906cff..ed4af0d5f 100644 --- a/samples/kmeans/kmeans_main.py +++ b/samples/kmeans/kmeans_main.py @@ -19,6 +19,8 @@ niter = 200 hcl.init() +#hcl.init(hcl.Float()) + ############################################################################## # Main Algorithm @@ -66,6 +68,15 @@ def calc_sum(n): return hcl.build(s, target=target) f = top() +code = top('merlinc') +with open('merlinc_code.cl', 'w') as f: + f.write(code) +code2 = top('sdaccel') +with open('sdaccel_code.cl', 'w') as f: + f.write(code2) +code3 = top('vhls') +with open('vhls_code.cl', 'w') as f: + f.write(code3) points_np = np.random.randint(100, size=(N, dim)) labels_np = np.zeros(N) diff --git 
a/samples/kmeans/merlinc_code.cl b/samples/kmeans/merlinc_code.cl new file mode 100644 index 000000000..ea672313d --- /dev/null +++ b/samples/kmeans/merlinc_code.cl @@ -0,0 +1,52 @@ +#include +#include +#include +#pragma ACCEL kernel +void default_function(int* placeholder2, int* placeholder3, int* compute3) { + for (int x = 0; x < 320; ++x) { + compute3[x] = 0; + } + int main_loop; + for (int _1 = 0; _1 < 200; ++_1) { +#pragma ACCEL pipeline + for (int N = 0; N < 320; ++N) { + int local2; + local2 = 100000; + for (int i = 0; i < 16; ++i) { + int local3; + local3 = 0; + for (int i1 = 0; i1 < 32; ++i1) { + local3 = ((int)(((long)local3) + ((long)(((long)((long)(placeholder2[(i1 + (N * 32))] - placeholder3[(i1 + (i * 32))]))) * ((long)((long)(placeholder2[(i1 + (N * 32))] - placeholder3[(i1 + (i * 32))]))))))); + } + if (local3 < local2) { + local2 = local3; + compute3[N] = i; + } + } + } + int compute4[16]; + for (int x1 = 0; x1 < 16; ++x1) { + compute4[x1] = 0; + } + int compute5[512]; + for (int x2 = 0; x2 < 16; ++x2) { + for (int y = 0; y < 32; ++y) { + compute5[(y + (x2 * 32))] = 0; + } + } + int calc_sum; +#pragma ACCEL parallel flatten + for (int n = 0; n < 320; ++n) { + compute4[compute3[n]] = (compute4[compute3[n]] + 1); + for (int i2 = 0; i2 < 32; ++i2) { + compute5[(i2 + (compute3[n] * 32))] = ((int)(((long)compute5[(i2 + (compute3[n] * 32))]) + ((long)placeholder2[(i2 + (n * 32))]))); + } + } + int update_mean; +#pragma ACCEL parallel flatten + for (int k_d_fused = 0; k_d_fused < 512; ++k_d_fused) { + placeholder3[k_d_fused] = (compute5[k_d_fused] / compute4[(k_d_fused / 32)]); + } + } +} + diff --git a/samples/kmeans/sdaccel_code.cl b/samples/kmeans/sdaccel_code.cl new file mode 100644 index 000000000..196f96257 --- /dev/null +++ b/samples/kmeans/sdaccel_code.cl @@ -0,0 +1,48 @@ +__kernel void default_function(__global int* placeholder4, __global int* placeholder5, __global int* compute6) { + for (int x = 0; x < 320; ++x) { + compute6[x] = 0; + } + 
__local int main_loop; + for (int _1 = 0; _1 < 200; ++_1) { + __attribute__((xcl_pipeline_loop(1))) + for (int N = 0; N < 320; ++N) { + __local int local4; + local4 = 100000; + for (int i = 0; i < 16; ++i) { + __local int local5; + local5 = 0; + for (int i1 = 0; i1 < 32; ++i1) { + local5 = ((int)(((long)local5) + ((long)(((long)((long)(placeholder4[(i1 + (N * 32))] - placeholder5[(i1 + (i * 32))]))) * ((long)((long)(placeholder4[(i1 + (N * 32))] - placeholder5[(i1 + (i * 32))]))))))); + } + if (local5 < local4) { + local4 = local5; + compute6[N] = i; + } + } + } + __local int compute7[16]; + for (int x1 = 0; x1 < 16; ++x1) { + compute7[x1] = 0; + } + __local int compute8[512]; + for (int x2 = 0; x2 < 16; ++x2) { + for (int y = 0; y < 32; ++y) { + compute8[(y + (x2 * 32))] = 0; + } + } + __local int calc_sum; + + for (int n = 0; n < 320; ++n) { + compute7[compute6[n]] = (compute7[compute6[n]] + 1); + for (int i2 = 0; i2 < 32; ++i2) { + compute8[(i2 + (compute6[n] * 32))] = ((int)(((long)compute8[(i2 + (compute6[n] * 32))]) + ((long)placeholder4[(i2 + (n * 32))]))); + } + } + __local int update_mean; + + for (int k_d_fused = 0; k_d_fused < 512; ++k_d_fused) { + placeholder5[k_d_fused] = (compute8[k_d_fused] / compute7[(k_d_fused / 32)]); + } + } +} + diff --git a/samples/kmeans/vhls_code.cl b/samples/kmeans/vhls_code.cl new file mode 100644 index 000000000..b651dd8bf --- /dev/null +++ b/samples/kmeans/vhls_code.cl @@ -0,0 +1,52 @@ +#include +#include +#include + +void default_function(ap_int<32> placeholder6[320][32], ap_int<32> placeholder7[16][32], ap_int<32> compute9[320]) { + for (ap_int<32> x = 0; x < 320; ++x) { + compute9[x] = 0; + } + ap_int<32> main_loop; + for (ap_int<32> _ = 0; _ < 200; ++_) { + for (ap_int<32> N = 0; N < 320; ++N) { + #pragma HLS pipeline + ap_int<32> local6; + local6 = 100000; + for (ap_int<32> i = 0; i < 16; ++i) { + ap_int<32> local7; + local7 = 0; + for (ap_int<32> i1 = 0; i1 < 32; ++i1) { + local7 = ((ap_int<32>)(((ap_int<67>)local7) 
+ ((ap_int<67>)(((ap_int<66>)((ap_int<33>)(placeholder6[N][i1] - placeholder7[i][i1]))) * ((ap_int<66>)((ap_int<33>)(placeholder6[N][i1] - placeholder7[i][i1]))))))); + } + if (local7 < local6) { + local6 = local7; + compute9[N] = i; + } + } + } + ap_int<32> compute10[16]; + for (ap_int<32> x1 = 0; x1 < 16; ++x1) { + compute10[x1] = 0; + } + ap_int<32> compute11[16][32]; + for (ap_int<32> x2 = 0; x2 < 16; ++x2) { + for (ap_int<32> y = 0; y < 32; ++y) { + compute11[x2][y] = 0; + } + } + ap_int<32> calc_sum; + for (ap_int<32> n = 0; n < 320; ++n) { + #pragma HLS unroll + compute10[compute9[n]] = (compute10[compute9[n]] + 1); + for (ap_int<32> i2 = 0; i2 < 32; ++i2) { + compute11[compute9[n]][i2] = ((ap_int<32>)(((ap_int<33>)compute11[compute9[n]][i2]) + ((ap_int<33>)placeholder6[n][i2]))); + } + } + ap_int<32> update_mean; + for (ap_int<32> k_d_fused = 0; k_d_fused < 512; ++k_d_fused) { + #pragma HLS unroll + placeholder7[(k_d_fused / 32)][(k_d_fused % 32)] = (compute11[(k_d_fused / 32)][(k_d_fused % 32)] / compute10[(k_d_fused / 32)]); + } + } +} + diff --git a/samples/lenet/lenet_main.py b/samples/lenet/lenet_main.py index a3bdc8282..418c64cf0 100644 --- a/samples/lenet/lenet_main.py +++ b/samples/lenet/lenet_main.py @@ -94,6 +94,7 @@ def build_lenet_inf(batch_size=batch_size, target=None): f = build_lenet_inf() + ############################################################################### # Prepare the numpy arrays for testing. 
Remember that we need to set the input # tensors with the same type as the placeholders @@ -119,11 +120,11 @@ def build_lenet_inf(batch_size=batch_size, target=None): # remove downloaded files import os -os.remove("t10k-images-idx3-ubyte.gz") -os.remove("t10k-labels-idx1-ubyte.gz") -os.remove("train-images-idx3-ubyte.gz") -os.remove("train-labels-idx1-ubyte.gz") -os.remove("lenet-0010.params") -os.remove("lenet-symbol.json") +#os.remove("t10k-images-idx3-ubyte.gz") +#os.remove("t10k-labels-idx1-ubyte.gz") +#os.remove("train-images-idx3-ubyte.gz") +#os.remove("train-labels-idx1-ubyte.gz") +#os.remove("lenet-0010.params") +#os.remove("lenet-symbol.json") assert correct_sum == 9882 diff --git a/samples/lenet/lenet_main_withoutq.py b/samples/lenet/lenet_main_withoutq.py new file mode 100644 index 000000000..f20ca8c94 --- /dev/null +++ b/samples/lenet/lenet_main_withoutq.py @@ -0,0 +1,124 @@ +import heterocl as hcl +import hlib +import numpy as np + +hcl.init(hcl.Float()) + +def softmax(out, x): + assert len(x.shape) == 2, "only support 2-dim softmax" + m, n = x.shape + k = hcl.reduce_axis(0, n) + max_elem = hcl.compute((m,), lambda i: hcl.max(x[i, k], axis=k)) + k = hcl.reduce_axis(0, n) + expsum = hcl.compute((m,), + lambda i: hcl.sum(hcl.exp(x[i, k] - max_elem[i]), axis=k)) + return hcl.update(out, + lambda i, j: hcl.exp(x[i, j] - max_elem[i]) / expsum[i]) + +def build_lenet(input_image, weight_conv1, weight_conv2, + weight_fc1, weight_fc2, lenet): + # first conv + conv1 = hlib.nn.conv2d_nchw(input_image, weight_conv1) + tanh1 = hlib.nn.tanh(conv1, "tanh1") + pool1 = hlib.nn.max_pool(tanh1, kernel=(2,2), stride=(2,2)) + # second conv + conv2 = hlib.nn.conv2d_nchw(pool1, weight_conv2) + tanh2 = hlib.nn.tanh(conv2, "tanh2") + pool2 = hlib.nn.max_pool(tanh2, kernel=(2,2), stride=(2,2)) + # first fc + flat = hlib.nn.flatten(pool2) + fc1 = hlib.nn.dense(flat, weight_fc1) + tanh3 = hlib.nn.tanh(fc1, "tanh3") + # second fc + fc2 = hlib.nn.dense(tanh3, weight_fc2) + # loss + 
return softmax(lenet, fc2) + + +import mxnet as mx +# download pretrained lenet model +mx.gluon.utils.download('https://gist.githubusercontent.com/Huyuwei/dc00ce83f537914c64a204133d23b019/raw/79af41e7c8ba9120ea7f35fb1d0484b65bccd54f/lenet-0010.params') +mx.gluon.utils.download('https://gist.githubusercontent.com/Huyuwei/dc00ce83f537914c64a204133d23b019/raw/79af41e7c8ba9120ea7f35fb1d0484b65bccd54f/lenet-symbol.json') +sym, arg_params, aux_params = mx.model.load_checkpoint('lenet', 10) +# get weights +weight_conv1_np = arg_params['convolution0_weight'].asnumpy() +weight_conv2_np = arg_params['convolution1_weight'].asnumpy() +weight_fc1_np = arg_params['fullyconnected0_weight'].asnumpy() +weight_fc2_np = arg_params['fullyconnected1_weight'].asnumpy() + + +# qtype1 = hcl.Fixed(16, 14) +# qtype2 = hcl.Fixed(16, 14) + +# qtype1 = hcl.Fixed(16, 12) +# qtype2 = hcl.Fixed(16, 12) + + + +correct_sum = 0 +batch_size = 1000 +mnist = mx.test_utils.get_mnist() + + +def build_lenet_inf(batch_size=batch_size, target=None): + # set up input/output placeholders + input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image") + # weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1", qtype1) + # weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2", qtype1) + # weight_fc1 = hcl.placeholder((500, 800), "weight_fc1", qtype1) + # weight_fc2 = hcl.placeholder((10, 500), "weight_fc2", qtype1) + weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1") + weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2") + weight_fc1 = hcl.placeholder((500, 800), "weight_fc1") + weight_fc2 = hcl.placeholder((10, 500), "weight_fc2") + lenet = hcl.placeholder((batch_size, 10), "lenet") + # create a quantization scheme + # scheme = hcl.create_scheme( + # [input_image, weight_conv1, weight_conv2, + # weight_fc1, weight_fc2, lenet], build_lenet) + # # quantize the three activation layers + # scheme.quantize( + # [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3], 
qtype2) + # s = hcl.create_schedule_from_scheme(scheme) + s = hcl.create_schedule([input_image, weight_conv1, weight_conv2, weight_fc1, weight_fc2, lenet], build_lenet) + return hcl.build(s, target=target) + +code1 = build_lenet_inf(batch_size, 'merlinc') +# print (code1) +with open('merlinc_code.cl', 'w') as f: + f.write(code1) + +code2 = build_lenet_inf(batch_size, 'sdaccel') + +with open('sdaccel_code.cl', 'w') as f: + f.write(code2) + +code3 = build_lenet_inf(batch_size, 'vhls') +with open('vhls_code.cl', 'w') as f: + f.write(code3) + + + +# weight_conv1_hcl = hcl.asarray(weight_conv1_np, dtype=qtype1) +# weight_conv2_hcl = hcl.asarray(weight_conv2_np, dtype=qtype1) +# weight_fc1_hcl = hcl.asarray(weight_fc1_np, dtype=qtype1) +# weight_fc2_hcl = hcl.asarray(weight_fc2_np, dtype=qtype1) + +weight_conv1_hcl = hcl.asarray(weight_conv1_np) +weight_conv2_hcl = hcl.asarray(weight_conv2_np) +weight_fc1_hcl = hcl.asarray(weight_fc1_np) +weight_fc2_hcl = hcl.asarray(weight_fc2_np) + + +for i in range(10000 // batch_size): + label = mnist['test_label'][i*batch_size:(i+1)*batch_size] + input_image_np = mnist['test_data'][i*batch_size:(i+1)*batch_size] + input_image_hcl = hcl.asarray(input_image_np) + output_hcl = hcl.asarray(np.zeros((batch_size,10))) + f(input_image_hcl, weight_conv1_hcl, weight_conv2_hcl, + weight_fc1_hcl, weight_fc2_hcl, output_hcl) + prediction = np.argmax(output_hcl.asnumpy(), axis=1) + correct_sum += np.sum(np.equal(prediction, label)) + +print("Testing accuracy: {}".format(correct_sum / 10000.)) + diff --git a/samples/lenet/merlinc_code.cl b/samples/lenet/merlinc_code.cl new file mode 100644 index 000000000..e8a24c836 --- /dev/null +++ b/samples/lenet/merlinc_code.cl @@ -0,0 +1,155 @@ +#include +#include +#include +#pragma ACCEL kernel +void default_function(float* input_image, float* weight_conv1, float* weight_conv2, float* weight_fc1, float* weight_fc2, float* lenet) { + float conv2d[11520000]; + for (int nn = 0; nn < 1000; ++nn) { + for (int ff 
= 0; ff < 20; ++ff) { + for (int yy = 0; yy < 24; ++yy) { + for (int xx = 0; xx < 24; ++xx) { + float reducer0; + reducer0 = 0.000000e+00f; + for (int ra1 = 0; ra1 < 5; ++ra1) { + for (int ra2 = 0; ra2 < 5; ++ra2) { + reducer0 = ((input_image[(((xx + ra2) + ((yy + ra1) * 28)) + (nn * 784))] * weight_conv1[((ra2 + (ra1 * 5)) + (ff * 25))]) + reducer0); + } + } + conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = reducer0; + } + } + } + } + float tanh1[11520000]; + for (int args = 0; args < 1000; ++args) { + for (int args0 = 0; args0 < 20; ++args0) { + for (int args1 = 0; args1 < 24; ++args1) { + for (int args2 = 0; args2 < 24; ++args2) { + tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((float)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); + } + } + } + } + float max_pool[2880000]; + for (int i = 0; i < 1000; ++i) { + for (int c = 0; c < 20; ++c) { + for (int h = 0; h < 12; ++h) { + for (int w = 0; w < 12; ++w) { + float reducer1; + reducer1 = -1.000000e+00f; + for (int ra3 = 0; ra3 < 2; ++ra3) { + for (int ra4 = 0; ra4 < 2; ++ra4) { + reducer1 = max(tanh1[(((((w * 2) + ra4) + (((h * 2) + ra3) * 24)) + (c * 576)) + (i * 11520))], reducer1); + } + } + max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = reducer1; + } + } + } + } + float conv2d1[3200000]; + for (int nn1 = 0; nn1 < 1000; ++nn1) { + for (int ff1 = 0; ff1 < 50; ++ff1) { + for (int yy1 = 0; yy1 < 8; ++yy1) { + for (int xx1 = 0; xx1 < 8; ++xx1) { + float reducer2; + reducer2 = 0.000000e+00f; + for (int ra5 = 0; ra5 < 20; ++ra5) { + for (int ra6 = 0; ra6 < 5; ++ra6) { + for (int ra7 = 0; ra7 < 5; ++ra7) { + reducer2 = ((max_pool[((((xx1 + ra7) + ((yy1 + ra6) * 12)) + (ra5 * 144)) + (nn1 * 2880))] * weight_conv2[(((ra7 + (ra6 * 5)) + (ra5 * 25)) + (ff1 * 500))]) + reducer2); + } + } + } + conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = reducer2; + } + } + } + } + float tanh2[3200000]; + for (int args3 = 0; args3 < 1000; 
++args3) { + for (int args01 = 0; args01 < 50; ++args01) { + for (int args11 = 0; args11 < 8; ++args11) { + for (int args21 = 0; args21 < 8; ++args21) { + tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((float)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); + } + } + } + } + float max_pool1[800000]; + for (int i1 = 0; i1 < 1000; ++i1) { + for (int c1 = 0; c1 < 50; ++c1) { + for (int h1 = 0; h1 < 4; ++h1) { + for (int w1 = 0; w1 < 4; ++w1) { + float reducer3; + reducer3 = -1.000000e+00f; + for (int ra8 = 0; ra8 < 2; ++ra8) { + for (int ra9 = 0; ra9 < 2; ++ra9) { + reducer3 = max(tanh2[(((((w1 * 2) + ra9) + (((h1 * 2) + ra8) * 8)) + (c1 * 64)) + (i1 * 3200))], reducer3); + } + } + max_pool1[(((w1 + (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = reducer3; + } + } + } + } + float compute0[800000]; + for (int i2 = 0; i2 < 1000; ++i2) { + for (int j = 0; j < 800; ++j) { + compute0[(j + (i2 * 800))] = max_pool1[((((((j / 4) % 4) * 4) + (j % 4)) + ((j / 16) * 16)) + (i2 * 800))]; + } + } + float dense[500000]; + for (int i3 = 0; i3 < 1000; ++i3) { + for (int j1 = 0; j1 < 500; ++j1) { + float reducer4; + reducer4 = 0.000000e+00f; + for (int ra10 = 0; ra10 < 800; ++ra10) { + reducer4 = ((compute0[(ra10 + (i3 * 800))] * weight_fc1[(ra10 + (j1 * 800))]) + reducer4); + } + dense[(j1 + (i3 * 500))] = reducer4; + } + } + float tanh3[500000]; + for (int args4 = 0; args4 < 1000; ++args4) { + for (int args02 = 0; args02 < 500; ++args02) { + tanh3[(args02 + (args4 * 500))] = ((float)tanh(((double)dense[(args02 + (args4 * 500))]))); + } + } + float dense1[10000]; + for (int i4 = 0; i4 < 1000; ++i4) { + for (int j2 = 0; j2 < 10; ++j2) { + float reducer5; + reducer5 = 0.000000e+00f; + for (int ra11 = 0; ra11 < 500; ++ra11) { + reducer5 = ((tanh3[(ra11 + (i4 * 500))] * weight_fc2[(ra11 + (j2 * 500))]) + reducer5); + } + dense1[(j2 + (i4 * 10))] = reducer5; + } + } + float compute1[1000]; + for (int i5 = 0; i5 < 1000; ++i5) { + int 
max; + max = 0; + for (int ra12 = 0; ra12 < 10; ++ra12) { + max = ((int)max(dense1[(ra12 + (i5 * 10))], ((float)max))); + } + compute1[i5] = ((float)max); + } + float compute2[1000]; + for (int i6 = 0; i6 < 1000; ++i6) { + int sum; + sum = 0; + for (int ra13 = 0; ra13 < 10; ++ra13) { + sum = ((int)(exp(((double)(dense1[(ra13 + (i6 * 10))] - compute1[i6]))) + ((double)sum))); + } + compute2[i6] = ((float)sum); + } + float update0; + for (int i7 = 0; i7 < 1000; ++i7) { + for (int j3 = 0; j3 < 10; ++j3) { + lenet[(j3 + (i7 * 10))] = ((float)(exp(((double)(dense1[(j3 + (i7 * 10))] - compute1[i7]))) / ((double)compute2[i7]))); + } + } +} + diff --git a/samples/lenet/sdaccel_code.cl b/samples/lenet/sdaccel_code.cl new file mode 100644 index 000000000..160ecbd18 --- /dev/null +++ b/samples/lenet/sdaccel_code.cl @@ -0,0 +1,151 @@ +__kernel void default_function(__global float* input_image, __global float* weight_conv1, __global float* weight_conv2, __global float* weight_fc1, __global float* weight_fc2, __global float* lenet) { + __local float conv2d[11520000]; + for (int nn = 0; nn < 1000; ++nn) { + for (int ff = 0; ff < 20; ++ff) { + for (int yy = 0; yy < 24; ++yy) { + for (int xx = 0; xx < 24; ++xx) { + __local float reducer6; + reducer6 = 0.000000e+00f; + for (int ra15 = 0; ra15 < 5; ++ra15) { + for (int ra16 = 0; ra16 < 5; ++ra16) { + reducer6 = ((input_image[(((xx + ra16) + ((yy + ra15) * 28)) + (nn * 784))] * weight_conv1[((ra16 + (ra15 * 5)) + (ff * 25))]) + reducer6); + } + } + conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = reducer6; + } + } + } + } + __local float tanh1[11520000]; + for (int args = 0; args < 1000; ++args) { + for (int args0 = 0; args0 < 20; ++args0) { + for (int args1 = 0; args1 < 24; ++args1) { + for (int args2 = 0; args2 < 24; ++args2) { + tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((float)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); + } + } + } + } + __local float 
max_pool[2880000]; + for (int i = 0; i < 1000; ++i) { + for (int c = 0; c < 20; ++c) { + for (int h = 0; h < 12; ++h) { + for (int w = 0; w < 12; ++w) { + __local float reducer7; + reducer7 = -1.000000e+00f; + for (int ra17 = 0; ra17 < 2; ++ra17) { + for (int ra18 = 0; ra18 < 2; ++ra18) { + reducer7 = max(tanh1[(((((w * 2) + ra18) + (((h * 2) + ra17) * 24)) + (c * 576)) + (i * 11520))], reducer7); + } + } + max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = reducer7; + } + } + } + } + __local float conv2d1[3200000]; + for (int nn1 = 0; nn1 < 1000; ++nn1) { + for (int ff1 = 0; ff1 < 50; ++ff1) { + for (int yy1 = 0; yy1 < 8; ++yy1) { + for (int xx1 = 0; xx1 < 8; ++xx1) { + __local float reducer8; + reducer8 = 0.000000e+00f; + for (int ra19 = 0; ra19 < 20; ++ra19) { + for (int ra20 = 0; ra20 < 5; ++ra20) { + for (int ra21 = 0; ra21 < 5; ++ra21) { + reducer8 = ((max_pool[((((xx1 + ra21) + ((yy1 + ra20) * 12)) + (ra19 * 144)) + (nn1 * 2880))] * weight_conv2[(((ra21 + (ra20 * 5)) + (ra19 * 25)) + (ff1 * 500))]) + reducer8); + } + } + } + conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = reducer8; + } + } + } + } + __local float tanh2[3200000]; + for (int args3 = 0; args3 < 1000; ++args3) { + for (int args01 = 0; args01 < 50; ++args01) { + for (int args11 = 0; args11 < 8; ++args11) { + for (int args21 = 0; args21 < 8; ++args21) { + tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((float)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); + } + } + } + } + __local float max_pool1[800000]; + for (int i1 = 0; i1 < 1000; ++i1) { + for (int c1 = 0; c1 < 50; ++c1) { + for (int h1 = 0; h1 < 4; ++h1) { + for (int w1 = 0; w1 < 4; ++w1) { + __local float reducer9; + reducer9 = -1.000000e+00f; + for (int ra22 = 0; ra22 < 2; ++ra22) { + for (int ra23 = 0; ra23 < 2; ++ra23) { + reducer9 = max(tanh2[(((((w1 * 2) + ra23) + (((h1 * 2) + ra22) * 8)) + (c1 * 64)) + (i1 * 3200))], reducer9); + } + } + max_pool1[(((w1 
+ (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = reducer9; + } + } + } + } + __local float compute3[800000]; + for (int i2 = 0; i2 < 1000; ++i2) { + for (int j = 0; j < 800; ++j) { + compute3[(j + (i2 * 800))] = max_pool1[((((((j / 4) % 4) * 4) + (j % 4)) + ((j / 16) * 16)) + (i2 * 800))]; + } + } + __local float dense[500000]; + for (int i3 = 0; i3 < 1000; ++i3) { + for (int j1 = 0; j1 < 500; ++j1) { + __local float reducer10; + reducer10 = 0.000000e+00f; + for (int ra24 = 0; ra24 < 800; ++ra24) { + reducer10 = ((compute3[(ra24 + (i3 * 800))] * weight_fc1[(ra24 + (j1 * 800))]) + reducer10); + } + dense[(j1 + (i3 * 500))] = reducer10; + } + } + __local float tanh3[500000]; + for (int args4 = 0; args4 < 1000; ++args4) { + for (int args02 = 0; args02 < 500; ++args02) { + tanh3[(args02 + (args4 * 500))] = ((float)tanh(((double)dense[(args02 + (args4 * 500))]))); + } + } + __local float dense1[10000]; + for (int i4 = 0; i4 < 1000; ++i4) { + for (int j2 = 0; j2 < 10; ++j2) { + __local float reducer11; + reducer11 = 0.000000e+00f; + for (int ra25 = 0; ra25 < 500; ++ra25) { + reducer11 = ((tanh3[(ra25 + (i4 * 500))] * weight_fc2[(ra25 + (j2 * 500))]) + reducer11); + } + dense1[(j2 + (i4 * 10))] = reducer11; + } + } + __local float compute4[1000]; + for (int i5 = 0; i5 < 1000; ++i5) { + __local int max; + max = 0; + for (int ra26 = 0; ra26 < 10; ++ra26) { + max = ((int)max(dense1[(ra26 + (i5 * 10))], ((float)max))); + } + compute4[i5] = ((float)max); + } + __local float compute5[1000]; + for (int i6 = 0; i6 < 1000; ++i6) { + __local int sum; + sum = 0; + for (int ra27 = 0; ra27 < 10; ++ra27) { + sum = ((int)(exp(((double)(dense1[(ra27 + (i6 * 10))] - compute4[i6]))) + ((double)sum))); + } + compute5[i6] = ((float)sum); + } + __local float update1; + for (int i7 = 0; i7 < 1000; ++i7) { + for (int j3 = 0; j3 < 10; ++j3) { + lenet[(j3 + (i7 * 10))] = ((float)(exp(((double)(dense1[(j3 + (i7 * 10))] - compute4[i7]))) / ((double)compute5[i7]))); + } + } +} + diff --git 
a/samples/lenet/vhls_code.cl b/samples/lenet/vhls_code.cl new file mode 100644 index 000000000..95e8313b4 --- /dev/null +++ b/samples/lenet/vhls_code.cl @@ -0,0 +1,155 @@ +#include +#include +#include + +void default_function(float input_image[1000][1][28][28], float weight_conv1[20][1][5][5], float weight_conv2[50][20][5][5], float weight_fc1[500][800], float weight_fc2[10][500], float lenet[1000][10]) { + float conv2d[1000][20][24][24]; + for (ap_int<32> nn = 0; nn < 1000; ++nn) { + for (ap_int<32> ff = 0; ff < 20; ++ff) { + for (ap_int<32> yy = 0; yy < 24; ++yy) { + for (ap_int<32> xx = 0; xx < 24; ++xx) { + float reducer12; + reducer12 = 0.000000e+00f; + for (ap_int<32> ra29 = 0; ra29 < 5; ++ra29) { + for (ap_int<32> ra30 = 0; ra30 < 5; ++ra30) { + reducer12 = ((input_image[nn][0][(yy + ra29)][(xx + ra30)] * weight_conv1[ff][0][ra29][ra30]) + reducer12); + } + } + conv2d[nn][ff][yy][xx] = reducer12; + } + } + } + } + float tanh1[1000][20][24][24]; + for (ap_int<32> args = 0; args < 1000; ++args) { + for (ap_int<32> args0 = 0; args0 < 20; ++args0) { + for (ap_int<32> args1 = 0; args1 < 24; ++args1) { + for (ap_int<32> args2 = 0; args2 < 24; ++args2) { + tanh1[args][args0][args1][args2] = ((float)tanh(((double)conv2d[args][args0][args1][args2]))); + } + } + } + } + float max_pool[1000][20][12][12]; + for (ap_int<32> i = 0; i < 1000; ++i) { + for (ap_int<32> c = 0; c < 20; ++c) { + for (ap_int<32> h = 0; h < 12; ++h) { + for (ap_int<32> w = 0; w < 12; ++w) { + float reducer13; + reducer13 = -1.000000e+00f; + for (ap_int<32> ra31 = 0; ra31 < 2; ++ra31) { + for (ap_int<32> ra32 = 0; ra32 < 2; ++ra32) { + reducer13 = std::max(tanh1[i][c][((h * 2) + ra31)][((w * 2) + ra32)], reducer13); + } + } + max_pool[i][c][h][w] = reducer13; + } + } + } + } + float conv2d1[1000][50][8][8]; + for (ap_int<32> nn1 = 0; nn1 < 1000; ++nn1) { + for (ap_int<32> ff1 = 0; ff1 < 50; ++ff1) { + for (ap_int<32> yy1 = 0; yy1 < 8; ++yy1) { + for (ap_int<32> xx1 = 0; xx1 < 8; ++xx1) { + float 
reducer14; + reducer14 = 0.000000e+00f; + for (ap_int<32> ra33 = 0; ra33 < 20; ++ra33) { + for (ap_int<32> ra34 = 0; ra34 < 5; ++ra34) { + for (ap_int<32> ra35 = 0; ra35 < 5; ++ra35) { + reducer14 = ((max_pool[nn1][ra33][(yy1 + ra34)][(xx1 + ra35)] * weight_conv2[ff1][ra33][ra34][ra35]) + reducer14); + } + } + } + conv2d1[nn1][ff1][yy1][xx1] = reducer14; + } + } + } + } + float tanh2[1000][50][8][8]; + for (ap_int<32> args3 = 0; args3 < 1000; ++args3) { + for (ap_int<32> args01 = 0; args01 < 50; ++args01) { + for (ap_int<32> args11 = 0; args11 < 8; ++args11) { + for (ap_int<32> args21 = 0; args21 < 8; ++args21) { + tanh2[args3][args01][args11][args21] = ((float)tanh(((double)conv2d1[args3][args01][args11][args21]))); + } + } + } + } + float max_pool1[1000][50][4][4]; + for (ap_int<32> i1 = 0; i1 < 1000; ++i1) { + for (ap_int<32> c1 = 0; c1 < 50; ++c1) { + for (ap_int<32> h1 = 0; h1 < 4; ++h1) { + for (ap_int<32> w1 = 0; w1 < 4; ++w1) { + float reducer15; + reducer15 = -1.000000e+00f; + for (ap_int<32> ra36 = 0; ra36 < 2; ++ra36) { + for (ap_int<32> ra37 = 0; ra37 < 2; ++ra37) { + reducer15 = std::max(tanh2[i1][c1][((h1 * 2) + ra36)][((w1 * 2) + ra37)], reducer15); + } + } + max_pool1[i1][c1][h1][w1] = reducer15; + } + } + } + } + float compute6[1000][800]; + for (ap_int<32> i2 = 0; i2 < 1000; ++i2) { + for (ap_int<32> j = 0; j < 800; ++j) { + compute6[i2][j] = max_pool1[i2][(j / 16)][((j / 4) % 4)][(j % 4)]; + } + } + float dense[1000][500]; + for (ap_int<32> i3 = 0; i3 < 1000; ++i3) { + for (ap_int<32> j1 = 0; j1 < 500; ++j1) { + float reducer16; + reducer16 = 0.000000e+00f; + for (ap_int<32> ra38 = 0; ra38 < 800; ++ra38) { + reducer16 = ((compute6[i3][ra38] * weight_fc1[j1][ra38]) + reducer16); + } + dense[i3][j1] = reducer16; + } + } + float tanh3[1000][500]; + for (ap_int<32> args4 = 0; args4 < 1000; ++args4) { + for (ap_int<32> args02 = 0; args02 < 500; ++args02) { + tanh3[args4][args02] = ((float)tanh(((double)dense[args4][args02]))); + } + } + float 
dense1[1000][10]; + for (ap_int<32> i4 = 0; i4 < 1000; ++i4) { + for (ap_int<32> j2 = 0; j2 < 10; ++j2) { + float reducer17; + reducer17 = 0.000000e+00f; + for (ap_int<32> ra39 = 0; ra39 < 500; ++ra39) { + reducer17 = ((tanh3[i4][ra39] * weight_fc2[j2][ra39]) + reducer17); + } + dense1[i4][j2] = reducer17; + } + } + float compute7[1000]; + for (ap_int<32> i5 = 0; i5 < 1000; ++i5) { + ap_int<32> max; + max = 0; + for (ap_int<32> ra40 = 0; ra40 < 10; ++ra40) { + max = ((ap_int<32>)std::max(dense1[i5][ra40], ((float)max))); + } + compute7[i5] = ((float)max); + } + float compute8[1000]; + for (ap_int<32> i6 = 0; i6 < 1000; ++i6) { + ap_int<32> sum; + sum = 0; + for (ap_int<32> ra41 = 0; ra41 < 10; ++ra41) { + sum = ((ap_int<32>)(exp(((double)(dense1[i6][ra41] - compute7[i6]))) + ((double)sum))); + } + compute8[i6] = ((float)sum); + } + float update2; + for (ap_int<32> i7 = 0; i7 < 1000; ++i7) { + for (ap_int<32> j3 = 0; j3 < 10; ++j3) { + lenet[i7][j3] = ((float)(exp(((double)(dense1[i7][j3] - compute7[i7]))) / ((double)compute8[i7]))); + } + } +} + diff --git a/samples/smith_waterman/merlinc_code.cl b/samples/smith_waterman/merlinc_code.cl new file mode 100644 index 000000000..c3a347f35 --- /dev/null +++ b/samples/smith_waterman/merlinc_code.cl @@ -0,0 +1,146 @@ +#include +#include +#include +#pragma ACCEL kernel +void default_function(unsigned char* seqAs, unsigned char* seqBs, unsigned char* outAs, unsigned char* outBs) { + int B; +#pragma ACCEL pipeline + for (int t_outer = 0; t_outer < 32; ++t_outer) { +#pragma ACCEL parallel + for (int t_inner = 0; t_inner < 32; ++t_inner) { + int maxtrix_max; + maxtrix_max = 0; + int i_max; + i_max = 0; + int j_max; + j_max = 0; + short matrix[16641]; + for (int x = 0; x < 129; ++x) { + for (int y = 0; y < 129; ++y) { + matrix[(y + (x * 129))] = (short)0; + } + } + short action[16641]; + for (int x1 = 0; x1 < 129; ++x1) { + for (int y1 = 0; y1 < 129; ++y1) { + action[(y1 + (x1 * 129))] = (short)3; + } + } + int mutate3; + for 
(int i = 0; i < 129; ++i) { + for (int j = 0; j < 129; ++j) { + int trace_back[4]; + for (int x2 = 0; x2 < 4; ++x2) { + trace_back[x2] = 0; + } + if ((i != 0) && (j != 0)) { + trace_back[0] = ((int)(((long)matrix[((j + (i * 129)) + -130)]) + ((long)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 1 : -4)))); + trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); + trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); + trace_back[3] = 0; + int max; + max = trace_back[0]; + int act; + act = 0; + for (int i1 = 0; i1 < 4; ++i1) { + if (max < trace_back[i1]) { + max = trace_back[i1]; + act = i1; + } + } + matrix[(j + (i * 129))] = ((short)max); + action[(j + (i * 129))] = ((short)act); + if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { + maxtrix_max = ((int)matrix[(j + (i * 129))]); + i_max = i; + j_max = j; + } + } + } + } + int T; + int curr_i; + curr_i = i_max; + int curr_j; + curr_j = j_max; + int next_i; + next_i = 0; + int next_j; + next_j = 0; + int act1; + act1 = ((int)action[(curr_j + (curr_i * 129))]); + int next_i1; + next_i1 = 0; + int next_j1; + next_j1 = 0; + if (act1 == 0) { + next_i1 = (curr_i + -1); + next_j1 = (curr_j + -1); + } else { + if (act1 == 1) { + next_i1 = (curr_i + -1); + next_j1 = curr_j; + } else { + if (act1 == 2) { + next_i1 = curr_i; + next_j1 = (curr_j + -1); + } else { + next_i1 = curr_i; + next_j1 = curr_j; + } + } + } + next_i = next_i1; + next_j = next_j1; + int tick; + tick = 0; + while (((curr_i != next_i) || (curr_j != next_j))) { + int a; + a = 0; + int b; + b = 0; + if (next_i == curr_i) { + a = 0; + } else { + a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + if (next_j == curr_j) { + b = 0; + } else { + b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)a); + outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = 
((unsigned char)b); + curr_i = next_i; + curr_j = next_j; + int act2; + act2 = ((int)action[(curr_j + (curr_i * 129))]); + int next_i2; + next_i2 = 0; + int next_j2; + next_j2 = 0; + if (act2 == 0) { + next_i2 = (curr_i + -1); + next_j2 = (curr_j + -1); + } else { + if (act2 == 1) { + next_i2 = (curr_i + -1); + next_j2 = curr_j; + } else { + if (act2 == 2) { + next_i2 = curr_i; + next_j2 = (curr_j + -1); + } else { + next_i2 = curr_i; + next_j2 = curr_j; + } + } + } + next_i = next_i2; + next_j = next_j2; + tick = (tick + 1); + } + } + } +} + diff --git a/samples/smith_waterman/sdaccel_code.cl b/samples/smith_waterman/sdaccel_code.cl new file mode 100644 index 000000000..d71ee9230 --- /dev/null +++ b/samples/smith_waterman/sdaccel_code.cl @@ -0,0 +1,141 @@ +__kernel void default_function(__global unsigned char* seqAs, __global unsigned char* seqBs, __global unsigned char* outAs, __global unsigned char* outBs) { + __local int B; + __attribute__((xcl_pipeline_loop(1))) + for (int t_outer = 0; t_outer < 32; ++t_outer) { + for (int t_inner = 0; t_inner < 32; ++t_inner) { + __local int maxtrix_max; + maxtrix_max = 0; + __local int i_max; + i_max = 0; + __local int j_max; + j_max = 0; + __local short matrix[16641]; + for (int x = 0; x < 129; ++x) { + for (int y = 0; y < 129; ++y) { + matrix[(y + (x * 129))] = (short)0; + } + } + __local short action[16641]; + for (int x1 = 0; x1 < 129; ++x1) { + for (int y1 = 0; y1 < 129; ++y1) { + action[(y1 + (x1 * 129))] = (short)3; + } + } + __local int mutate1; + for (int i = 0; i < 129; ++i) { + for (int j = 0; j < 129; ++j) { + __local int trace_back[4]; + for (int x2 = 0; x2 < 4; ++x2) { + trace_back[x2] = 0; + } + if ((i != 0) && (j != 0)) { + trace_back[0] = ((int)(((long)matrix[((j + (i * 129)) + -130)]) + ((long)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 
1 : -4)))); + trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); + trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); + trace_back[3] = 0; + __local int max; + max = trace_back[0]; + __local int act; + act = 0; + for (int i1 = 0; i1 < 4; ++i1) { + if (max < trace_back[i1]) { + max = trace_back[i1]; + act = i1; + } + } + matrix[(j + (i * 129))] = ((short)max); + action[(j + (i * 129))] = ((short)act); + if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { + maxtrix_max = ((int)matrix[(j + (i * 129))]); + i_max = i; + j_max = j; + } + } + } + } + __local int T; + __local int curr_i; + curr_i = i_max; + __local int curr_j; + curr_j = j_max; + __local int next_i; + next_i = 0; + __local int next_j; + next_j = 0; + __local int act1; + act1 = ((int)action[(curr_j + (curr_i * 129))]); + __local int next_i1; + next_i1 = 0; + __local int next_j1; + next_j1 = 0; + if (act1 == 0) { + next_i1 = (curr_i + -1); + next_j1 = (curr_j + -1); + } else { + if (act1 == 1) { + next_i1 = (curr_i + -1); + next_j1 = curr_j; + } else { + if (act1 == 2) { + next_i1 = curr_i; + next_j1 = (curr_j + -1); + } else { + next_i1 = curr_i; + next_j1 = curr_j; + } + } + } + next_i = next_i1; + next_j = next_j1; + __local int tick; + tick = 0; + while (((curr_i != next_i) || (curr_j != next_j))) { + __local int a; + a = 0; + __local int b; + b = 0; + if (next_i == curr_i) { + a = 0; + } else { + a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + if (next_j == curr_j) { + b = 0; + } else { + b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)a); + outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)b); + curr_i = next_i; + curr_j = next_j; + __local int act2; + act2 = ((int)action[(curr_j + (curr_i * 129))]); + __local int next_i2; + next_i2 = 0; + __local int next_j2; + next_j2 = 0; + if (act2 == 0) { + next_i2 = (curr_i + -1); + next_j2 = 
(curr_j + -1); + } else { + if (act2 == 1) { + next_i2 = (curr_i + -1); + next_j2 = curr_j; + } else { + if (act2 == 2) { + next_i2 = curr_i; + next_j2 = (curr_j + -1); + } else { + next_i2 = curr_i; + next_j2 = curr_j; + } + } + } + next_i = next_i2; + next_j = next_j2; + tick = (tick + 1); + } + } + } +} + diff --git a/samples/smith_waterman/smith_waterman_main.py b/samples/smith_waterman/smith_waterman_main.py index fb947ff78..6515863dd 100644 --- a/samples/smith_waterman/smith_waterman_main.py +++ b/samples/smith_waterman/smith_waterman_main.py @@ -118,6 +118,11 @@ def batch_sw(seqAs, seqBs, outAs, outBs): outAs = hcl.placeholder((num, lenA+lenB), "outAs", dtype) outBs = hcl.placeholder((num, lenA+lenB), "outBs", dtype) + # seqAs = hcl.placeholder((num, lenA), "seqAs") + # seqBs = hcl.placeholder((num, lenB,), "seqBs") + # outAs = hcl.placeholder((num, lenA+lenB), "outAs") + # outBs = hcl.placeholder((num, lenA+lenB), "outBs") + scheme = hcl.create_scheme([seqAs, seqBs, outAs, outBs], batch_sw) scheme.downsize([batch_sw.B.matrix, batch_sw.B.action], mtype) s = hcl.create_schedule_from_scheme(scheme) @@ -133,7 +138,24 @@ def batch_sw(seqAs, seqBs, outAs, outBs): _consA = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) _consB = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) -f = top() +# _seqA = hcl.asarray(np.random.randint(1, 5, size=(num, lenA))) +# _seqB = hcl.asarray(np.random.randint(1, 5, size=(num, lenB))) +# _consA = hcl.asarray(np.zeros((num, (lenA + lenB)))) +# _consB = hcl.asarray(np.zeros((num, (lenA + lenB)))) + + + +# f = top() +code = top('sdaccel'); +with open('sdaccel_code.cl', 'w') as f: + f.write(code) + +code2 = top('merlinc') +with open('merlinc_code.cl', 'w') as f: + f.write(code2) + + + start = time.time() f(_seqA, _seqB, _consA, _consB) total_time = time.time() - start @@ -149,6 +171,13 @@ def batch_sw(seqAs, seqBs, outAs, outBs): _seqB = hcl.asarray(_seqB_np, dtype) _consA = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) 
_consB = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) + +# _seqA = hcl.asarray(_seqA_np) +# _seqB = hcl.asarray(_seqB_np) +# _consA = hcl.asarray(np.zeros((num, (lenA + lenB)))) +# _consB = hcl.asarray(np.zeros((num, (lenA + lenB)))) + + f(_seqA, _seqB, _consA, _consB) _consA_np = _consA.asnumpy() _consB_np = _consB.asnumpy() diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 40a3034d0..e40599c10 100755 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -60,33 +60,78 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, } +// void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) +// int lanes = t.lanes(); +// if (t.is_handle()) { +// CHECK_EQ(lanes, 1) +// << "do not yet support vector types"; +// os << "void*"; return; +// } +// if ( t== Bool() ) { +// os << "bool"; return; +// } +// bool fail = false; +// if (t.is_float()) { +// switch (t.bits()) { +// case 16: +// os << "half"; +// enable_fp16_ = true; +// break; +// case 32: +// os << "float"; +// break; +// case 64: +// os << "double"; +// enable_fp64_ = true; +// break; +// default: +// fail = true; +// break; +// } +// if (!fail && lanes == 1) return; +// if (!fail && (lanes >= 2 && lanes <= 16)) { +// os << lanes; return; +// } +// } else if (t.is_uint() || t.is_int()) { +// if (t.is_uint()) { +// os << 'u'; +// } +// if (t.bits() == 8 && t.lanes() == 4) { +// // directly 4 8 bit int in integer. 
+// os << "int"; return; +// } +// switch (t.bits()) { +// case 8: os << "char"; break; +// case 16: os << "short"; break; +// case 32: os << "int"; break; +// case 64: os << "long"; break; +// case 1: os << "int"; break; +// default: fail = true; break; +// } +// if (!fail && lanes == 1) return; +// if (!fail && (lanes >= 2 && lanes <= 16)) { +// os << lanes; return; +// } +// } +// LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; +// } + + void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) int lanes = t.lanes(); if (t.is_handle()) { - CHECK_EQ(lanes, 1) - << "do not yet support vector types"; - os << "void*"; return; - } - if ( t== Bool() ) { - os << "bool"; return; + //LOG(FATAL) << "The buffer shouldn't call PrintType for printing type"; + os << "void*"; + return ; } bool fail = false; if (t.is_float()) { switch (t.bits()) { - case 16: - os << "half"; - enable_fp16_ = true; - break; - case 32: - os << "float"; - break; - case 64: - os << "double"; - enable_fp64_ = true; - break; - default: - fail = true; - break; + case 16: os << "half"; break; + case 32: os << "float"; break; + case 64: os << "double"; break; + // case 128: os << "double double"; break; + default: fail = true; break; } if (!fail && lanes == 1) return; if (!fail && (lanes >= 2 && lanes <= 16)) { @@ -94,29 +139,42 @@ void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) } } else if (t.is_uint() || t.is_int()) { if (t.is_uint()) { - os << 'u'; + os << "unsigned "; } if (t.bits() == 8 && t.lanes() == 4) { // directly 4 8 bit int in integer. 
os << "int"; return; } - switch (t.bits()) { + + int target_bit = 1; + while (target_bit < t.bits()) + target_bit <<= 1; + + switch (target_bit) { + case 1: os << "int"; break; + case 2: os << "char"; break; + case 4: os << "char"; break; case 8: os << "char"; break; case 16: os << "short"; break; case 32: os << "int"; break; case 64: os << "long"; break; - case 1: os << "int"; break; + case 128: os << "long"; break; // FIXME: Should use long long default: fail = true; break; } if (!fail && lanes == 1) return; - if (!fail && (lanes >= 2 && lanes <= 16)) { - os << lanes; return; - } + // FIXME: Not yet support multiple lanes + //if (!fail && (lanes >= 2 && lanes <= 16)) { + // os << lanes; return; + //} } - LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; + os << t; + LOG(WARNING) << "Cannot convert type " << t ; + return ; } + + void CodeGenSDACCEL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global" || scope == "shared") { From 4ebdfb7215c3dd61e35ec49bc6d891dc7c1c5dd0 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Mon, 2 Sep 2019 14:35:19 -0400 Subject: [PATCH 057/103] add makefile for SDAccel_runtime --- samples/kmeans/kmeans_main.py | 18 ++++++------- tvm/Makefile | 4 +-- tvm/src/codegen/hlsc/vhls_module.cc | 4 +-- tvm/src/codegen/opencl/build_opencl.cc | 25 +++++++++++++++++++ .../codegen/opencl/sdaccel/sdaccel_module.cc | 14 +++-------- .../codegen/opencl/sdaccel/sdaccel_module.h | 7 ------ 6 files changed, 41 insertions(+), 31 deletions(-) diff --git a/samples/kmeans/kmeans_main.py b/samples/kmeans/kmeans_main.py index ed4af0d5f..eb11f2fde 100644 --- a/samples/kmeans/kmeans_main.py +++ b/samples/kmeans/kmeans_main.py @@ -68,15 +68,15 @@ def calc_sum(n): return hcl.build(s, target=target) f = top() -code = top('merlinc') -with open('merlinc_code.cl', 'w') as f: - f.write(code) -code2 = top('sdaccel') -with open('sdaccel_code.cl', 'w') as f: - f.write(code2) -code3 = top('vhls') -with 
open('vhls_code.cl', 'w') as f: - f.write(code3) +# code = top('merlinc') +# with open('merlinc_code.cl', 'w') as f: +# f.write(code) +# code2 = top('sdaccel') +# with open('sdaccel_code.cl', 'w') as f: +# f.write(code2) +# code3 = top('vhls') +# with open('vhls_code.cl', 'w') as f: +# f.write(code3) points_np = np.random.randint(100, size=(N, dim)) labels_np = np.zeros(N) diff --git a/tvm/Makefile b/tvm/Makefile index d6f11ca12..1b2030645 100644 --- a/tvm/Makefile +++ b/tvm/Makefile @@ -127,9 +127,9 @@ else endif ifeq ($(USE_SDACCEL_HLS), 1) - CFLAGS += -DOPENCL_SDACCEL_RUNTIME=1 + CFLAGS += -DHCL_SDACCEL_RUNTIME=1 else - CFLAGS += -DOPENCL_SDACCEL_RUNTIME=0 + CFLAGS += -DHCL_SDACCEL_RUNTIME=0 endif diff --git a/tvm/src/codegen/hlsc/vhls_module.cc b/tvm/src/codegen/hlsc/vhls_module.cc index 1addcf58b..c5f004a93 100644 --- a/tvm/src/codegen/hlsc/vhls_module.cc +++ b/tvm/src/codegen/hlsc/vhls_module.cc @@ -349,7 +349,7 @@ class VivadoHLSModuleNode final : public ModuleNode { LOG(CLEAN) << "Running C simulation ..."; system("./out"); LOG(CLEAN) << "Finished C simulation"; - system("rm out main.cpp"); + // system("rm out main.cpp"); FreeSharedMem(args, shmids, arg_sizes); }); } @@ -370,4 +370,4 @@ Module CreateVivadoHLSModule( } } // namespace runtime -} // namespace TVM +} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 251878db5..d882bb1e3 100755 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -24,6 +24,31 @@ namespace TVM { namespace codegen { +#if HCL_SDACCEL_RUNTIME +runtime::Module BuildSDAccelSim(Array funcs) { + CodeAnalysOpenCLC ca; + CodeGenSDACCEL cg; + for (LoweredFunc f : funcs) { + // 1st pass: Analyze AST and collect necessary information + ca.AddFunction(f); + str2tupleMap map_arg_type; + map_arg_type = ca.Finish(); + // 2nd pass: Generate kernel code + cg.AddFunction(f, map_arg_type); + } + std::string code = 
cg.Finish(); + + return runtime::CreateSDAccelModule(funcs[0], code); +} + +TVM_REGISTER_API("codegen.sdaccel_sw_emu") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = BuildSDAccelSim(args[0]); + }); +#endif + + + diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc index 8a640e556..066e1602e 100755 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc @@ -1,10 +1,3 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-30 15:15:28 - * @LastEditTime: 2019-08-14 16:16:03 - * @LastEditors: Please set LastEditors - */ /* Yang.Bai yb269@cornell.edu @@ -12,8 +5,8 @@ # include "./sdaccel_module.h" # include # include -# include -# include +// # include +// # include # include namespace TVM { @@ -309,9 +302,8 @@ class SDAccelModuleNode final : public ModuleNode { LOG(CLEAN) << "Compiling the generated SDAccel OpenCL code ..."; LOG(CLEAN) << "Running SDAccel OpenCL simulation ..."; system("make -f sdaccel.mk run_cpu_em"); - // system("./out"); LOG(CLEAN) << "Finished SDAccel OpenCL simulation"; - system("make -f sdaccel.mk clean"); + system("make -f sdaccel.mk cleanall"); FreeSharedMem(args, shmids, arg_sizes); }); } diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h index 25db653b9..cfdf8cadd 100755 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h +++ b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h @@ -1,10 +1,3 @@ -/* - * @Description: In User Settings Edit - * @Author: your name - * @Date: 2019-07-30 15:15:15 - * @LastEditTime: 2019-07-30 15:15:15 - * @LastEditors: your name - */ /* Yang.Bai yb269@cornell.edu From 05ff646a8b98b703b6972a7b2dd38681db6bd74a Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Tue, 3 Sep 2019 17:13:56 -0400 Subject: [PATCH 058/103] add the runtime for sdaccel --- .../digitrec/digitrec/data/testing_set.dat | 180 ++ 
.../digitrec/digitrec/data/training_set_0.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_1.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_2.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_3.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_4.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_5.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_6.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_7.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_8.dat | 1800 +++++++++++++++++ .../digitrec/digitrec/data/training_set_9.dat | 1800 +++++++++++++++++ samples/digitrec/digitrec/hcl_code_dig.py | 150 ++ samples/gemm/gemm_main.py | 7 +- samples/gemm/gemm_sdaccel.py | 7 + samples/gemm/gemm_vhls.py | 6 +- samples/smith_waterman/smith_waterman_main.py | 16 +- tvm/src/codegen/hlsc/build_hlsc.cc | 2 +- tvm/src/codegen/hlsc/vhls_module.cc | 4 +- tvm/src/codegen/opencl/aocl/aocl_module.cc | 0 tvm/src/codegen/opencl/aocl/aocl_module.h | 0 tvm/src/codegen/opencl/build_opencl.cc | 33 +- tvm/src/codegen/opencl/common/common.mk | 150 ++ tvm/src/codegen/opencl/sdaccel.mk | 84 + .../codegen/opencl/sdaccel/sdaccel_module.cc | 328 --- .../codegen/opencl/sdaccel/sdaccel_module.h | 23 - tvm/src/codegen/opencl/sdaccel_module.cc | 408 ++++ tvm/src/codegen/opencl/sdaccel_module.h | 23 + 27 files changed, 19029 insertions(+), 392 deletions(-) create mode 100644 samples/digitrec/digitrec/data/testing_set.dat create mode 100644 samples/digitrec/digitrec/data/training_set_0.dat create mode 100644 samples/digitrec/digitrec/data/training_set_1.dat create mode 100644 samples/digitrec/digitrec/data/training_set_2.dat create mode 100644 samples/digitrec/digitrec/data/training_set_3.dat create mode 100644 samples/digitrec/digitrec/data/training_set_4.dat create mode 100644 samples/digitrec/digitrec/data/training_set_5.dat create mode 100644 
samples/digitrec/digitrec/data/training_set_6.dat create mode 100644 samples/digitrec/digitrec/data/training_set_7.dat create mode 100644 samples/digitrec/digitrec/data/training_set_8.dat create mode 100644 samples/digitrec/digitrec/data/training_set_9.dat create mode 100644 samples/digitrec/digitrec/hcl_code_dig.py create mode 100644 samples/gemm/gemm_sdaccel.py delete mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.cc delete mode 100755 tvm/src/codegen/opencl/aocl/aocl_module.h create mode 100755 tvm/src/codegen/opencl/common/common.mk create mode 100755 tvm/src/codegen/opencl/sdaccel.mk delete mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc delete mode 100755 tvm/src/codegen/opencl/sdaccel/sdaccel_module.h create mode 100644 tvm/src/codegen/opencl/sdaccel_module.cc create mode 100644 tvm/src/codegen/opencl/sdaccel_module.h diff --git a/samples/digitrec/digitrec/data/testing_set.dat b/samples/digitrec/digitrec/data/testing_set.dat new file mode 100644 index 000000000..df55b0afb --- /dev/null +++ b/samples/digitrec/digitrec/data/testing_set.dat @@ -0,0 +1,180 @@ +0x3041060800,1 +0x4081020400,1 +0x4081020400,1 +0x2041020400,1 +0x4081020600,1 +0x6081020c00,1 +0x20c1020800,1 +0x4081020400,1 +0x6081020400,1 +0x2081020400,1 +0xc081830600,1 +0x1061861800,1 +0x20c1060c00,1 +0x4081020400,1 +0x20c1020400,1 +0x20c1020400,1 +0x4081020400,1 +0x3041860800,1 +0x20c1860800,1 +0x40c1020400,1 +0x20c1060800,1 +0x2041020c00,1 +0x2041860c00,1 +0x4183070300,1 +0xe1c1860f00,2 +0xe041060f00,2 +0xc3c1060e00,2 +0x8181e70c00,2 +0x8181078000,2 +0x181078000,2 +0xc041078000,2 +0xe060c7be00,2 +0xe040820c00,2 +0x3c7cb8000,2 +0xe260870f00,2 +0x20e041060f00,2 +0x30e78f0a00,2 +0xc040870c00,2 +0xf1618f9800,2 +0xe040831e00,2 +0x20c1060200,2 +0x6043870a00,2 +0x61e0878c00,2 +0x6141870200,2 +0xc08207c000,2 +0xe041040e00,2 +0xe0c10f9f00,2 +0x70608f1a00,2 +0x20a041079e00,2 +0xe061879e00,2 +0x1e0c3060f80,2 +0xe041020e00,2 +0x10f0208f1800,2 +0xe0c307c000,2 +0x20e041040e00,2 +0xc0c1860300,2 
+0x60434b0e00,2 +0x70e18f9c00,2 +0x41c18f8900,2 +0x105126cb1e00,2 +0x2070618e1f00,2 +0xe06381111c,3 +0xe0c1c31c00,3 +0x1c3c1858f00,3 +0xe143c19e00,3 +0xe0c1c19e00,3 +0x40c3810a00,3 +0x60c3030a08,3 +0xe041811e10,3 +0xe0c3010e00,3 +0x6043811c00,3 +0x1c083808e00,3 +0xe081811e00,3 +0xe041811e00,3 +0xc081010400,3 +0xf0c0cf9f00,3 +0xe1c3c19e00,3 +0x6080810c00,3 +0x1f063809e00,3 +0x10142878100,4 +0x91a3c90400,4 +0x11438a0400,4 +0x8147878200,4 +0x21e48f0600,4 +0x9143c70408,4 +0x8363830600,4 +0x11238a0000,4 +0x8163830608,4 +0x3147c70400,4 +0x8142830400,4 +0x2347830304,4 +0x1162870400,4 +0x1326cf0200,4 +0x9648e0800,4 +0x10a38b0400,4 +0x1167860800,4 +0x41c3830408,4 +0x10364cf8300,4 +0x41a7860800,4 +0x1167820c10,4 +0xa1448f8200,4 +0xc2878200,4 +0x1a7c70408,4 +0x51c68f8600,4 +0x11e7cd0200,4 +0x7103810600,5 +0xf303c68700,5 +0xe303c48f00,5 +0x23e6070608,5 +0xf3020800,5 +0xf183899e00,5 +0x1e3020c00,5 +0x31e30b1c00,5 +0x1e30f0c00,5 +0x6183030e00,5 +0x30c1021800,5 +0x6081810c00,5 +0xc2020c00,5 +0x3183811e00,5 +0xc103808e00,5 +0x71e3851e00,5 +0x7181830c00,5 +0x1071038d0e00,5 +0x6083c78c00,6 +0x408103870000,6 +0x106103078700,6 +0x41c3870e00,6 +0xc107cf8e00,6 +0x6083870400,6 +0x2081060400,6 +0x18387c6c700,6 +0x20c107c78400,6 +0x3c0820408,7 +0x41c0820410,7 +0x23e081060c,7 +0x83c0830608,7 +0xe0820c10,7 +0x1c0810200,7 +0xc1c3830408,7 +0x1e6830c10,7 +0x41c0830408,7 +0x3e4c30c10,7 +0x71c1820820,7 +0x3e4c10204,7 +0xe1c1e71810,7 +0x3c1830608,7 +0x3c4810608,7 +0x41e0830c10,7 +0xe160820820,7 +0xe043860408,7 +0x23c0830408,7 +0x21e0820810,7 +0x1c3c0820408,7 +0x3c0810608,7 +0x3e7c10608,7 +0x3160820c10,7 +0x81c1830600,7 +0x43c0830408,7 +0xc3c0830408,7 +0x6123850e00,8 +0x41c1060c00,8 +0x3c7810204,9 +0x41c3810204,9 +0x6166c70600,9 +0xc103810204,9 +0xe3664d8e00,0 +0xe3e64d8e00,0 +0x6142850c00,0 +0x30a2c91c00,0 +0xe3e7488f00,0 +0x61a24c8e00,0 +0x31e64f0c00,0 +0x60c2850e00,0 +0x30f24d1c00,0 +0x61e2c58e00,0 +0xe3e6cf9e00,0 +0x60c2850c00,0 +0x71a2499c00,0 +0x71a24c9a00,0 +0x31e6cf8c00,0 diff --git 
a/samples/digitrec/digitrec/data/training_set_0.dat b/samples/digitrec/digitrec/data/training_set_0.dat new file mode 100644 index 000000000..b408c8fff --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_0.dat @@ -0,0 +1,1800 @@ +0x30e3cd1c00, +0x61e2cd0e00, +0x31a6491c00, +0x6142cc8e00, +0x71a6c91c00, +0xe1264d8e00, +0x61a2450e00, +0x61e76f8f00, +0xe1224c8e00, +0x20e24f9c00, +0xe162c70c00, +0x71e7cd9e00, +0x1e6698800, +0x61e64d9e08, +0xc3e44c8e00, +0x60e2450c00, +0x61e64d8e00, +0x71e64d8e00, +0x7164cf8e00, +0x20e2cf1800, +0x71a64f8e00, +0x21e2891e00, +0x60e2450e00, +0xc162470c00, +0xc3e44c8f00, +0x30e3491c00, +0x61e2c50e00, +0xe142850e00, +0x6142850e00, +0x1e4298c00, +0x4164498400, +0x6122458c00, +0xe1c2448e08, +0x71a64b1c00, +0x6142c50e00, +0x71a6499e00, +0x30e34f9c00, +0xe364489e00, +0xe3644d8e00, +0x61e24d8e00, +0xe1e64d8e00, +0x71f6499e00, +0xe1e2cd8e00, +0xe1e24c8f08, +0x61644d8e00, +0x61c6898e08, +0x71a4491c00, +0x30a2850c00, +0x71a6499c00, +0x61e4488f00, +0x20e2491c00, +0x71e64f9c00, +0x61c2850c00, +0x6164491c00, +0x71e74b9e10, +0x71224d8e00, +0xe3e4488f00, +0x20c2850c00, +0x20e34f0000, +0x30a2451c00, +0x6126488e00, +0x61a6488e00, +0x60e3470c00, +0xe166448f00, +0x20c3850c00, +0xe1e24d8e00, +0x1e6498e00, +0x21a4688e00, +0x20e2cd1c00, +0xf3e4499e08, +0x6142c50e00, +0xe144488e00, +0x60c3850c00, +0xf3e6cf9f00, +0x61c2448a08, +0x71e6489e00, +0x41c2448e00, +0xe3e74f8f00, +0x61e74d8f00, +0xe3264c8f00, +0x31a24c8e00, +0x61e2cd0e00, +0x11e64f8c00, +0x3e7c78600, +0x20e3850c00, +0x71a2491c00, +0x71e3c70c00, +0x6166489e00, +0xc142c58e00, +0x61e6c99e00, +0x61a64d8e00, +0x61a2c91c00, +0x6224488e00, +0x30e34f1c00, +0x71a6491c00, +0x61e64d9e00, +0x60c2850c00, +0xf1664d8e00, +0x1071264f8e00, +0x4144499c00, +0xe146cd8e00, +0x30e2491c00, +0x61c2cd0e00, +0x4142850e00, +0xe166458e00, +0x61e64f8c00, +0x6126448e00, +0x61e7cf9e00, +0x41c2c48600, +0x20a6698c00, +0x61e7cf8e00, +0x71e64f9e00, +0xf324498e00, +0x1071a24f1c00, +0x61c2448e00, +0xe2a4688f00, +0x61e6499e00, 
+0x4142850e08, +0xe124488f00, +0x30a24f9c00, +0x71e6cd1c10, +0xe1e2cd8e08, +0xc1224d8e00, +0x6124489e00, +0x10a2499c00, +0xa126458e00, +0xf324689e00, +0x71a2491c00, +0x61e2cd0e00, +0x61c2cd0e00, +0x61c2850c00, +0x41e66ccf00, +0x61a2cf0c00, +0x71a24d9e00, +0xe3a4499e00, +0xe64f0000, +0xe142870e00, +0x60c2850c00, +0x61a2450c00, +0x6124689e00, +0x6146890e00, +0x30e2cb1c00, +0x61e2499e10, +0x20e3850c00, +0x61e6499e00, +0x81e4498c00, +0x71a2450c00, +0x61e64d9e00, +0xe1624d8e00, +0x71e64f9c00, +0xc162458e00, +0x61e2c58e00, +0x71e28f1c00, +0x61a2491e00, +0x30a2850c00, +0x51e2489c00, +0x43e66f8e00, +0x71e7cf1c00, +0x60e2850c00, +0x71e4499e00, +0x71e34f9e00, +0xe144488e00, +0xe3e46f8e00, +0x61e6cf1c10, +0x61c2c50e00, +0x70e3cd0c00, +0x61428d0e00, +0xe122c70c00, +0xc1c4c98e00, +0xe124448e00, +0x71e64d9e00, +0x60e34d0c00, +0x61e7c48f00, +0xe164489f00, +0x61e6c99e00, +0x61a6699e00, +0x61e6c91e00, +0x61e24d9e00, +0x61c2448e00, +0x61e2cf8e00, +0x61e2499e00, +0xe1664c8e00, +0x30a2491c00, +0xf2a42c8f08, +0x366cf8000, +0x41e4488e00, +0x61e64b9c00, +0xe1e6cd9e00, +0x6162850c00, +0xe1e6cd8f08, +0x1e46f8e00, +0x61a3468e00, +0x41c2850e00, +0x6324488e00, +0x71e4c99e00, +0x6162448e00, +0x61c3c78e00, +0x61e34d9e00, +0xe1264d9e00, +0x71a2491c00, +0xe3e64c8f00, +0x30e2cf1c00, +0xe162448e00, +0xc142448e00, +0x61e2450e00, +0x61e6cd8e00, +0x61a24d9e00, +0x6142cd0e00, +0x21e64f1800, +0xe364488e00, +0x1e64f8400, +0x1e3e44d8e00, +0x61c2850e00, +0x71e2cf1c00, +0x21e24d9c00, +0x60c2850e00, +0x6142850e00, +0x61a6498e00, +0x61c2448e00, +0x61a4488e00, +0x61e4488f00, +0x71a6499e00, +0x61c2c50c00, +0x71c4c98e00, +0x6124499e00, +0x71e24d1c00, +0x61e64c8f08, +0x41e4488e00, +0xe146448e00, +0x61e2850c00, +0x1e44c8700, +0x41e2440e00, +0xe1a4488e08, +0xc146848f00, +0xe3c6cf8f00, +0x31e6cb1c00, +0xe1c2c58e00, +0x71e74d9e00, +0x61e2488e00, +0x71c28d1e00, +0xf3664d8e00, +0x63e44f9e00, +0x6166499c00, +0x61e64d8e00, +0x61c0cf0e00, +0xc2860000, +0x61e24f0c00, +0x63e2cd8e00, +0x41c3458e00, +0x61e6cf8e00, 
+0xe2cf0800, +0x30e3cf1c00, +0xe122448e00, +0x6142c50c00, +0x61e3c58e08, +0x10e38f1c00, +0xe162448e00, +0x63e6ef8e00, +0x4142850e00, +0x61c2c50c00, +0x4142850c00, +0x61c2c50e00, +0x71e2cf1c00, +0xe1e64c8f08, +0x61e48c8f00, +0xe162478e00, +0xe1e2c91e00, +0x61e3c50e00, +0x21e64f1800, +0x61a64c8e00, +0x61a2448e00, +0x63e44f8c00, +0xe3644c8f00, +0xe1e2cd8e00, +0x31e2491e00, +0x71a24d1c00, +0x4142858e00, +0xc364488f00, +0x71e64f9c00, +0x61664c8e00, +0x20c28d0c00, +0xc1e6448e00, +0x61e64c8e00, +0x61c2850c00, +0x20e3451c00, +0x30e28f1c00, +0x1e46d8400, +0x71a6499e00, +0x61a24d0c00, +0x61e2c51e10, +0xe1264c8e00, +0x6122499e00, +0x61e2cd8e00, +0x61e2cd9e00, +0xc1444c8f00, +0x63e46d8e00, +0x61a6688f00, +0x61c3cf1e00, +0xf1a64d8e00, +0xc142448f00, +0xe122448f00, +0x60c1870c00, +0x6140c90a00, +0xc1c3850e00, +0x60c2c50e00, +0x71a2c91c00, +0x60c38d0c00, +0x41e64c8e00, +0x61e4489e00, +0x31e64b1800, +0xe1e24d9e00, +0x61e2458e00, +0x61c24c8f00, +0x41c2448e00, +0x6126499c00, +0x20a64f9c00, +0x60e3cd1c00, +0x71e64d9e00, +0xc1c4488e00, +0x1e6cf8e00, +0x71e34d1c00, +0xe324499e00, +0x61a64d8e00, +0xe1664d8e00, +0x4102450c00, +0x83e44c8e00, +0x71a2499400, +0x71a64d9e00, +0x61644c8f00, +0x3e44c8600, +0xc3e4688e00, +0x41c24d8e00, +0x50a24d9e00, +0xe1664c8e00, +0x3126491c00, +0x60e2c70c00, +0x61c2c50e00, +0x71a6491e00, +0x7126490e00, +0x61624c8e08, +0x71264d8e00, +0x71a6499e00, +0x31e6491c00, +0x7166499e00, +0x30a24d9c00, +0x71e6cd0e00, +0x6164488e00, +0xe3e64c9e00, +0x30a2c91c00, +0x71e6499e00, +0x43e46c8e00, +0xe1264d8e00, +0x6142850600, +0x61e2c99e00, +0x61c3c50e00, +0x21e6489e00, +0xe3644c8e00, +0x70a2491c00, +0xe3e64d9e00, +0x60c3850c00, +0x31e64d9e10, +0x61e24d8e00, +0xe146858600, +0x4142458e00, +0xf3244d8e00, +0x20e2850c00, +0xe346448f00, +0x71a6499e00, +0x6122488e00, +0xb334491c00, +0x61e24d9e00, +0xc2850c00, +0x61e34d9e10, +0xa3644f8e00, +0x61e2cd0c00, +0x71b24b1c00, +0x71e64b9e00, +0x61b64f0800, +0x71e6cf9c00, +0x71b4499c00, +0x71a6489e00, +0x61e2cd1c00, +0x61e6cd8e00, 
+0x20a2cf0c00, +0x6122488e00, +0x61c2c70c00, +0xe1664c8e00, +0xe126489e00, +0x31b64f0800, +0x7026cb1c00, +0xc1c2c58e00, +0xf126688e00, +0x41e44f8c00, +0x4182850e00, +0xe3e64d8f00, +0x71a24d9c10, +0x7126498e00, +0x61e24c9e00, +0xe1e3c50e08, +0x20c2850e00, +0x21e4688e00, +0xe3264c8f00, +0x61666f9e00, +0x60c3850c00, +0x61e7699e00, +0xe264488e00, +0x61e2458e00, +0x30a3c50c00, +0x61e64d9e00, +0xe102448a08, +0x61e76f9e00, +0xe1624c8e00, +0x30d6099e00, +0x61e6cc9f00, +0x6142450e00, +0xc142448e00, +0x71e2499e00, +0x61c2cd0e00, +0x6164489e00, +0x61e64d9e00, +0x71224c8e00, +0x30e3cf0c00, +0x4142850600, +0x73e66f9e00, +0x20c2861800, +0x61e7cf9e00, +0x61c2850c00, +0x61a3468e00, +0x81e3478e00, +0x71e6491e00, +0x61e6499200, +0x69e2cf0c00, +0x3082450c00, +0xe1664d8e00, +0x61e6499c00, +0x61c2850e00, +0x60e3c50c00, +0x61a24d8e00, +0x61a2458e00, +0xe124490c00, +0x41e2478e00, +0x60e2851c00, +0x61e2cd8e00, +0x61e24f8800, +0x60e2491e00, +0x30e3cd1c00, +0x71e76c8f00, +0xe1e4499e00, +0x61a44d9e00, +0x30e3cf1c00, +0x20e2850c00, +0xe2a4488f00, +0x60e2478c00, +0x73e64f8e00, +0x61e2c50e00, +0x61e2c50c00, +0xe3644c8f00, +0x7164489e00, +0x21c28f0c00, +0x30e6cf1800, +0x63e4499e00, +0xe142850c00, +0x30e2491e10, +0x20e34f1c00, +0xc142c58600, +0x20a2450c00, +0x71a4489e00, +0x60e2450c00, +0x61e4c88f08, +0x30e34f1c00, +0x6142850c00, +0xe1264d8e00, +0x61e64c8e00, +0x23e44c8e00, +0xc244488f00, +0xc3e46c8e00, +0xc3c6448700, +0x61e4c88f08, +0x30a64b9c00, +0x20e24d1e00, +0xe166498e00, +0x61e64d8e00, +0x61e2c50e00, +0x71e6cb9c00, +0xe324489e00, +0x60e2c50e00, +0x61e64c8e00, +0x7122450e00, +0x1e7c89e00, +0x71a64b1c00, +0xe3e44c8e08, +0x71e4499e00, +0x20a24f8800, +0x63e4499e00, +0xe3e6cf8e00, +0xe3a4488f00, +0x63e66f8e00, +0xe324688e00, +0x41c2c48e00, +0x61e2491c00, +0xc3e64f8700, +0x61a6498e00, +0xe1e46f9e00, +0x61e64d8e00, +0x6166499e00, +0x30e34d1c00, +0xc1e66c8e00, +0xe324488e08, +0x6122448e00, +0xc1c2448e00, +0x61e6cd8e00, +0x21e74f1800, +0x30e24b1c00, +0x60c2850e00, +0x60c2850c00, +0x61e64f9e00, 
+0x3e74d9c00, +0x61a2850e00, +0x61e64d9e00, +0x31e64f9800, +0x71a24f1c00, +0x71e6c91c00, +0x61c2440e00, +0x71a6499e00, +0x70a24d1c00, +0x61e24d9e10, +0x6124498e00, +0x61e2458e00, +0x61e4489e00, +0xc1c3870600, +0x61c2850e00, +0xf3a4489e00, +0xe1e6448f00, +0x71a2491c10, +0x61e2450c00, +0x71e24d9e00, +0x6120410410, +0x41c6cd0e00, +0xc3e64f8e00, +0x20c3c50c00, +0x63e4688e00, +0x60e2491c00, +0xe324498e00, +0x71e64c8e00, +0x23e44f8e00, +0x61e3c58e08, +0xe1664c8e00, +0xc3e4488f08, +0x71b64f8e00, +0x31e64b9c00, +0xe1a24c8e08, +0x63644d8e00, +0xe3e64d9e00, +0x30e3491c00, +0x7224499c00, +0x4364478200, +0x61c28f0c00, +0x61a6498e00, +0x61c3c50c00, +0x60c2458e00, +0x61e2478e00, +0x71e7cd1c00, +0x41c2c50e00, +0x60c2850c00, +0x21e4489e00, +0xe1244c8e00, +0xe3e66ccf08, +0x61c38f0c00, +0x60c2c50c00, +0x21c2c50e00, +0x61e66f8400, +0x71e64c9f00, +0x61428f0c00, +0x20c1870c00, +0x61e6488e00, +0xf1e64d8e00, +0xe3a4698e00, +0xe1e24d8e00, +0x61a2cd0e00, +0x6122440e00, +0xf324488e00, +0xe3cf1c00, +0xe166cf8e00, +0x61e24c9e00, +0x61e2850c00, +0x71a74d9e00, +0x60c2850c00, +0xe1e6489f00, +0x71e3cf1c00, +0xf1e64f9c10, +0xe2450c00, +0x71e6cd9e00, +0x61e64f8c00, +0x61e6489e00, +0x6124490000, +0xf3e4499e00, +0x71a24d1c00, +0x384e78000, +0xf1e24d8e00, +0xf124499e00, +0x10e38e1800, +0x71e6cf9c00, +0xc3c6cf8f00, +0x71e64f9e00, +0x61a64c8e00, +0x60c2c50e00, +0x23e4688e00, +0xc3e4488f00, +0x61624c8e00, +0x6164499e00, +0x73e64f8e00, +0x71a64d8e00, +0x70e2870c00, +0x61e6c90e00, +0x71a64d8e00, +0x31e6cb1c00, +0x30c3870c00, +0x21c2cd1c00, +0x1e46f8000, +0x41e64f9e00, +0x21e2490600, +0xb326448f00, +0x6166c50e00, +0x71a24c9e10, +0x31a2cf1c00, +0xe364488e00, +0x7126499c00, +0x61e74b9e00, +0xe3e44c8e00, +0x60e3870c00, +0x61c28d0c00, +0x7324698e00, +0xe142850e00, +0x30e2491c00, +0xe1e64d8e00, +0x6142448e00, +0x71e64d9e00, +0xc142c50e00, +0xe146448f00, +0xc1c2858600, +0x21e4489e00, +0x61224c8e00, +0x71e24d9e00, +0x71e6cb1c00, +0xc3c6c58e00, +0x61e6cd0e00, +0x7126499e00, +0x31e6cb9c00, +0x20e38d1c00, 
+0xf324699e00, +0x4126488008, +0x61e2cd8e00, +0x61e6688e00, +0x71e6499c00, +0xe124689e00, +0x61224d8e00, +0x61e74f8c00, +0x142c50c00, +0x1e6cf9c00, +0x63e64d8e00, +0x10e34f1c00, +0x61e2448e08, +0x60c2850c00, +0x21e3cf1c00, +0x31e64f1c00, +0x6142c99200, +0x60c6c91c00, +0x43e44f8c00, +0x61a6491c00, +0x41464c8e00, +0x60a2081e00, +0x71a2491e10, +0x31e68b1c00, +0x6162cf1c00, +0xe3e64c8f08, +0x41e64f9c00, +0x61c2c58e00, +0xe142448e00, +0x40c2850e00, +0x61e24d9e00, +0x61e6c90e00, +0x71e6cf9c00, +0x61e24d8e00, +0x71a64c9e00, +0xe2e4498e00, +0xc344488e00, +0x61e3cf9e00, +0x6143850e00, +0xc1e62c8e00, +0x71e2c91c00, +0x6142850c00, +0x71e74d9e10, +0x41c2850e00, +0x61e2cd0c00, +0x71246f8e00, +0x61e24c9e00, +0x71a64b9c00, +0x61e66c9e10, +0x30a2cd0c00, +0xf1e34f9e10, +0x61a6490c00, +0x30c3450c00, +0x71a6cb1c00, +0x61a6488e00, +0x61c24d8e00, +0x61e6cd1e00, +0x41e66d9e00, +0x30e2cb1c00, +0x70e3cd1c00, +0xe144498e00, +0x41e6499e00, +0x61a2c58e00, +0x71e24d9e00, +0x20e24f0800, +0x71224d8e00, +0x6122450e00, +0x21e6499c00, +0x61e64f9e00, +0xe142c50e00, +0x31e6cb1800, +0x61e64c8e00, +0xe1c6cd9e00, +0x31e24d9c00, +0x31e6cb1c00, +0x60c2870c00, +0x71a2450c00, +0xe3e64d9e00, +0x61e2c58e00, +0xe1624d8c00, +0x61c2cd8e08, +0x63e64d8e00, +0xe1c6cd8e00, +0x71e64d9e00, +0x70e2c50c00, +0x30e3cd1c00, +0x61e64d9e00, +0x31e74f9e00, +0x41e64f8c00, +0x21e6489e00, +0xe324488e00, +0xf3e4499e00, +0xe3246c8f00, +0x61e66f8e00, +0x61c2c50e00, +0x61e3478e08, +0xe3e44c8e08, +0x1f3e7cf8f00, +0xe3e4cf8e00, +0x30a2491c00, +0xf126491c00, +0x61e2cf0c00, +0xc3870000, +0x30e3cf1800, +0x71e2cf0e00, +0x61224d8e00, +0x20c28f1c00, +0x1e366468e00, +0x20e24f1c00, +0x61624d0e00, +0x6122448e00, +0xe124488f08, +0x61a6499e00, +0x71a6491c00, +0x61e2c50e00, +0x20a28d1c00, +0xe3a64c890c, +0x31a6699c00, +0xe1e24d8e00, +0x61a64d8e00, +0x61e2c58e00, +0x70a2491c00, +0x20e3cf1c00, +0x41c2cf8e00, +0xe1a4499e00, +0x61c2c50e00, +0xc3e44c8f00, +0x1c366458e00, +0x61428d0c00, +0x30e3cf1c00, +0x6142c50e00, +0x41e2c50e00, +0xc3e4488f00, 
+0xc142c48e00, +0x6142cc8e00, +0x70a3451e10, +0x71e7cf9c00, +0x31e3491c00, +0x61c2488e00, +0x6364489e00, +0x71a4499e00, +0xe3e4499e08, +0x61e2c58e00, +0x61e2c58e00, +0x60c3850c00, +0xe1a3458e00, +0x71e24d8c00, +0x71a6499e00, +0xe224488e00, +0xe364499e00, +0x61e2450e00, +0x21e4c99e00, +0x61c2cd0e00, +0x63e64c8f00, +0x6124488e00, +0x71e7cf9e00, +0x21e64d8e00, +0x71e6cf8e00, +0xe3664c8e00, +0xe122448e00, +0x61e4488e00, +0x71e7cf9e00, +0x60a2448e00, +0x61e6499e00, +0xe142c58e00, +0x71664c8e00, +0xe142850e00, +0x61e2cd0c00, +0x61e2c50e00, +0x41e66f8c00, +0x6160890c00, +0x70a24d1c00, +0x71a6489e00, +0x61c3c70e00, +0x71e6499e10, +0x61e6cd8e00, +0xd3266c8e00, +0x30a24d1c00, +0x6124488e00, +0x61c3850e08, +0xe142448e00, +0x30e2cf1c00, +0xe1e2cf8e00, +0x7162c91c00, +0x71e6499e00, +0xe24f1c00, +0x61a2699e00, +0x61664d8c00, +0x61e6cd0e00, +0xe2244c8e00, +0x61e2cd0c00, +0xe3644c8e00, +0x71a6499e00, +0x7124491c00, +0x30a2491c00, +0xe364489e00, +0x61e6499e00, +0xc1c3850e08, +0x30e2cb1c00, +0x31a64f1c00, +0x71e2cd0e00, +0x30a2cf1c00, +0xf126699e00, +0x71e34b1c00, +0xc142448600, +0x31a64b9c00, +0x31a64f9c00, +0xc2e4488e00, +0x1a2a5484f00, +0xf366cd8e00, +0x61e44f9c00, +0x1e6c98c00, +0x41e6498e00, +0x71a24d9e00, +0x61c2cd0e00, +0x71a7cb9c00, +0xe3266c8f08, +0x31e24f1c00, +0x61464c8e00, +0x61c2c50e00, +0xe162458e08, +0x61e6489e00, +0x61e64c8e08, +0xc142450e00, +0x61e6cf8c00, +0x61c2850e00, +0x61e2c50c00, +0x63e46f8e00, +0x20c2850c00, +0x61e64c8e00, +0xe1a2cf8e00, +0x71a6489e08, +0x61c2870c00, +0x71a6c98e00, +0x6162458e00, +0x71a6491e00, +0xe342cd8f08, +0x41c2448e08, +0x31e24d9e00, +0x61c6c90e00, +0x7324498c00, +0x7124488e00, +0x60c3850c00, +0x41e4688e00, +0x31e6491c00, +0x61a64c8e00, +0x60c2850e00, +0x21e24d9e00, +0x71a3cf1c00, +0x71e2499e10, +0x71a2458e00, +0x61a2450c00, +0xe3264c8e00, +0xe142448e00, +0x71a2499c00, +0xc144488a00, +0x71a6cf1c00, +0x71e6c91c00, +0x41c2850c00, +0x71e2cf1c00, +0x71664d9e00, +0x1c3c444c700, +0xe1c6c78e00, +0x61c2cc8e00, +0xc3e64f8600, +0x61e6499e00, 
+0xe3e64c9f08, +0x61e64ccf00, +0x31e6cd1e00, +0x61c2850e00, +0xe162458e00, +0xe3874c8f00, +0x61428d0c00, +0x1e6488e00, +0x61e24d8e00, +0xe3244c8e00, +0x61a24d0c00, +0x61e2cf1c00, +0x1e64f8400, +0x61c2c58e00, +0x21666f8c00, +0x61c2c50e08, +0x71e24d9e00, +0xc1e6498f00, +0x61c2c50e00, +0x61e2c50e00, +0x6102440a00, +0xc146448f00, +0x60c3c50e00, +0xf324489f00, +0x61c28d0e00, +0x41c2850e00, +0x41e2c50e00, +0x63e44f8c00, +0x71e2cb1c00, +0x61e7850e00, +0x61e64b9c00, +0x61a2458e00, +0x1e64d8e00, +0x61a6499e00, +0x20e28d0c00, +0xe162c58e00, +0x81624c8e00, +0x1e54f8c00, +0xe164c99e00, +0x11a24b1c00, +0x20e64f0800, +0x30e3cf1c00, +0x61e2c50e00, +0x6124488e00, +0xc3e4498e08, +0xc1e0408618, +0x6122450e00, +0x1e7488e00, +0x20c3870c00, +0xe162448600, +0x30a2491c00, +0x21e2c50c00, +0x61c2c50e00, +0xe1264c8e00, +0x60c2448e00, +0xe1e64c8f00, +0x20e34f1800, +0xf1e64d9e10, +0x61c3c50c00, +0x71e2cd1e00, +0x1e3e6ef8f00, +0x61e24d9c00, +0x6164488e00, +0x61e4499e00, +0x61e7cd9e00, +0x30a24b1c00, +0x20c28f0c00, +0xe1224d8e00, +0x61264d0e00, +0x61e2cd0e00, +0x71a26d9e00, +0x21a64f9c00, +0x71e7cf1c00, +0xe124698e00, +0x61e4489e00, +0x61c2c78e00, +0x61664d8e00, +0x71a6498e00, +0x61e2448e08, +0xc1c3850c00, +0x60e2450c00, +0xe3264ccf00, +0xe2c4488e00, +0x41e2499c00, +0x60c2850c00, +0x30e3cf1800, +0x71e2cd1c00, +0x61c3870c00, +0x41e6499c00, +0x30e24b1c00, +0x41e44f8400, +0xc1c2cf8e00, +0x61e6490e00, +0x21e7499e18, +0xc166448e00, +0x23e4498e00, +0x31e44f1c00, +0x61e24d8e00, +0x6124488e00, +0xe3e44c8f00, +0x61a2499c00, +0x61e6499e00, +0x6166c90e00, +0xe1e7c58e00, +0x70e24d9e00, +0xe3644c8f00, +0x71a64f9e00, +0xe3644f8e00, +0x3c426c200, +0x7126448e00, +0xc142c58600, +0x6142850c00, +0x61c2cd8e00, +0x30a3499e00, +0x71e2c99c00, +0x71a2450c00, +0x71a2470c00, +0x71a64b1c00, +0x41e46f8600, +0x61e2c50e00, +0xe3e4489e00, +0x70e3c70c00, +0x61624d8e00, +0x71e6cd9e00, +0x61e6cf9e00, +0x6142c50c00, +0xc1c64c8e00, +0xc3e444c700, +0x163e6cd8e08, +0x61e2499e00, +0x20a2450c00, +0xe1e6cd8e00, +0xe1c2c58e00, 
+0xe364488e00, +0xe1224d0c00, +0x6164488e00, +0xc1c3870600, +0x61e6cd9e00, +0xc3644d8e00, +0x10c2090800, +0x61e6cd8f00, +0x60e3c51c10, +0xe3664c8e00, +0x21e74d9e00, +0x60c2870600, +0x6162448e00, +0xe1a2499e10, +0x61c2850e00, +0xe2244c8e00, +0x30e2cf1800, +0x61c2c50e08, +0xe366448e00, +0x30a24c9e00, +0x30e24d1c00, +0x1122cb1c00, +0x41e6489f00, +0x83c6c48f00, +0x30e64b1c00, +0x61e2450e00, +0x61c24d8e00, +0x1e6499c00, +0x71a6499c00, +0xe162450e00, +0x61e24d9e00, +0x71e4499e00, +0xf324699e00, +0xe164498e00, +0xe142c50e00, +0x30e28e1c00, +0x61624d9e00, +0xe3264c8e08, +0x61e64f8e00, +0x61e28d0c00, +0x6124489e00, +0xc146448e00, +0xe1a2458e00, +0x30e2cf1c00, +0x61c6c98e00, +0xc244489f00, +0x30e3cd9e00, +0x6142448e00, +0x61e6499e00, +0xe1e24d8e00, +0x31b24d9e00, +0xc3646f8e00, +0xe122458e00, +0x71b24d9e00, +0xe124499e00, +0x71e24f8e00, +0x30c28d1c00, +0x61a2499c00, +0x61c2458e00, +0x41c6cd9e00, +0xe1c2c78e00, +0xe324488e00, +0x61e24d8e00, +0x61c2850c00, +0x31a24d9e00, +0x61e4488e08, +0xe122450e00, +0x60c3c50c00, +0x61e64b9c00, +0xe122448f08, +0x6166491e00, +0x60e34f9c00, +0x60c3870c00, +0xe364488f00, +0xe324499e00, +0x61e4488e00, +0x61c2c50e00, +0x61c2cd0e00, +0x71a64d9e00, +0x21e34d9e00, +0x61a0498e00, +0x61e64f8e00, +0x60c3c70c00, +0xf1224d9e00, +0x61c2870c00, +0xe162448e00, +0xe2450800, +0x71a6499e00, +0x71e66d9e10, +0x30e3cf0c00, +0xe166498c00, +0x20e2c91e00, +0x61e6c99c00, +0x70e3cf1c00, +0x1e366478f00, +0xe3e64f8e00, +0x6142850600, +0x61e6491c00, +0x41c2850c00, +0x71a6489e00, +0x61e64f8c00, +0x61e24f0c00, +0xe1264c8e08, +0x71a6499e00, +0xb1a64d8e00, +0x61a24c8e00, +0x21e4481e00, +0x61c2850e08, +0xc142458600, +0x30a3cf1800, +0x43c5488f00, +0x71e6489f00, +0x61224d8e00, +0x71a6499e10, +0x2122450c00, +0x71e6c99e00, +0x30e6491c00, +0x61e4c90c00, +0x71e2cf1c00, +0xe364498e00, +0x6142448c00, +0xe1c2c58e00, +0x6126448e00, +0x30e24b1c00, +0x61e64d9e00, +0x21e6499c00, +0x6122488e08, +0x71a7491c00, +0x61e6499e00, +0x61e2c50e00, +0xf3244c8f08, +0x20e2401e10, +0x61e64d9e00, 
+0x61c28f0c00, +0x61a2850c00, +0x61e2c50e00, +0x61a24d8e00, +0x6142450c00, +0xe122448e00, +0x1e2e44c8f0c, +0xc0c1850e00, +0x61e24d8e00, +0x31e24b1c00, +0x21e6488e00, +0xe244c88f00, +0x61224d8c00, +0xa224488f00, +0xc364488e00, +0xe1e64c8f00, +0xe1e6cd8e00, +0xc146448f00, +0xc3c44e8600, +0xe1e6cd8e00, +0xe144489e00, +0x20c2870c00, +0x6142cd0c00, +0x61a2c70c00, +0x61e64c8e00, +0x6124409e00, +0x61861800, +0x30e24d9e00, +0x70e2871c00, +0x41e4288e00, +0x60c2850c00, +0xc162458e00, +0xe1e64f8e00, +0x61e64f9e00, +0x61e66d9e00, +0xc3644c8f00, +0x61c2c50e00, +0x61e6489e08, +0x61e6c70e00, +0xe1664d8e00, +0x60c2850e00, +0x60c1830c00, +0x61c3c50e00, +0x61c6cd0e00, +0xe162c50600, +0x61e6499e00, +0x61e74d9e00, +0xe226448e00, +0x6142448e00, +0x61e4cf1c00, +0x7124499e00, +0x7122cd1c00, +0x61e2cf0c00, +0x31e6cb1c00, +0xc142448600, +0xe3e4499e00, +0x107224491c00, +0x7164489e08, +0x10e38f1c00, +0x61e2c50e00, +0xf126689f00, +0xc3644d8e00, +0x61224d0c00, +0x31a64f1c00, +0x61664d8e00, +0x41644c8f00, +0x61c3830400, +0x70a24d1c00, +0x20e34d9c00, +0x20a2c91c00, +0xe1266c8f08, +0x30e3cf1800, +0x61e24d0e00, +0x30a34d8e00, +0xe364488f08, +0x60c2c70c00, +0x61e64c8e00, +0xc1e66c8f00, +0x7126498e00, +0x6364488f00, +0x61c2850e00, +0x30a2c50c00, +0x71e3cf1e00, +0x30a24d0c00, +0x61c2850c00, +0x20e2870c00, +0x71a6499e00, +0x30e3cf1800, +0x71e74f9e00, +0x61c2458e00, +0x61e2cd1e00, +0x71e6cf8e00, +0xe2a448cf00, +0x61e6499e00, +0x61c2c50e00, +0xe3644c8f00, +0x30a24b1c00, +0xe3e6cf8e00, +0x1e364489f00, +0xe1664c8e00, +0xe1264d8e00, +0xe64f0800, +0xe1e64c8e00, +0x1e1e1448e08, +0xe166cd8e00, +0x61c2c50e00, +0x40c2850a00, +0x61e6488f08, +0x7024c91c00, +0xc142c58600, +0x6164488e00, +0xe1e64d9e00, +0x30e2cd1e00, +0x71a4499e00, +0x60e64b9c00, +0xc142850600, +0x31a2cb1c00, +0x60e24d1e10, +0x61e24d8e00, +0x61a24d8e00, +0x6162cd0c00, +0x63e4498e00, +0x70a2499c00, +0xe324688f00, +0x71264d9c00, +0x61a24d8e00, +0xc146448e00, +0x6164c98e00, +0x6126489f00, +0x71a2450c00, +0x30e2870c00, +0x61a24b1c00, +0x71a2699e00, 
+0xe1a64c8e08, +0xe164488e00, +0x61e74f9e00, +0xe142c50e00, +0x7126488e00, +0x61e3cf9e00, +0x61e4698c00, +0x61e2458e00, +0x61c2850c00, +0x61e6499e00, +0x71e24d9e10, +0x71a6cf9e00, +0x60c2cd0e00, +0x30c2830c00, +0xe1e3c50e08, +0x31e2cd1e00, +0xc3c4489f00, +0x61e2850e00, +0xe364488f00, +0x61e2cd0e00, +0x61e6c90e00, +0xf3e66d9e00, +0x6166499e00, +0xe366cd8e00, +0xe224448e00, +0x41c2858e00, +0xe3e4489e08, +0x61e24d9c00, +0x71e66f8e00, +0x61c2c58e00, +0x61224d8e00, +0xf1e64f9e00, +0x61e2c58e00, +0x71e2cf8e00, +0xe364699e00, +0x61e64f9e00, +0x61e64c9e08, +0xe224499e00, +0x71e2cd1c10, +0xa3e6488f08, +0x30a24d1c00, +0x38e2491c00, +0x61e2458e00, +0xe1e2458e00, +0x60e3cd1c10, +0x61e64d8e00, +0x71e74d9e00, +0xc3c7cf8f00, +0x61e6cf8c00, +0xe1264d8e00, +0x31e24d1c00, +0x61664d8e00, +0x71a4489e10, +0x31e7cf9e00, +0x6162458e00, +0x61e6499e00, +0x61e2850e00, +0x31a2cf1c00, +0xe1644d9e00, +0x61e6cd8e00, +0x6142850e00, +0x71a64c8e00, +0x20e3851c10, +0xe3e64c8f04, +0x61224d8e00, +0x30e3cf1800, +0x61c2c50e00, +0x6162c50e00, +0xe1c7448f00, +0xe3644c4f00, +0x61224d0e00, +0x1e7c70400, +0xf3e66c9f00, +0x61e6489e00, +0x38c28f1c00, +0xe324488e00, +0x71a6491e00, +0x6142850c00, +0x61a2450e00, +0x6122448e00, +0xe1e64c8f00, +0x20e3850c00, +0x71e2c70c00, +0x61e6cf9e00, +0x63e4cf9e00, +0xe1264b8e00, +0x7122891c00, +0x41c2c48e00, +0x70e2cd0c00, +0x71e64d9c00, +0xe326448e00, +0xe144cd8e00, +0x41c2cc8f00, +0x63e644c700, +0x71e64d8e00, +0x61c6cf8e00, +0x1e76f8c00, +0x7122491e00, +0x30c3850c00, +0x61c2870c00, +0x61a2458e00, +0xc3644ccf00, +0x1e54f8c00, +0x60c3870c00, +0x30e3870c00, +0x61e2850c00, +0x21e64f8c00, +0x11e6c91c00, +0x41c2850600, +0x30e2491c00, +0xc1c2cc8e00, +0x61e6499c00, +0x20c2870c00, +0xe1c2c50e00, +0xc164289e00, +0x41c2850600, +0x6142c58e00, +0x63c4cc8f00, +0x30a64b1800, +0xe3664c8e00, +0x83e3458e00, +0x21e64b9e00, +0xc1c4cd0e00, +0x61e3451e10, +0x7104048e00, +0x30a24c9e00, +0x21664f1800, +0xf1264d9e00, +0x41c2850e00, +0xc146c58e00, +0xe1e3850e00, +0xc1464c8f00, +0x6126488e00, 
+0x61a2448e00, +0x70e64b1c00, +0x71e6488e08, +0x71e6499e00, +0x30a24d1c00, +0x61e2cd0e00, +0x1e64f8000, +0x30e3cf1c00, +0x6124491e00, +0x61e6c99e00, +0x61e6cd9e08, +0x61c2850e00, +0xe3264c8e08, +0x60c2870c00, +0x20e3cd9e00, +0x43c74ccf00, +0x30e3cd1c00, +0x70a24d0c00, +0x21e6498c00, +0x61e24c9e00, +0x73e6499e00, +0xe162488e00, +0x30e3cf1c00, +0x60c2450c00, +0xe144488e00, +0xe122458e00, +0x60c2cd0e00, +0x71a64d9e00, +0xe324488900, +0x30e34d1c00, +0x61e2448e00, +0x61624c8e00, +0x61a4289e00, +0x61e2cd8e00, +0x71e74d9e00, +0x30e2cf1c00, +0x71a24d9e00, +0xc102450c00, +0x71e6cf1c00, +0x71a6cd0e00, +0x71e64f9c00, +0x41e44f8c00, +0x61e64f9e00, +0x70e2cd1e00, +0x6122450e00, +0x6166489e00, +0x71e2cd0c00, +0x21e6499c00, +0x71a24d8e00, +0x61a24d8c00, +0xc1c2850e00, +0xe3e44c8f00, +0x6122448e00, +0x1e44f8000, +0xe1e2499e00, +0xc1c3870e08, +0x61e6cd0c00, +0x61a4489e00, +0x71a6491e00, +0x61c2850c00, +0xe126448e00, +0x61644d8e00, +0xc3e6448f00, +0x71a2450c00, +0x1e74f9e00, +0xe1264c8e00, +0x6166489e00, +0x31a24b1e00, +0x61a4499e00, +0xc3e644cf00, +0x61e2c70e08, +0x61e2448e00, +0x30e6499e00, +0x31a2491c00, +0x7126499e00, +0x71a6499e00, +0x1e3e44c8f00, +0x61e64f9e00, +0xe1e648d900, +0xe346448e00, +0xc3e66d8e00, +0x61f74f9e00, +0x30e2450c00, +0x71a64d9e00, +0x61c2458e00, +0x4164489e00, +0xe1644c8f00, +0x60a2450c00, +0x6120488e00, +0x31e24f9c00, +0xc126458e00, +0xe1664d8e00, +0x60c3870c00, +0x60e2870c00, +0x61c6cf9e00, +0x40c2850e00, +0x61e2c70e00, +0x60c2850c00, +0x6162450e00, +0x40c2850c00, +0x31f7499e00, +0x21c2490e00, +0x1e64f8800, +0x71a2cd1c00, +0x61224c8e00, +0x1e4499e00, +0x20e2850e00, +0xe324498e00, +0x71a6c91c00, +0x60e2cd9e00, +0x60e24f1c00, +0x6142488b08, +0xe3e64d9e00, +0x73e4489f00, +0xc2244c8700, +0x21e7cf9c00, +0xe1664c8e00, +0xe142458e00, +0x71a4489e00, +0x61c24c8e00, +0x61e64d8e00, +0x41e4498c00, +0x10f2489200, +0x7122491600, +0x61e5489e10, +0x63e64f8c00, +0x61e6499e00, +0xf326498e00, +0x30e28f1c00, +0x71e2c91c00, +0xe3e36d9e00, +0x30a3cf0c00, +0xe1a6489e00, 
+0x30e3870c00, +0x6106c98e00, +0x30a34f1c00, +0xe1c3850e00, +0x6122489e10, +0x21e2c51c10, +0x1e3264c8f00, +0x71e3c70c00, +0x61c2850c00, +0x60c2870c00, +0x61c2850e00, +0x61e6689f00, +0x71e6cf1c10, +0x30a34f1c00, +0x61264c8e00, +0x61c3c70c00, +0x61a2488e00, +0x10e24b0800, +0xc166488e00, +0xe3e6cd8e00, +0x30c3871c00, +0x41424d8e00, +0x61e64f9e00, +0x71a64d9e00, +0x61e24c8f00, +0x61c2c50c00, +0x61c2cf0c00, +0x6164489e00, +0x61c24d8e00, +0xe264489e00, +0xc126448600, +0x6162cd0c00, +0x6124488e00, +0xe1e64d8e08, +0xf1264c8e00, +0xe162cd8e00, +0x6324488e00, +0x71a2cf1c00, +0xe1c2cd8e08, +0xe3664d9e00, +0xe3e4488f00, +0x61c64d8e08, +0x61e2cf1c00, +0x31e64f8e00, +0x6104490e00, +0x31e3cf1c00, +0xc1c3870600, +0x71e6c91c00, +0x6142850e00, +0xe1e6cd8e00, +0x30a24d1c00, +0x60a2450c00, +0x6142499e00, +0x61624d8e00, +0x61e64d9e00, +0xe3e64d8e08, +0xe1c68d0e00, +0x61664d0e00, +0x30e34f9c00, +0x41e64d8e00, +0xe166448f00, +0x10f24b1c00, +0x30828f1800, +0x20a3450c00, +0xe142c58e00, +0xe164489e00, +0xc2c4484f00, +0x71a6491c00, +0x30e2cb1c00, +0x71a6488f00, +0xe3264c8e08, +0x6126498e00, +0x11f64f8800, +0xe1664c8f00, +0x71a6688e00, +0x71a64d9e00, +0x30e64b1e00, +0x30e2cd1c00, +0x41c2858e00, +0x61e2cf0c00, +0x31a6cf1800, +0x60a2440a00, +0x71a6499c00, +0xe1e6688f00, +0x70e2cd9c00, +0x61c2cd0e00, +0x20e2cd9e00, +0x71a66d8e00, +0x61e6cd9e00, +0xc364488f00, +0x1e6cf8e00, +0x61a2458e00, +0x61a64c8e08, +0xe3e64f9e00, +0x61a2459e10, +0x63e4488e00, +0x31a44b0c00, +0x71a2499e10, +0x71f24d9e00, +0x71a24d1c10, +0x6142850c00, +0x83c6478200, +0x71e6cd1c00, +0x61c2c48e08, +0x82c44c8e00, +0xe162458e00, +0x41c2c58600, +0x1c44f8000, +0x61a64d8e00, +0x30a24f9800, +0x20e264cf8e00, +0xf1264d8e00, +0x41c6c89e00, +0x61e2c58e00, +0x6142450e00, +0x61e2c58e00, +0x61624d8e00, +0x70e2c70c00, +0xe162cd8e00, +0x61e2cd0c00, +0x21e2449800, +0x30e3cf1c00, +0x60e2c91e00, +0x61e2850c00, +0x60e38d1c10, +0xe3a4488f00, +0x61e2cf0c00, +0x61e24f8c00, +0xe1664c8f08, +0x1e3e64c8f08, +0x61c28d0c00, +0x1e4499800, +0x61c3c58e08, 
+0x71a24d0e00, +0x61e6489f08, +0x61e3870c00, +0x70a2450c00, +0x60a2448e00, +0x71e6cd9e00, +0x6324489f00, +0xe162458e00, +0x71e24f1c00, +0x71a64f8e00, +0x61264d8e00, +0x61e2c50e00, +0x20e24b9c00, +0x71e6499208, +0x61a4488e00, +0x61e2c50e00, +0x6182850e00, +0xe142c58e08, +0x61a44f8c00, +0x70a2491c00, +0x41c2cc8e00, +0xe3e647c700, +0xe3e4488e00, +0x30a2871c00, +0x6162458e00, +0x61e4499e00, +0xe1e4488e00, +0xe142450e00, +0x41c2850e00, +0x61c2c70c00, +0x61224c8900, +0xc3644e8700, +0xc1c2c50e00, +0x61a6499e10, +0x30e2499e00, +0x61e6cd0e00, +0x61c2850e00, +0x61a4499e00, +0x71e2491c00, +0x61c2488e00, +0xe1e64c9f00, +0x60c2850c00, +0xc3c4c98e00, +0x71a6698e00, +0x71a6499c00, +0x31e64f9e00, +0x61e7ef8c00, +0x61c64c8f00, +0x6164488e00, +0xe1e2478e00, +0x61c2850e00, +0x61a24d1c00, +0x41c2870600, +0x61a4489e00, +0x41e4698c00, +0x71e6cf1c00, +0xe1e64f8e00, +0x71a2cd1c00, +0x20e24b9c00, +0xe124489f00, +0x61e2470c00, +0x61624d8e00, +0x6162448a00, +0x61e24d9a10, +0x61e2cf0c00, +0x31e24d9c00, +0x61c6c58f00, +0xe1e64f8e00, +0x102e6678400, +0x61a2448e00, +0x6122448e00, +0xc142850e00, +0x30e24f1c00, +0xe122458e00, +0x60e3cf1c00, +0x61e6499c00, +0x71a44b9c00, +0xe3664c8f08, +0x71a64d9e00, +0x21e64d0c00, +0x41c2448e00, +0x20c2850c00, +0x61e24c9e00, +0x30a2491c00, +0x61c2850c00, +0xe3e4488e08, +0x21e7cf9e00, diff --git a/samples/digitrec/digitrec/data/training_set_1.dat b/samples/digitrec/digitrec/data/training_set_1.dat new file mode 100644 index 000000000..e3070666e --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_1.dat @@ -0,0 +1,1800 @@ +0x2041020c00, +0x4081020400, +0x4081020400, +0x2041020800, +0x4081020c00, +0xc181830600, +0x60c1020400, +0x2001020400, +0x20c1060400, +0x60c1020c00, +0x1041020800, +0x1e041811f00, +0x1041861800, +0x20c1060c00, +0x1041020800, +0x20c3860800, +0x4081020400, +0x6081020400, +0xc1020800, +0x2041060800, +0x4081020600, +0x3041861800, +0x2041020800, +0x4081020400, +0x2041020c00, +0x60c1830600, +0x4081020c00, +0x3041860800, +0x2041060800, 
+0x4081020400, +0x20c1060810, +0x20c1820c00, +0x4081020400, +0x4081020400, +0x1041060800, +0x60c1060400, +0x60c1020400, +0x4081020400, +0x4081830600, +0x4081020400, +0x70c3870c00, +0x3041061800, +0x20c1060800, +0x4081020400, +0x4081020600, +0x60c1830400, +0x4081020400, +0x4081020400, +0x2041020800, +0x4081020400, +0x2001020400, +0x4081020400, +0x2041020c00, +0x1041041000, +0x2041060800, +0x4081020400, +0x4081020400, +0x4081020400, +0x3041861800, +0x2041020c00, +0x20c1020800, +0x60c1830400, +0x20c1020800, +0x1041060800, +0x2041020400, +0x6081020c00, +0x4081420c00, +0x4081020400, +0x40c1830400, +0xc1020400, +0x4081020200, +0x4081020400, +0x20c1020c00, +0x20c1020c00, +0x1041060800, +0x6081020600, +0x4081820400, +0x6081020400, +0x60c1020c00, +0x2081020400, +0x60c1060c00, +0x4081020400, +0x4081020c00, +0x20c1020400, +0x20c1060800, +0x4081020400, +0x60c1060c00, +0x2081020800, +0x20c1020400, +0x3041860c00, +0x4081020600, +0x4081020400, +0x1041860800, +0x2041060800, +0x81020400, +0x60c1020400, +0x2041020800, +0x4081020400, +0x4081020400, +0x2041860800, +0x20c1060800, +0x20c1020c00, +0x60c1820400, +0x20c1020400, +0x20c1020400, +0x20c1820810, +0x20c1020800, +0x20c1020c00, +0x60c1020c00, +0x4081020400, +0x2041020800, +0x3041860800, +0x20c1020800, +0x1041860800, +0x40c1020400, +0x4081020400, +0x20c1060800, +0x60c1060c00, +0x4081020400, +0x3041861800, +0x20a1040900, +0x4081020400, +0x2041060800, +0x20c1060800, +0x4081020400, +0x60c1020400, +0x2041820c00, +0x4081020600, +0x4081020200, +0x20c1860800, +0x2041020400, +0x4081020400, +0x20c1860c00, +0x20c1020800, +0xc183830600, +0x204080030c00, +0x2081020400, +0x40c1830e00, +0x20c1020800, +0x2041020c00, +0x2041020400, +0x60c1820400, +0x2041060800, +0x20c1020800, +0x20c1020c00, +0x2041020c00, +0x4081020400, +0x2041060800, +0x2041820c00, +0x30c3821c00, +0x4081020400, +0x6081020400, +0x4081030600, +0x2081020400, +0x20c1020800, +0x1041860800, +0x4081020400, +0x2041020800, +0x20c1020400, +0x40c1020400, +0x20c1020400, +0x20c1020c00, 
+0x3041060800, +0x8101030200, +0x1041041800, +0x20c1060800, +0x4081020400, +0x60c1870c00, +0x81020400, +0x20c1060800, +0x4181830600, +0x6081020400, +0x6081020604, +0x1041060800, +0x20c1020c00, +0x20c1820c00, +0x2041060800, +0x20c1020c00, +0x3041060800, +0x20c1060800, +0x4081020400, +0x3041060800, +0x60c1060400, +0x4081020400, +0x20c1820400, +0x4081020400, +0x4081020400, +0x4081020400, +0x2081020400, +0x2041020800, +0x2041060800, +0x2041820c00, +0x40c1830400, +0x3041060800, +0x4081020600, +0x20c1020c00, +0x20c1020800, +0x2081020800, +0x4081020600, +0x20c1060800, +0x4083020400, +0x20c1020400, +0x4081020400, +0x60c1060c00, +0x60c1820400, +0xc081020400, +0x2041020000, +0x4081030600, +0x204183870600, +0x60c1020400, +0x1041061800, +0x60c1020c00, +0x60c1020400, +0x4081830600, +0x60c1020408, +0x20c1020800, +0x2041020800, +0x4081020400, +0x4081030600, +0x4081020400, +0x4081830600, +0x40c1020400, +0x2081020400, +0x4081020400, +0x4081020200, +0x30c1060c00, +0x4081020400, +0x4081020c00, +0x20c1020c00, +0xc081030600, +0x2041860800, +0x4081020400, +0x4081020400, +0x6081020400, +0x4081830600, +0x40c1820400, +0x4081020400, +0x4081020400, +0x4081020400, +0x2041020800, +0x20c1060c00, +0x20c1020400, +0x20c1020800, +0x3041060800, +0x40c1820400, +0x4081020400, +0x2041820c00, +0x20c1861800, +0x60c1020400, +0xc1020400, +0x4081020400, +0x2041020c00, +0x4081020400, +0x6081020400, +0x20c1860800, +0x40c1820400, +0x4081020400, +0x4081020400, +0x4081020600, +0x1041060800, +0x60c1060c00, +0x4081020400, +0x4081020400, +0xc1020000, +0x4081020400, +0x4081830200, +0x4081030600, +0x81020400, +0x8181030600, +0x20c1060c00, +0x3041060800, +0x2041060800, +0x81020400, +0x1041060800, +0x4081020400, +0x4081020600, +0x4081020400, +0x20c1860c00, +0x3041861800, +0x60c1020c00, +0x20c1020c00, +0x4081020600, +0x4081020400, +0x4081020400, +0x3041060800, +0x2041020800, +0x40c1020400, +0x30c1060800, +0x4081830600, +0x20c1020c00, +0x2081020c00, +0xc081030600, +0x2041820800, +0x4081020400, +0x4081020400, 
+0x20c1020c00, +0x2041020c00, +0x20c1060800, +0x4081020600, +0x40c1020400, +0x60c1020c00, +0x3041861800, +0x2041060800, +0x20c1020400, +0x4081020400, +0x60c1860c00, +0x2081020400, +0x4081020400, +0x60c1020400, +0x4081020400, +0x4081020400, +0x20c1020400, +0x2081020400, +0x4081830400, +0x1041860c00, +0x30c1861800, +0x20c1020c00, +0x40c1830600, +0x20c1020400, +0x20c1020c00, +0x20c1820c00, +0x4081020400, +0x20c1020800, +0x1061861800, +0x4081020400, +0x81020400, +0x4081020600, +0x20c1860800, +0x4081020600, +0x2041820800, +0xc081020200, +0x4081020400, +0x20c1060800, +0x20c1060800, +0x40c1820400, +0x20c1020800, +0x4081020400, +0x20c1820c00, +0x1041060800, +0x4081020400, +0x2041020c00, +0x1041060800, +0x2041060800, +0x20c1020400, +0x20c1820c00, +0x2041020800, +0x2041060800, +0x6081060400, +0x81020400, +0x40c1020400, +0x20c1860800, +0x40c1830400, +0x4081020600, +0x40c1830400, +0x40c1020400, +0x6081020c00, +0x20c1060800, +0x4081020400, +0x40c1020400, +0x40c1020400, +0x4081030200, +0x2041860800, +0x2081020c00, +0x40c1820400, +0x60c1020c00, +0x30e38e1800, +0x2081020400, +0x60c1020400, +0x20c1820800, +0x2041020800, +0x60c1860c00, +0xc183870400, +0x60c1820c00, +0x40c1020c00, +0x20c1060800, +0x2041020800, +0x2041020800, +0x40c1820400, +0x60c1020c00, +0x4081020400, +0x4081020400, +0x4081020400, +0x20c1020c00, +0x6081020c00, +0x4081020400, +0x20c1060800, +0x20c1020c00, +0x2081020400, +0x81020400, +0x6081020400, +0x81020c00, +0x20c1060800, +0x20c1860800, +0x4081020400, +0x4081020400, +0x20c1020400, +0x2041020400, +0x20c1020400, +0x60c1860c00, +0x4081030200, +0x4081020400, +0x2041060800, +0x4081020400, +0x2041020400, +0x4081020400, +0x20c1020c00, +0x20c1020800, +0x4081030600, +0x2041060800, +0x4181020600, +0x4081020400, +0x4081020400, +0x4081020400, +0x60c1020400, +0x60c1820c00, +0x2041860800, +0x4081020400, +0x20c1020c00, +0x2041020c00, +0x4081020400, +0x1041041800, +0x2041020800, +0x20c1020c00, +0x4081030600, +0x20c1860c00, +0x2041020400, +0x2081020800, +0x2081020400, 
+0x20c1060800, +0x60c1020c00, +0x1041860800, +0x81020400, +0x2041060800, +0x4081830600, +0x20c1060800, +0x60c1820400, +0x4081020400, +0x20c1060c00, +0x20c1020400, +0x60c1020e00, +0x20c1820800, +0x3041860800, +0x4081020400, +0x2041020c00, +0x60c1020400, +0x40c1820400, +0x4081020400, +0x4081020400, +0x4081030600, +0x20c1060c00, +0x2041020800, +0x2081020400, +0x4081020600, +0x4081020400, +0x40c1820400, +0x4081020400, +0x4081020400, +0x4081020400, +0x20c1060800, +0x20c1820c00, +0x20c1820c00, +0x4081030600, +0x60c1020e00, +0x60c1820400, +0x20c1020800, +0x4081830200, +0x4083020600, +0x1061861800, +0x4081020400, +0x60c1820c00, +0x6081020c00, +0x40c1830600, +0x4081020400, +0x181020200, +0x6081020600, +0x20c1860c00, +0x4081030600, +0x40c1830400, +0x4081020400, +0x61c4821e00, +0xc1020400, +0x20c1820c00, +0x40c1830c00, +0x4081830600, +0x2041820800, +0x20c1020400, +0x20c1060800, +0x4081020400, +0x30c1860800, +0x4081060c00, +0x2081020400, +0x4081020400, +0x20c1020400, +0x4081020400, +0x4081020600, +0x4081020600, +0x4081020400, +0x1061861800, +0x60c1020c00, +0x1041060800, +0x1041861800, +0x20c1860c00, +0xc081020600, +0x8181830600, +0xe040830e00, +0x60c1020400, +0x20c1060800, +0x20c1060800, +0x2041860800, +0x4081030600, +0x20c1060800, +0x20c1020c00, +0x2041860800, +0x6081020400, +0x20c1060800, +0x2041020400, +0x4081020400, +0x4081020400, +0x60c1860c00, +0x20c1020400, +0x2081020c00, +0x20c1020c00, +0x20c1860c00, +0x4081020600, +0x4081020600, +0x1061861800, +0x20c1020c00, +0x20c1020400, +0x2041060800, +0x20c1020c00, +0x4081020400, +0x60c1020400, +0x2041060800, +0x4081030600, +0x4081020400, +0x30c1060800, +0x2041020c00, +0x4081020400, +0x20c1020800, +0x20c1020400, +0x4081020400, +0x1041861800, +0x40c1020e00, +0x4081020400, +0x1041020800, +0x4081030200, +0x4081020000, +0x20c1020400, +0x2041820c00, +0x4081020600, +0x20c1060c00, +0x40c1020c00, +0x2081020400, +0x3041860800, +0x1061861800, +0x20c1060800, +0x4081020c00, +0x20c1060800, +0x204081031c00, +0x4081020400, +0x2041020800, 
+0x4081020600, +0x1041040c00, +0x4081020400, +0x20c1860c00, +0x4081030600, +0x20c1020400, +0x20c1060c00, +0x4081020400, +0x20c1020400, +0x20c1020c00, +0x20c1020400, +0x4081020400, +0x4081020600, +0x6081020600, +0x20c1060c00, +0x4081020600, +0x20c1020c00, +0x60c1820400, +0x2041020800, +0x2041060800, +0x2041860800, +0x81020400, +0x4081020400, +0x4081030200, +0x3041860c00, +0x4081020400, +0x4081020400, +0x60c1820c00, +0x4081020600, +0x6081020400, +0x20c1060c00, +0x4081020400, +0x20c3060800, +0x4081030600, +0x4081020400, +0x20c1020400, +0x20c1860c00, +0x20c1020c00, +0x60c1060c00, +0x40c1830400, +0x4081020400, +0x1061861800, +0x6081020400, +0x4081020400, +0x4081020600, +0x4081020600, +0x20c1860c00, +0x4081020400, +0x4081020400, +0x20c1020c00, +0x4081020400, +0x20c1020800, +0x2041820800, +0x20c1020400, +0x4081020400, +0x4081020400, +0x3041060800, +0x60c1820c00, +0x4081020400, +0x4081030600, +0x3061860800, +0x4081020400, +0x40c1020400, +0x60c1820c00, +0x4183030600, +0x20c1060800, +0x60c1860c00, +0x4081020600, +0x60c1020800, +0x4081820600, +0x1061861800, +0x20c1020400, +0x2041060800, +0x20c1020400, +0x3041060800, +0x20c3820408, +0x60c1830600, +0x4081020400, +0x40c1020400, +0x4081020400, +0x60c1020400, +0x60c1020600, +0x2041860800, +0x20c1060800, +0x4081020400, +0x4081030600, +0x4081020400, +0x20c1020400, +0x20c1060800, +0x20c1060800, +0x2081020400, +0x1041060800, +0x20c1060c00, +0x2041860800, +0x40c1020400, +0x4081020400, +0x4081020400, +0x20c1020c00, +0x4081020600, +0x2081020400, +0x4081030600, +0x20c1820c00, +0xe0c1830e00, +0x20c1020c00, +0x2041020c00, +0x20c30c4000, +0x2041860800, +0x2041060800, +0x2041020400, +0x20c1020400, +0x4081020400, +0x20c1020400, +0xc1020400, +0xc081030200, +0x20c1820810, +0x2041060800, +0x4081020400, +0x2041060800, +0x81020400, +0x2041020c00, +0x4081020400, +0x4081020600, +0x4081020600, +0x4081020400, +0xc081020700, +0x20c1020400, +0x3041860800, +0x2041020800, +0x20c1060c00, +0x60c1020400, +0x4081020400, +0x20c1060800, +0x2041060800, 
+0x20c1860c00, +0x40c1020400, +0x20c1020400, +0x20c1060800, +0x20c1020c00, +0x2081020400, +0x40c1020400, +0x4081020400, +0x20c1060800, +0x2081020400, +0x4081030600, +0x4081020400, +0x20c1020c00, +0x4081020400, +0x20c1020400, +0x2041060800, +0x2081020400, +0x41c3830600, +0x20c1020c00, +0x4081020400, +0x20c1860c00, +0x4081030400, +0x8081020200, +0x60c1820400, +0x4081020400, +0x2041060c00, +0xc181030600, +0x4081020400, +0x40c1060c00, +0x20c1020c00, +0x4081020400, +0x2081020c00, +0x2041820c00, +0x40c1820400, +0x2041060800, +0x2041860c00, +0x4181030200, +0x4081020400, +0x3041861800, +0x20c1060c00, +0x4081020400, +0x4081020400, +0xc181020600, +0x4081020600, +0x20c1820c00, +0x6081020400, +0x40c1830400, +0x4081020400, +0x3041060800, +0x4081020400, +0x20c1060c00, +0x2041860800, +0x4081020400, +0x2041020800, +0x2081020400, +0x41020400, +0x20c1020400, +0x4081020600, +0x4081020400, +0x61c1830e00, +0x20c1020800, +0x20c1020c00, +0x20c1060800, +0x60c1838e00, +0x2041020800, +0x4081020600, +0x20c1860c00, +0x4081020600, +0x20c1060800, +0x60c1020400, +0x2001020800, +0x4081020400, +0x2081020c00, +0x4081020400, +0x6081020400, +0x3041820800, +0x4081020400, +0x4083068800, +0x20c1060c00, +0x40c1830600, +0x2041820800, +0x20c1060800, +0xc1c3870600, +0x1041041000, +0x20c1860c00, +0x4081030400, +0x2041020800, +0xc081030600, +0x4081020400, +0x4081020400, +0x2041860800, +0x60c1820c00, +0x6081020400, +0x60c1020400, +0xc0c1830400, +0x4081020400, +0x20c1060800, +0x4081020c00, +0x2041020800, +0x6081020c00, +0x4081020400, +0x2081020400, +0x20c1860800, +0x4081020400, +0x40c3820400, +0x60c1020400, +0x20c1060800, +0x81020400, +0x2081020400, +0x2041020800, +0x1041860800, +0x6081020400, +0x20c1020400, +0x2041060800, +0x81020400, +0x20c1020c00, +0x20c3060c00, +0x20c1060800, +0x20c1020400, +0x60c1860c00, +0x4081020200, +0x4081020400, +0xc081030200, +0x20c1020800, +0x2041820800, +0x2041060800, +0x60c1060c00, +0x4081020400, +0x2041060800, +0x4081020400, +0x4081020400, +0x81020400, +0x6081020c00, 
+0x4081020400, +0x2041060800, +0x4081020400, +0x20c1060c00, +0x4081020600, +0x4081020400, +0x4081020400, +0x20c1060800, +0x2041020800, +0x2041020800, +0x20c1020800, +0x2041820800, +0x60c1020400, +0x4081020400, +0x30c1060800, +0x1061860800, +0x4081030600, +0xc081830600, +0x20c1020c00, +0x81020400, +0x8081030200, +0x20c1020800, +0x20c1020800, +0x4081020400, +0x81020400, +0x20c1860c00, +0x4081020200, +0x20c3820400, +0x4081020400, +0x81020400, +0x60c1020c00, +0x4081020400, +0x30c1860800, +0x4081020400, +0x81020800, +0x20c1060800, +0x4081020400, +0x20c1020400, +0x20c1020c00, +0x20c1020c00, +0x3041060c00, +0x2041860c00, +0x1041041000, +0x2081020400, +0x20c1860800, +0x4081020200, +0x3041060800, +0x2081020400, +0x20c1020c00, +0x4081020400, +0x4081020400, +0x20c1860c00, +0x1041061800, +0x20c1020c00, +0x20c1060800, +0x4081020400, +0x2041020800, +0x2041060800, +0x4081020400, +0x4081020400, +0x4081020400, +0x20c1060800, +0x4081020400, +0x4081020400, +0x4081030400, +0x60c1020400, +0x1041861800, +0x2041020c00, +0x6081020c00, +0x20c1060800, +0x3041060800, +0x3041860800, +0x1e1c1870f00, +0x4081020400, +0x30c1860800, +0x20c1020c00, +0x6081020c00, +0x2041020800, +0x4081020400, +0x30c1060800, +0x4081020400, +0x20c1020400, +0x2081020400, +0x4081030600, +0x20c1020400, +0x6081020c00, +0x2041820c00, +0x60c1020400, +0x4081020400, +0x6081020400, +0x4081020400, +0x4081020600, +0x2081020400, +0x2041821800, +0x20c1020400, +0x4081020400, +0x4081020400, +0x4081020400, +0x2081020800, +0x2041020800, +0x4081030600, +0x4081020400, +0x2041020800, +0x1041041000, +0x4081820400, +0x3041060800, +0x4081020400, +0x20c1820400, +0x4081020400, +0x6081020400, +0x2041060800, +0x4081020600, +0x60c1860c00, +0x4081830600, +0x4081020600, +0x20c1020400, +0x1041860800, +0x4081020400, +0x20c1020400, +0x20c1060c00, +0x60c1020c00, +0x4081020400, +0x4081020400, +0x4081020400, +0x4081020600, +0xc1020400, +0x4081030600, +0x2041020c00, +0x1061861800, +0x20c1020c00, +0x20c1020800, +0x4081020400, +0x4183830600, 
+0x4081020600, +0x1041861800, +0x4081020000, +0x20c1020400, +0x20c1020c00, +0x4081020200, +0x4081020600, +0x20c1020400, +0x4081020400, +0x4081020400, +0x20c1060c00, +0x4081020400, +0x2041860800, +0x2081020400, +0x1041861800, +0x40c1020400, +0x40c1020400, +0x2041020c00, +0x20c1860c00, +0x4081020600, +0xc081020400, +0x4081020400, +0x4081020400, +0x2041020400, +0x2041820c00, +0x20c1060800, +0x2041020400, +0x30c1060c00, +0x60c1020400, +0x20c1060800, +0x20c1860800, +0x2041020800, +0x4081020400, +0x20c1020400, +0x20c1020800, +0x20c1860c00, +0x60c1020400, +0x4081020400, +0x20c1060800, +0x20c1020400, +0x3041061800, +0x3041060800, +0x4081020400, +0x4081020400, +0x2041060800, +0x60c1020c00, +0x4081020400, +0x4081020400, +0x2041860800, +0x3041020800, +0x2041020800, +0x2041061800, +0x2041060800, +0x3041041800, +0x4081020400, +0x2041020800, +0x4081020400, +0x4081020400, +0x6081020400, +0x4081020600, +0x20c1020c00, +0x4081020408, +0x4081020400, +0x4081020c00, +0x4081020400, +0x2081020c00, +0x4081020400, +0x2041020400, +0x20c1020400, +0x4081020200, +0x20c1820c00, +0x6081020800, +0x4081020400, +0x4081020600, +0x3041860800, +0x60c1020400, +0x20c1020c00, +0x20c1060c00, +0x20c1020800, +0x4081020400, +0x4081020400, +0x60c1060c00, +0x3041061800, +0x2041860800, +0x60c1020600, +0x3041860800, +0x20c1860c00, +0x60c1020400, +0x4081020400, +0x4081830408, +0x40c1830600, +0x4081020400, +0x60c1020c00, +0x20c1060800, +0x10c1060800, +0x1041861800, +0x4183020600, +0x3041860800, +0x4081020400, +0x20c1060800, +0x4081030600, +0x60c1830600, +0x4081020400, +0x2041020800, +0x20c1060800, +0x2041820800, +0x2041060800, +0x2041020400, +0x4081020600, +0x20c1020408, +0x4081020c00, +0x40c1020400, +0x1041861800, +0x4081020400, +0x2041020c00, +0x81020400, +0x3041060800, +0x4081020200, +0x4081020400, +0x4081020400, +0x4081030200, +0x3041061800, +0x20c1060c00, +0x4081020400, +0x4081020400, +0x4081020c00, +0x4081020400, +0x4081020600, +0x30c3861c00, +0xc181030600, +0x4081020400, +0x4081020400, +0x4081020400, 
+0x4081030600, +0x20c1820c00, +0x20c1060800, +0x2041020c00, +0x2041020800, +0x4081020400, +0x81020200, +0x4081020400, +0x4081020400, +0x30c1060800, +0x20c1020800, +0x4081020400, +0x3041860800, +0x4081020400, +0x20c1060800, +0x4081020400, +0x60c1820400, +0x4081020600, +0x4081020200, +0x40c1830400, +0x20c1020c00, +0x4081020400, +0x2081060800, +0x2041020400, +0x4081020400, +0x4081020400, +0x2041860c00, +0x40c1020400, +0x20c1020c00, +0x4081020400, +0x4081020400, +0x1061861800, +0x3061861800, +0x60c1820c00, +0x20e1060c00, +0x20c1060800, +0x81020400, +0x8081020600, +0x4081020400, +0x4081020400, +0x4081020c00, +0x40c1820400, +0x4081030400, +0x20c1020400, +0x4081020400, +0x6083020600, +0x4081030600, +0x60c1020c00, +0x81020400, +0x40c1020400, +0x4081020400, +0x4081020400, +0x81020c00, +0x60c1820400, +0x3041860800, +0x4081020400, +0x20c1020c00, +0x2041820c00, +0x60c1020c00, +0x2041060800, +0x30c1860800, +0x2081020400, +0x20c1060c00, +0x4081020400, +0x60c1020400, +0x3041820800, +0x20c1020400, +0x4081020400, +0x4081020400, +0x4081020400, +0x4081030600, +0x4081020400, +0x20c1020c00, +0x1061041000, +0x2041060800, +0x4081020400, +0x6081020400, +0x4081020400, +0x2041020800, +0x1041860800, +0x4081030600, +0x2041020800, +0x4081020400, +0x60c1020400, +0x20c1060800, +0x20c1060800, +0x81020000, +0x2041020400, +0xc1020400, +0x4081020c00, +0x20c1060800, +0x6081020400, +0x60c1020400, +0x2041820c00, +0x20c1020c00, +0x4081830400, +0x3041860800, +0x4081020600, +0x4081020400, +0x6081020400, +0x4081030600, +0x4081020400, +0x20c1020c00, +0x81020400, +0x4081020400, +0x20c1060800, +0x20c1060800, +0x20c1060c00, +0x4081020600, +0x60c1820c00, +0x4081030600, +0x4081020400, +0x2041020400, +0x20c1020400, +0x20c1020800, +0x4081030600, +0x4081020400, +0x4081020600, +0x2081020400, +0x30c1060800, +0x3041860800, +0x4081020600, +0x1041861000, +0x4081020200, +0x20c1060800, +0x2041020800, +0x20c1060800, +0x4081020600, +0x4081020400, +0x20c1820800, +0x4081020400, +0x4081020400, +0x3041861800, +0x4081830600, 
+0x4081020400, +0x20c1020800, +0x81020400, +0xc0c1820400, +0x4081020400, +0x4081020400, +0x3041860800, +0x60c1020400, +0x4081020400, +0x60c1820c00, +0x2041060800, +0x4081020400, +0x60c1020c00, +0x4081820400, +0x4081020400, +0x4081020400, +0x2041020800, +0x2041060800, +0x20c1060800, +0x4081020400, +0x4081020400, +0x20c1060800, +0x20c1060c00, +0x20c1020c00, +0x4081020400, +0x4081020400, +0x60c1020400, +0x2041060800, +0x81020400, +0x4081020400, +0x20c1020800, +0x1041061800, +0x20c1060800, +0x4081020400, +0x60c1020400, +0x2041060c00, +0x4081020400, +0x4081030600, +0x4081020400, +0x20c1020400, +0x4081020400, +0x40c1020400, +0x60c1020400, +0x20c1020400, +0x4081020600, +0x20c1020c00, +0x4081020400, +0x204181078c00, +0x60c1820400, +0x2041060c00, +0x20c1060400, +0x4081020400, +0x3041861800, +0x6081020400, +0x20c1060800, +0x4081020400, +0x4081020400, +0x4081020200, +0x81020400, +0x2041020800, +0x4081020400, +0x20c1020c00, +0x4081020400, +0x4081020400, +0x3041861800, +0x4081020400, +0x3041060800, +0x6081020400, +0x30c3860800, +0x20c1020800, +0x60c1020c00, +0x4081020400, +0x2041020c00, +0x4081020400, +0x20c1020c00, +0x4081020400, +0x60c1860400, +0x2081020400, +0x20c1020400, +0x20c1020800, +0x1041060800, +0x20c1020400, +0x20c1060800, +0x4081020400, +0x2081020400, +0x4081830600, +0x4081020400, +0x2041060810, +0xc1020400, +0x41c1830400, +0x2041860800, +0x20c1020c00, +0x20c1820c00, +0x8181030600, +0x2041060800, +0x4081020400, +0x1041860800, +0x4081020400, +0x40c1830600, +0x4181810c00, +0x4081020400, +0x60c1060800, +0x81020400, +0x20c1020400, +0x81020400, +0x4081020400, +0x4081020400, +0x4081020400, +0x20c1820c00, +0x20c1060800, +0x4081020600, +0x2041020c00, +0x4083020600, +0x2041060800, +0x40c1020c00, +0x4081020400, +0xc1020400, +0x2041060800, +0x40c1820400, +0x4081020400, +0x40c1020400, +0x4081020400, +0x6081060b00, +0x4081020400, +0x4081020c00, +0x4081020400, +0x40c3830600, +0x2041860800, +0x2081020800, +0x4081020600, +0x2001020400, +0xc081020600, +0x4081020400, +0x4081030400, 
+0xc1020400, +0x20c1060c00, +0x4081020400, +0x20c1060c00, +0x40c1020400, +0x20c1020400, +0x60c1820c00, +0x40c1830400, +0x20c1820400, +0x81020400, +0x2041060800, +0x6081020400, +0x2041020800, +0x8081020200, +0xc081020600, +0x2081020400, +0x204181878e00, +0xc081030600, +0x6081020400, +0x4081020400, +0x4081020c00, +0x1061861800, +0x20c1060800, +0x4081020400, +0x4081020600, +0x20c1020c00, +0x20c1020800, +0x4081020400, +0x3041860c00, +0x4081020400, +0x4181020400, +0x20c1820c00, +0x20c1020800, +0x30c1060800, +0x4081030600, +0x3041860800, +0x2081020400, +0x20c1860800, +0x4081020400, +0x2041020400, +0x20c1020400, +0x4081020600, +0x60c1820c00, +0x2081020400, +0x40c1060c00, +0x4081020400, +0x6081020400, +0x20c1060400, +0x40c1830600, +0x60c3830600, +0xc081830e00, +0x2041060800, +0x4081020400, +0x60c1020c00, +0x4081020600, +0x2041060800, +0x3041860800, +0x4081020400, +0x8081030200, +0xc1020800, +0x1061861800, +0x40c1860c00, +0x20c1060800, +0x2081020800, +0x4081020000, +0x30c3860c00, +0x60c3870600, +0x30c3861800, +0x20c1020400, +0x81020400, +0x4081020400, +0x4081020400, +0x40c1020400, +0x2041060800, +0x4081020400, +0x20c1860800, +0x4081020400, +0x2081020400, +0x2041060800, +0x4081030200, +0x4081030600, +0x20c1020400, +0x61c3820e00, +0x4081020400, +0x30c1861800, +0x4081020400, +0x20c1020400, +0x4081020400, +0x20c1020400, +0x4181830600, +0x40c1820600, +0x20c1020c00, +0x4081020400, +0x4081020200, +0x4081020400, +0x4081020400, +0x4081020600, +0x60c3870c00, +0x2041060800, +0x2041020000, +0x2041860800, +0x40c1020400, +0x40c1830600, +0x3041860800, +0x20c1060800, +0x60c1060c00, +0x4081020400, +0x6081020400, +0x4081020600, +0x2041060800, +0x20c1020800, +0x40c1020400, +0x4081030200, +0x20c1860c00, +0x4081020200, +0x4081830600, +0x4081020400, +0x4081020400, +0x20c1020400, +0x20c1060800, +0x3041060800, +0x20c1020c00, +0x40c1020600, +0x2041020c00, +0x2041020400, +0x20c1060c00, +0x20c1860c00, +0x4081020400, +0x4081020600, +0x4081020600, +0x4081020400, +0x4081020200, +0x4081020400, 
+0x20c1020c00, +0x40c1820400, +0x60c1020400, +0x20c1020400, +0x20c1060400, +0x4081020400, +0x60c1020c00, +0x4081020400, +0x4081020400, +0x2041860800, +0x1041041000, +0x3041860800, +0xc1c1830600, +0x2041060800, +0x60c1830c00, +0x20c1020400, +0x4081820400, +0x6081020e00, +0x4081020400, +0x2041060800, +0x60c1830600, +0x2041020800, +0x20c1020800, +0x4183830300, +0x2041020800, +0x4081020400, +0x2041060c00, +0x4081020600, +0x4081030400, +0x3041061800, +0x4081020400, +0x2041000400, +0xc1020400, +0x4081030600, +0x4081030200, +0x40c1020400, +0x1041061800, +0x40c1020400, +0x4081020400, +0x3041860800, +0x60c1020c00, +0xc181030600, +0x4081020400, +0x2041060800, +0x40c1830600, +0x2041060800, +0x40c1020c00, +0x30c1841800, +0xc081030600, +0x20c1060800, +0x40c1020c00, +0x4081020400, +0x4081020600, +0x60c1830600, +0x2041020800, +0x2041060800, +0x2041020800, +0x60c1020400, +0x2041020800, +0x20c1060c00, +0x20c1060c00, +0x20c1820400, +0x6081020400, +0x4081020200, +0x4081020400, +0x20c1020c00, +0x20c1020c00, +0x4081020400, +0x4081020400, +0x60c1060800, +0xc081830600, +0x4081020200, +0x2081020400, +0x20c1020c00, +0x2041020c00, +0x41c1820400, +0x2041060800, +0x20c1020400, +0x60c1020c00, +0x4081020400, +0x20c1020c00, +0x2041860800, +0x2041060800, +0x2041020c00, +0x20c1020c00, +0x4081020400, +0x2041060800, +0x4081020400, +0x20c1060c00, +0x20c1060800, +0x2081020400, +0x20c1860c00, +0x4081020400, +0x4081020400, +0x204081070c00, +0x20c1860c00, +0x2041860c00, +0x20c1020c00, +0x4081030400, +0x2041060800, +0x20c1060c00, +0x2041860800, +0x60c1020400, +0x20c1060800, +0x4081020400, +0x4081020400, +0x40c1820400, +0x20c1020400, +0x4081020c00, +0x4081020600, +0x2041020c00, +0x20c1020400, +0x6081020c00, +0x40c1830600, +0x20c1060800, +0x1041061800, +0x1041860800, +0x4081020400, +0x20c1020400, +0x4081020400, +0x20c1060800, +0x4081020200, +0x2083020400, +0x20c1020400, +0x4081020400, +0x20c1020400, +0x30c1860c10, +0x4081020400, +0xc081030600, +0x20c1020400, +0x2081020800, +0x2041020800, +0x4081020400, 
+0x4081020400, +0x60c1060c00, +0x4081020400, +0x4081830400, +0x1041020800, +0x4081020400, +0x81020400, +0x81020400, +0x4081020600, +0x4081020400, +0x20c1020400, +0x20c1020400, +0x2081060c00, +0x4081020400, +0xc081030600, +0x2041860c00, +0x60c1860c00, +0x40c3020400, +0x4081020400, +0x6081020600, +0x4081020400, +0x4081020400, +0x1041861800, +0x60c1820400, +0x8181030600, +0x3041860800, +0x1061861800, +0x2041020c00, +0x20c1020c00, +0x20c1020c00, +0x4081020400, +0x20c1020800, +0x20c1020800, +0x60c1060c00, +0x20c1020c00, +0x20c1060c00, +0x20c1020c00, +0x2041060800, +0x60c1020400, +0x4081020400, +0x20c1020c00, +0x40c1830400, +0x204081070c00, +0x20c1060c00, +0x4081020400, +0x2081020800, +0x4081020600, +0x4081020600, +0x4081020400, +0x40c1020c00, +0x20c1820400, +0x20c1020800, +0x30c1060800, +0x4081020400, +0x20c1060800, +0x4081020600, +0x2041020c00, +0x4081020400, +0x20c1020c00, +0x20c1020400, +0x6081020c00, +0xc081020600, +0x2041860800, +0x4081020400, +0x81020400, +0x4081020600, +0x4081020400, +0x4081830600, +0x2041060800, +0x2041020400, +0x4081020600, +0x20c1020c00, +0x2081020400, +0x20c1020800, +0x20c1060800, +0x2041060800, +0x2041020800, +0x40c1020400, +0x4081020400, +0x20c1060800, +0x4081020400, +0x20c1060800, +0x4081020400, +0x4081020400, +0x60c1020400, +0x81020400, +0x4081030600, +0x2001040800, +0x20c1060800, +0x60c1020c00, +0x40c1020400, +0x2081060c00, +0x2041060800, +0x20c1820c00, +0x4081020400, +0x20c1020800, +0x20c1020400, +0x2041020c00, +0x2081060c00, +0x6041020c00, +0x2041020800, +0x40c1820400, +0x20c1020800, +0xc1020400, +0x60c1020408, +0x1041041800, +0x40c1020400, +0x20c1060c00, +0x20c1020c00, +0x2041060800, +0x4081020400, +0x4081030600, +0x4081830600, +0x40c1870400, +0x20c1020800, +0x40c1830400, +0x60c3060e00, +0x60c1020c00, +0x4081830400, +0x2041020800, +0x4081020400, +0x2081020400, +0x20c1020c00, +0x2041060800, +0x4081020400, +0x2041860800, +0x20c1020800, +0x40c1830600, +0x2041020800, +0x20c1020400, +0x20c1060800, +0x4081020400, +0x4081020600, 
+0x2041020c00, +0x20c1020c00, +0x60c3860c00, +0x4081830600, +0x60c1060c00, +0x2041060800, +0x1041860800, +0x4081020400, +0x4081020600, +0x3041861800, +0x20c1020c00, +0x4081020400, +0x2081020c00, +0x4081020400, +0x4081020400, +0x20c1820c00, +0x20c1860800, +0x20c1060800, +0x60c1820400, +0x4081020400, +0x40c1830600, +0x4081830600, +0x20c1020c00, +0x4081020400, +0x20c1060800, +0x1041861800, +0x20c1060800, +0xc081030200, +0x20c1860c00, +0x3041060800, +0x60c1820400, +0x4081020400, +0x30c1060800, +0x2041020800, +0x40c1020400, +0x3041060800, +0x20c1020400, +0x20c1020400, +0x20c1020c00, +0x20c1020400, diff --git a/samples/digitrec/digitrec/data/training_set_2.dat b/samples/digitrec/digitrec/data/training_set_2.dat new file mode 100644 index 000000000..8411863c8 --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_2.dat @@ -0,0 +1,1800 @@ +0xc0c1020f00, +0xe1c3870a00, +0x71608e1e00, +0xc3c081070c, +0xe340878d00, +0x6020870e00, +0xe14087ca00, +0x70a0cf1800, +0x6041870800, +0xe0c38f8300, +0xe0618f1b00, +0x70a08f1800, +0xe161cf9c00, +0xe041820f00, +0xf0430e1700, +0x70238f1800, +0xe041078e00, +0xe060871f00, +0x20e0438f8300, +0x21c08f9000, +0xf320860e18, +0xc1868000, +0xe040870e00, +0xf060871b00, +0xc1c187c800, +0x60608f1900, +0xc0c0860700, +0x6040870e00, +0x2020438b0800, +0xe0474b8e00, +0xc081078e00, +0x71c08f9800, +0x43e38f8900, +0x21e0820830, +0xc08107c000, +0xe1c38f8e00, +0x6041821e00, +0xc041830900, +0xe140870c00, +0x6361c59c00, +0x1c043c78700, +0xe060861f00, +0x60c1cf8e00, +0x70a0cf0e00, +0xe041830e00, +0x70c30f9a00, +0x3c4830700, +0x6041c71c00, +0x6041860e00, +0x71e08f1a00, +0x61c304ce00, +0xe140878c00, +0xe0c1060a00, +0x70208f1800, +0x7021871c00, +0xc1c36f8000, +0xc041870c00, +0x2020410f8000, +0x60c10f8000, +0xe24187cc00, +0x40c083078f00, +0xe0428f9800, +0x1c0c18e8f00, +0x41c040878c00, +0xe343870d00, +0xe041868e00, +0x6140860e00, +0xc0c30fc800, +0xe0438b1e00, +0x60e08f9800, +0x60c78f8800, +0x6063cf9900, +0x4023c98c00, +0xe040871100, +0x21a1cf9000, 
+0x71a0820c10, +0x61c1078c00, +0x61c1820e00, +0x43c5028600, +0xe043870b00, +0xe040870c00, +0xe041840f00, +0x1c78b8000, +0xc0c38fc300, +0x7020cf9800, +0xe060870e00, +0x40c1870a00, +0xe040870d00, +0xe241878f00, +0xf123cf9800, +0xc141078000, +0xe041879e00, +0xf0618f9e00, +0x6081020600, +0xc0c1078800, +0x1060438f1a00, +0xe060831c00, +0xe14182080c, +0xc0c1878e00, +0x70e08f1800, +0x40f0274f9f00, +0xe0418f8880, +0xe0c1040f00, +0x7041048e00, +0xc081020700, +0xe1c0878c00, +0xc041860e00, +0xe141878d80, +0x60c38f9800, +0x6041060e00, +0x61c1cfdc00, +0xf0618c1e00, +0x1e1c187ce00, +0x40c1020c00, +0x61c0870e00, +0x6140cf1800, +0x1c240870f00, +0x41c3cf0800, +0x107041059f00, +0x3063cf1800, +0xe344820f00, +0x1c081020700, +0x4143870d00, +0xe163870a00, +0x206040821e00, +0x20e0c1060f00, +0x61408f1900, +0x41c081878f00, +0x2060438f0c00, +0x21e1c18f9f00, +0x71e08f9800, +0xe040878900, +0xe0438d8d00, +0x1f061869f00, +0x2060c3078f00, +0x1020418f0000, +0xe041060c00, +0x71218f1a00, +0x81e70000, +0xe040871b00, +0x1e1e1879f00, +0xe1c38f8d80, +0x61e1c71c00, +0x6141871c00, +0xc1c0820f00, +0xe141860700, +0xe141820f00, +0x1c040878400, +0x381078600, +0x60418f9400, +0x70e28f1e00, +0x4080830400, +0x1c0c1078400, +0xe0c1860f00, +0x7023ce1800, +0x61438d0e00, +0xe0c10e8f00, +0xe0c3cf8800, +0x20f062cf9d00, +0x1e041020f00, +0x40c041871900, +0x2020c1070c00, +0x60e78f9b00, +0x6041060600, +0xc0c1060700, +0x180c0878e00, +0x60a0cf1a00, +0xe140870b00, +0x6161871c00, +0xe141860f00, +0x60428b1c00, +0x60c38b8d00, +0x20c3ee0800, +0x4040870200, +0x1c3c0cf8c00, +0x1c083078700, +0xe3e0870f00, +0x60478f8100, +0x6060cf1c00, +0x6040860e00, +0x71e38f1b00, +0x181ce0000, +0x61c0871e00, +0xe040861e00, +0xa041870800, +0xe1c3cfdc00, +0xf0238f1000, +0xe143878f80, +0xe0c3060e00, +0x70a3cf1800, +0x6123c99c00, +0x61c18f8000, +0xe041870b00, +0xe0408f0b00, +0xe0c1878f00, +0x40410f8800, +0x71e1041e00, +0x60678f0200, +0xe0c38fc300, +0x71e1cf9c00, +0x2060478b8800, +0x80428f8000, +0x20e0428f8800, +0xe060cf9800, +0x1e34187cf00, 
+0xe041071000, +0x40c18f9000, +0x61e18f9000, +0x20e041840f00, +0x404043870000, +0x7141061600, +0xc0c0860300, +0x2063cf9c00, +0xc0c0820e00, +0xe3c1878f00, +0xc1438a8800, +0x70228e0e00, +0xe021879900, +0x40c0c0878c00, +0xe1c1069e00, +0xe041040e00, +0xe1c1078f00, +0xe0638f1c00, +0xf0234f9d00, +0xc240870f00, +0x2060438f0a00, +0x20e041070b00, +0xe041038e00, +0x206040870c00, +0xe0e7cf8100, +0x30238f0a00, +0x1070618c1f00, +0x61a04f9c00, +0xe041e78800, +0x43c1878800, +0x70a3cf1900, +0xc1c38fc800, +0xe040879c00, +0xc040870c00, +0x103808000, +0xe0438b0f00, +0x7020cf0a00, +0xe341cf9c00, +0xf022cf0e00, +0x40e0618f9700, +0xc081060780, +0xe143850f00, +0x1020c1078800, +0x61608f9c00, +0xc0c0c78c00, +0x2043860800, +0xc0c1878c00, +0x20e041060f00, +0xe041878c00, +0x183c7c000, +0x206043cfcc00, +0x1c0c1e79c00, +0xe041079f00, +0xf060871b00, +0x60438f9800, +0x6041070a00, +0xe040870e00, +0xe1608f1f00, +0x1c18f8000, +0x183278000, +0x8383c78000, +0x60478f1100, +0xe1c18f1d00, +0x20e34b0800, +0xe0c14f9000, +0x6361870c00, +0x70608f1800, +0x6041079a00, +0x1c0868000, +0x6060870e00, +0x6020860e00, +0xe0438f1800, +0x71e0871900, +0x60609f0000, +0xe0428f0e00, +0x7063cf1e00, +0xc0c1070f00, +0x20e041878800, +0xc0838f8080, +0x41c30f8000, +0x181c187cf00, +0x70618f1000, +0xc3c187c300, +0xe341870b00, +0x20e041040e00, +0xe040870f00, +0x2060438f8c00, +0x70e38f1a00, +0xe3e0cf1f00, +0xe0438f8b00, +0x6060870e00, +0x6140830400, +0xc3c0c78c00, +0x60208f1800, +0x70a3cf9800, +0xe141858c00, +0xc1c3c78c00, +0xf0410c1f00, +0xe0c30f8f00, +0x180c3878000, +0x41c1879900, +0x6040860600, +0x1e0c1020f00, +0x21e0cf9800, +0x302063ce1800, +0xe061cf9c00, +0xe3c18f9d00, +0xe060c70c00, +0x71e0861f00, +0x70a18f1000, +0x182078000, +0x31a04f0c00, +0x307023cf1a00, +0xe141860e08, +0x1e2c1878f00, +0x6041070a00, +0x60438b9800, +0x60430f0300, +0xc1c18f1900, +0x61c18f8000, +0xe040830c00, +0xe1438f9d00, +0xc0418fc800, +0x60438f9800, +0x6060c10f8f00, +0xc081020600, +0xe0c3079980, +0xe3e0c61e0c, +0x6040879800, +0xe043870d00, +0x60e2c70c00, 
+0x6040860a00, +0xe060cf1f00, +0xf0608f1a00, +0x30278f0000, +0x60c1068e00, +0x61c18f8e00, +0xc041878900, +0xe041479c00, +0xc04087cc00, +0x181670000, +0x1c043c70e00, +0x1c040878800, +0x1c0c3878800, +0x61c1871e00, +0xe0418f1100, +0x1e040820f08, +0xc3c187ce00, +0xf1e14f9c00, +0xe1a24f1e00, +0x1c081038c00, +0x206041860e00, +0xe141878f00, +0x1c38f8000, +0x1e3c0838e00, +0x20a041041e00, +0x70418f8900, +0x3020438f0a00, +0x1c040878e00, +0x40e0c1878f00, +0x21e38f8000, +0x387038200, +0xf1e38f9f00, +0xe041c78e00, +0x70618f0600, +0xc0c3830d00, +0xe141868f00, +0xe1438f9d00, +0x40c1870a00, +0x1e0438f8d00, +0x20f0208f9d00, +0x61c18f9900, +0x63e4c79c00, +0xe140830e00, +0x8381c78000, +0x1c040850c00, +0x60438e8000, +0x61c1870c00, +0xc1c307c700, +0xe0c107cc00, +0xe1c1839f00, +0x1e040871800, +0x61c0870e00, +0xe041871e00, +0xc0408f8000, +0xe0208f9800, +0xe3c3871f00, +0x6161878c00, +0x808107c000, +0x20c08307cf00, +0xe041860e00, +0x21e261cf9d00, +0x70265f9c00, +0x6041871800, +0x1c0c30f8f00, +0xe361cf9c00, +0xf1e38b9d00, +0x181cf0000, +0x60a0870c00, +0x20e0608f0d00, +0xe3e0879c00, +0xc0c1830e00, +0x30228b1600, +0x60c3079400, +0x60c38f9800, +0x71e18f1c00, +0xe062cf8e00, +0xc1c1878c00, +0xc0c1078f00, +0x1c040830c00, +0xc0c1078000, +0x1c0418f8900, +0x71a0c71c00, +0xc081060700, +0x30a08f9800, +0xc041020e00, +0xc041878c00, +0x6041870a00, +0xf1a3cf9c00, +0x20e0c1078c00, +0x206040871c00, +0x41c08f1e00, +0xc0c1870f00, +0x60c10f3200, +0x61c08f0e00, +0xc143020700, +0xc1c1878c00, +0x41e041878e00, +0x70208f1200, +0x6063841e00, +0x1e0e1860f00, +0xe041020e00, +0xe1408f0c00, +0x61e0cf1b00, +0x60c3cf8000, +0x71e1861e00, +0x4183078000, +0xc1834f8000, +0x60638f1a00, +0xf0224f1e00, +0xe1e1c70e00, +0xe140831f00, +0x40c0c1878c00, +0x41c041879b00, +0x6140879880, +0x6040870c00, +0xe1c0879800, +0xe041860e00, +0x20e0438b0900, +0x80830f8000, +0xc360c39c00, +0xe243cd8e00, +0xe160878c00, +0x1c040870e00, +0xc081048e00, +0x1e0204f1700, +0x41c3820600, +0xe1e38f9f00, +0xe341068e00, +0x20e040821e00, +0x61c0870e00, 
+0xc040870e00, +0x60c08f0a00, +0xe0618f9d00, +0x40c040870e00, +0xe041061b00, +0x6060871e00, +0x1070618f9800, +0xe041870a00, +0xe163cf9c00, +0x70208f1a00, +0x404081078000, +0xe0c1861e00, +0xe043870f00, +0xe04f1000, +0x1c0c0838c00, +0x60418f1800, +0xe1418f8e00, +0xc041820f00, +0xe341048e00, +0x7020cf9800, +0xe041878800, +0x20e0c1870e00, +0xf022cf9800, +0xc3c1870f00, +0xc040c70600, +0x61c1279800, +0xc1c1c70c10, +0x60e067cf8c00, +0x1e020cf1c00, +0x1e161870c00, +0xe1238b9800, +0xc08f8000, +0x40e0c1079c00, +0x1c08f8000, +0x1071e18f1a00, +0xe1e1cf9900, +0x20e040870e00, +0xe140878e00, +0xe0638f9900, +0x1c040870300, +0xc0c0870a00, +0xc0c107c800, +0xe041870c00, +0xe1408f1e00, +0xe0274f8000, +0x41410fc000, +0x20e041061f00, +0x83c1878100, +0xe1c12f9a00, +0x60418f8100, +0x60c18f9c00, +0xe0408f9b00, +0xc28107cc00, +0xc08107cc00, +0x41c6830f00, +0x1e041860f00, +0x2041870800, +0xf061878e00, +0x60678f8000, +0xe0c1068f00, +0xe1408f8800, +0x61c0878c00, +0x1c0c1870b00, +0xe040870e00, +0x6041841f00, +0x20e043878c00, +0x20c18f1800, +0x40e0418f8300, +0x3060418f1800, +0x20e040871b80, +0x1c081038e00, +0xe041870f00, +0xe161861e00, +0xc0c1020600, +0x63e0cf9800, +0x6023cb9c00, +0x70a0cf1e00, +0xc1c0830e00, +0xc141830404, +0x7020871800, +0xe0414f9600, +0xc0404f9800, +0x206040870200, +0x1c3c38f8d80, +0xe1c1070a00, +0xf3a1cf9c00, +0xe121cf9c00, +0x6040860400, +0xe041078e00, +0x20c38f9000, +0x40c1870800, +0xc081078000, +0x61c0830c00, +0xe0438f1900, +0x70e18e1600, +0xe04087ce00, +0x6140820e00, +0x3c0cf0800, +0x6043cf1c00, +0xe042870c00, +0x1070238f9c00, +0xe041870e00, +0xe0428f9e00, +0x1e0c1060f00, +0xc081478000, +0xc3c0838c00, +0x6040871600, +0xc1c1838e00, +0x61c18f0a00, +0xe041040f00, +0x61c1861e00, +0x1f061868f00, +0x61a04f1e10, +0x60e78f8800, +0xe043cd8e00, +0x30e08f1800, +0x41e063478f00, +0x71e78f0100, +0x4140870c00, +0x1e041878f00, +0xe1418e9e00, +0x51418f1a00, +0xe041060b00, +0x7041071300, +0xe1618f9800, +0x60a38f9100, +0xe1c0870f00, +0x21c38f8000, +0x61c4878c00, +0x20e041070e00, 
+0xc040870e00, +0xe061cf9800, +0xc040870c00, +0x3027931800, +0xe040870e00, +0x7061871a00, +0x20e063cf9c00, +0xf1e0c71e00, +0xc043ef1800, +0x41c040878f00, +0x20e0438f8c00, +0x60e0c10f8f00, +0x70238f1a00, +0x418307c100, +0xe0438f8d00, +0xc3878000, +0x20e063c91e00, +0xe143049f00, +0xe0408f9800, +0xc08107c000, +0x60238f0c00, +0x3022cf9800, +0xc040870e00, +0x20e0c10e8700, +0x2060418f9800, +0x2040c1870400, +0xe141879e00, +0xc140870f00, +0x60438f9000, +0x60c0871e00, +0x202041870e00, +0x60638b1a00, +0x21e0430f8f00, +0x71a0cf1f00, +0x1e141870f00, +0x60c1020600, +0xe3e4cf8f00, +0x1e060c79c00, +0xe063c78c00, +0xe041878e00, +0xe1c0830f00, +0x1c0424f8e00, +0x60638f8100, +0xe160cf0d00, +0xe160831d00, +0xe0c0870e00, +0x1c0c1060700, +0x6040860e00, +0x60438d8c00, +0x70638f1b00, +0x63c0878900, +0x70678d0600, +0xe260830e00, +0x20e0c3878f00, +0xc0c30f0b00, +0x41c1cf0800, +0x70a1cf1a00, +0x41c143c78f00, +0xe041060e00, +0xe220478c00, +0xc041048e00, +0x20c081078f00, +0x41c0c1878e00, +0xe241868f00, +0xe24106df00, +0xe041079c00, +0xc0418f8000, +0xc240820f00, +0x30e28f1e00, +0xe041060f00, +0x18180830400, +0xc080870600, +0x1e041858c00, +0x60408f1e00, +0x60e1cf1800, +0x1c0c1870f00, +0x60638f0300, +0x20f020cf1f00, +0x2063ce0800, +0x6142870e00, +0x70e38f1e00, +0xc041870300, +0x302026cf0900, +0x1e0c1060f00, +0x61e1870d00, +0x7027cb1e00, +0x41e18f8000, +0x4043870c00, +0x20e041868e00, +0x70238f1800, +0x1e0438b9900, +0xe1c1879f00, +0x1e041058e00, +0x30e3cf1800, +0x70e08e1e00, +0x7020cf1800, +0xc1834f8000, +0x1e020c79e00, +0x4041870c00, +0xe3e08f9b00, +0xe0c1060f00, +0x70618f1a00, +0x40c1820e00, +0xe0418f9800, +0x1030278b1800, +0xe3c1878f00, +0x60428b1c00, +0xe14107c800, +0xe041839c00, +0xe260c78c00, +0x1c082078000, +0x6041078800, +0x6041070200, +0xe341878f00, +0x30238f1100, +0x6043870c00, +0xc1c387c800, +0x18081c70800, +0x2060408f9800, +0x1c040878c00, +0xc041878f00, +0xc0408f0900, +0x30649e0400, +0xe061879e00, +0xc041cf8e00, +0x4080038000, +0x71c1879c00, +0x60418f9900, +0xc081020e00, +0x7027cf9800, 
+0xe340878e00, +0x6041861e00, +0x406040878800, +0x71a0871e00, +0xe0608f9900, +0x70208f1a00, +0x7026cf9800, +0xf022cf9c00, +0x30608e1200, +0xe140c78f00, +0xc0c38fc800, +0xe1408f9900, +0xe040820e00, +0xe143878c00, +0x6043858c00, +0x71e38f1000, +0xe341878f80, +0x6041871e00, +0x2041860800, +0x6041020e00, +0xe040861f00, +0xc1c1870900, +0x61c1830700, +0xe027cf9c00, +0x60618f8100, +0x60428b0e00, +0x60478f8000, +0x60410f8800, +0x7120478e00, +0xc18387c000, +0x60438f9800, +0xe041878c00, +0x1e040820f00, +0xc0c30fce00, +0x7041071c00, +0xe040820e00, +0x71e1cf9c00, +0x4041860800, +0x20e027cf8d00, +0xe7cf0000, +0x6040871a00, +0xe140810f00, +0xe0410e1700, +0x1e0438f9d00, +0x6040870c00, +0x60418f9f00, +0xf120871e00, +0x6043878800, +0x1f0204f9f00, +0x61e0cf9800, +0x1c0c1878f80, +0x61e0871e00, +0x3060618f9800, +0x71a0cf1a00, +0xe040860e00, +0xe041878c00, +0x18380878400, +0xc3c3078000, +0xe1e18f8e00, +0x1c1c1820700, +0xc3c0838f00, +0xc1cf0000, +0x1c0c1060700, +0xe063cf1e00, +0x41c1070100, +0x70e0861e00, +0x6341060700, +0x1c041c50c00, +0x6041870800, +0xe041041f00, +0xf041820e00, +0x6040870800, +0xe043899c00, +0xe241028e00, +0x60c3ce1800, +0x2043ca8800, +0xc38107c700, +0x7020cf9800, +0x4081870c00, +0x2020c1078800, +0x3023cf9c00, +0xf060cf9e00, +0x70208f9800, +0xe043878c00, +0x20e041870900, +0x60618f1200, +0xe3e0cf1d00, +0x71a24f1c00, +0x1e0c3048f00, +0x38307c200, +0x1e041831900, +0xe140878c00, +0xe0e18f9c00, +0x306020cf1c00, +0x107043879800, +0x6041000e00, +0x8381078100, +0xe041079800, +0x6041070e00, +0xe344830700, +0x20e041871b00, +0x1c3c1878700, +0xc1c1870e00, +0x7041871c00, +0xc3ce0000, +0x1c34f8000, +0x60c1cf9600, +0x40830fc000, +0xe1418f9d80, +0xc0c1020700, +0x60264f8800, +0x60c78f8000, +0x1f221cf9f00, +0xe041840f00, +0xe060870a00, +0x71e1851e00, +0xf023cf9800, +0xe040820f00, +0xe1668b8c00, +0x1e040878800, +0xe1c1878c00, +0xc040830e00, +0x6140879a00, +0x60408f1800, +0x60c30f8000, +0x60c38f1a00, +0xe160871c00, +0x70208f1a00, +0x6040861e00, +0x30e18f1000, +0x70238f1e00, 
+0xf0430e1f00, +0x20e1c30c0f00, +0x2070208f1100, +0x40478d8000, +0xe3c08f9900, +0x7020cf1c00, +0xe041020f00, +0xe0c3078f00, +0x6041040e00, +0x6040870c00, +0xe041870b00, +0x60438b9800, +0xe0418f9e00, +0xc341860f00, +0x1e060820f00, +0x20e2cf0200, +0x207022cf1b00, +0xe3cf1000, +0x10e0c3048f00, +0x1c0c0878800, +0x70234f1e00, +0x202041058f00, +0xe063c98e00, +0x1e041c70c00, +0x61438f8900, +0xf1e1ce1c00, +0xe041820e00, +0xe2438d8d00, +0x202061860c00, +0x60e38f1a00, +0xc081020f00, +0x4040870800, +0xe041058c00, +0x7020871800, +0x40c0c1878f00, +0xe0618f1a00, +0xe041870c00, +0x6160870d00, +0x181c107cc00, +0x1070228f1b00, +0xe041078c00, +0x70238f9800, +0xe0c1070e00, +0xe1438f0f00, +0x60c1871800, +0x70418e1200, +0x40c0c32fc700, +0x81c60000, +0x41c1079000, +0x1e041871f00, +0x7020871a00, +0x61c1071e00, +0xe041070e00, +0xf020c71e00, +0x6041871c00, +0x71e1049f00, +0x1c243cd8c00, +0x7027db1c00, +0xe0410f9900, +0x6043878c00, +0xf061861c00, +0xe0234b1d00, +0xe0c1879900, +0x60428f8000, +0x20c0860c00, +0xe063cfcc00, +0xf1e1860f00, +0x60408f9800, +0x20e3cf8800, +0x60238f1200, +0xe0c18f9e00, +0x73e1879e00, +0x6041870d00, +0x3060410e1700, +0xe062cf9c00, +0x6041061300, +0xf1e7cb9f00, +0xf220431e00, +0x20e0c3040f00, +0x6041060e00, +0xc241860700, +0xe341860f00, +0x61c1870c00, +0xf061871c00, +0xe041060f00, +0x206041878c00, +0xe340879f00, +0x70c1879800, +0xc0c3878d00, +0x3c0838100, +0xe060871b00, +0xc1408f8c80, +0x20e1c1868f00, +0x1c040c10e00, +0xe1c1860700, +0x6040870700, +0x20e143c98e00, +0x6140878800, +0x381878000, +0x7020cf1e00, +0x6083060700, +0xe043870f00, +0x60408f8c00, +0xe0438f8000, +0x2060418f1b00, +0x6041860e00, +0xc081820700, +0x60e1850c00, +0xf0618f1b00, +0x6041871e00, +0x1e041840f00, +0x20e041058e00, +0xe0c38f1b00, +0xe040820e00, +0xe0c1078e00, +0xf061861e00, +0xc04082040c, +0xe041040f00, +0xf0618f1e00, +0x1e041860f00, +0x70e08f1a00, +0xe0204f1e00, +0xc0c1870c00, +0x21f0204f9f00, +0x1e060860e00, +0xc0c18f8000, +0x70639f9200, +0x60438f9800, +0x30628e1800, +0x60418f9800, 
+0x6142870c00, +0xc040871400, +0xe143c50e00, +0xe040879800, +0x6020628b0d00, +0xe140810e00, +0x1c2c0870700, +0x1c081878c00, +0x6043850e00, +0x1c0c0838900, +0x60438f0e00, +0xe0438f8c00, +0x71e1869e00, +0xe041870e00, +0x70e1879c00, +0x70e18f1200, +0x4081870800, +0x1e060cf9c00, +0x20e0608f0e00, +0x60c3ef8c00, +0xf141861f00, +0x61e3cf9800, +0xe1e6830d00, +0xe041860f00, +0x1c0c1020700, +0xe0408f8800, +0x41c0c1038f00, +0x30a28f1a00, +0xe0c30f8f00, +0x20e0418f1f00, +0xe3c1860f08, +0x6041820e00, +0x70e0871800, +0x70608f9c00, +0x30618f1800, +0x6060c30f8f00, +0x2060438f8800, +0x20e061879f00, +0x20e021cf1c00, +0x3c0828400, +0x30e08e1c00, +0x1c3c0c28600, +0x3c4838400, +0xc38347c000, +0xe0c2060b00, +0x70e0861e00, +0x1e043060f00, +0xe041079c00, +0x21c08f9800, +0x1808107c000, +0x2043070100, +0x60c1870e00, +0xe341068f00, +0x60418f9e00, +0x20478f8000, +0x1070a08f1b00, +0x61410f8100, +0x70608f1b00, +0x61438f8800, +0x30e3861e00, +0x6040870e00, +0xe1408f0f00, +0xe1c3878f00, +0x70e18f1b00, +0x70438b1a00, +0x60438f0e00, +0xe041861e00, +0x838107c000, +0x7020871c00, +0x3e0cf8800, +0x70638f1200, +0xe041860e00, +0xe0238b0a00, +0x60c1860600, +0x6040870c00, +0x31e08f1800, +0x1070408f1c00, +0x20e141cf9c00, +0xe140820e00, +0x181ce8000, +0xe060871f00, +0x1070678f8d00, +0x1c0c1020f00, +0x6140870e00, +0x70c1869800, +0x70208e1e00, +0x1c0c187cc00, +0xe0c1870e00, +0x70a1cf9c00, +0x2041070800, +0xe021870c00, +0x70a0cf1c00, +0x1c0c1830e00, +0x8383c78000, +0x6140879c00, +0x6144820e00, +0xe241860f00, +0xe3c0870f00, +0xe041040f00, +0x70a0cf1800, +0x81460000, +0xe0438f0b00, +0x1c181078000, +0xf041861f00, +0xc383078000, +0x6020678f8800, +0x70618f1900, +0x7064cf1e00, +0xc041860f00, +0xc0c3870700, +0x4020408f0c00, +0xe060451e00, +0xe260870c00, +0x6041870a00, +0xc1c10f8000, +0xe1418f8c00, +0xe0438f9c00, +0x20478e8000, +0x6041860c00, +0x4083040700, +0xf120861f00, +0x1e041048f00, +0x6020cf1100, +0xf0638f9900, +0x63c107cc00, +0x8181078700, +0xe043870f00, +0x60418f9800, +0x6041040f00, +0xc2c187cc00, +0xe0408f0c00, 
+0xe0408f1b00, +0x1c38f8000, +0x60438f0e00, +0x60608f1c00, +0x400f8000, +0x6043cf8c00, +0xe040830e00, +0xe0618f1c00, +0xc0c1060e00, +0xc241868600, +0x2060c1061f00, +0x87ce0000, +0x20e140870f80, +0x41e041078f00, +0x6081061b00, +0xe063899d00, +0x6021ce1800, +0x107060ce1800, +0xe141020f00, +0xe0428f8000, +0x20e0410ccf00, +0x1c083078700, +0xe040cf1c00, +0x1c081878c00, +0x61c1871a00, +0xe141060e00, +0x60438b1800, +0x6040870c00, +0x1e060c78e00, +0xe021861e00, +0xe1c0878d00, +0x2060438f0200, +0x1f0204f1f00, +0xc3c107cc00, +0x6043870e00, +0xe1438b9d00, +0xe060c71e00, +0xe160870e00, +0x61c0870e00, +0x1e042cf8900, +0xe0438f8000, +0x71e18e9700, +0x60c1060e00, +0xc040838c00, +0xe1e3cf8c00, +0xc1c78fdf00, +0xe0608f1900, +0x60c38f1a00, +0xc0c1070300, +0x204081020a00, +0xe0228f9c00, +0xe3604f8e00, +0xe141820e00, +0x106060871800, +0x6040870c00, +0xe041870c00, +0x183c60000, +0xe161c78c00, +0x7161861e00, +0xe140830d00, +0x183058c00, +0xc0c1820f00, +0x61c0879900, +0x1c081028700, +0xc040820f00, +0x6140820e00, +0x418c8000, +0x6141879f00, +0xe0638f1d00, +0x6042870c00, +0xf041049e00, +0x383078100, +0x71e08f9800, +0xe023879c00, +0xe063c98e00, +0x70430f0e00, +0x1c0c0878c00, +0x70e38f1c00, +0x60e1cf9c00, +0x70a3cf1a00, +0xe1438f8100, +0xe0408f9f00, +0x1e340838f00, +0xe043cf8c00, +0x20e043870900, +0x382838000, +0x60e063cf8d00, +0x1c3e0c70e00, +0xf120cf1e00, +0x60e08f1e00, +0x70618e0a00, +0xe0438f9800, +0xe1418f8080, +0x20e083040f00, +0xf023cf1800, +0xc08307c700, +0x206043878800, +0xe141820e00, +0xe0638e9000, +0x40c183078600, +0x183278000, +0x2020c3058e00, +0xe041879b00, +0x40e041079980, +0xf241841f00, +0x202041860e00, +0xe0c1078f00, +0x2067cf0800, +0xc3c1078b00, +0x20e0c1078c00, +0xe0410e1300, +0xe140870e00, +0xe060cf9c00, +0x70a18f1e00, +0x1e041878f00, +0xe363cf8f00, +0x71a04f1e00, +0x71e1873e00, +0x206043860800, +0x7141860e00, +0xe041020f00, +0xe041860e00, +0xe0234b9c00, +0x6063c50c00, +0xe0408f1e00, +0x1c043870f00, +0xe1438f0900, +0x6160870e00, +0xe0c1040f00, +0xe1e38f8100, +0xe0438e0300, 
+0x71e28f1e00, +0x107067cf9c00, +0x6160871f00, +0x60c78f0100, +0xe041870f00, +0x60c30f8000, +0xc241839c00, +0xe183078f80, +0xf1e1879e00, +0x6041879000, +0x70208e1c00, +0x71e1841e00, +0xe341020f08, +0xc041078c00, +0x2020478f8000, +0x60218f1800, +0x60c18f1a00, +0x4141870e00, +0x71e0cf1e00, +0xc08307ce00, +0x70618f9800, +0xe161870a00, +0x83c60000, +0xc081038600, +0xc0c1870d00, +0x61e3c78c00, +0xe341860e00, +0xe041868e00, +0x6043870800, +0x60618f1200, +0x61c7870200, +0xe140870d00, +0x71e3cf1e00, +0xc28f8000, +0x6041041f00, +0xe040870600, +0x6041870a00, +0x1e040830f00, +0x1e241878f00, +0xe041879f00, +0x1c381078700, +0xe0c1860e08, +0x1c081040700, +0xe140878c00, +0x70c1078c00, +0x3060418f8900, +0x43e0430f00, +0x60438f9800, +0x3e0cf9000, +0x70a68f1800, +0xc041820e00, +0x1e2c1068f00, +0x1c040820f00, +0x20c1cf0000, +0xc241820e00, +0x1c040878c00, +0xe140860a00, +0x818307c000, +0xc0810fc000, +0x1c0478b8d00, +0xe040831c00, +0xc0c0878f00, +0xc047cf8800, +0x1c240878e00, +0x107023cb9c00, +0x1070e78f9900, +0xe0428b0c00, +0x61e08f1a00, +0x6041040e00, +0x4181c20800, +0x3c0878800, +0x6142870500, +0x20e0c30f9e00, +0xe041078b00, +0x4040870c00, +0xe043899c00, +0x1e041820f00, +0x60418f1f00, +0xe081040e00, +0x31e1861c00, +0x404081078400, +0x202041860a00, +0x20e141870f00, +0xe14187d800, +0x3020070000, +0xe0428f0a00, +0x70e7cb9c00, +0x60c0871e00, +0xe0408f0b00, +0x1e04f0800, +0x3c0cf0000, +0x6041861a00, +0x71a28f1c00, +0x61c3cf9800, +0xe041860c00, +0xe140820c08, +0x6040871c00, +0xc140820e00, +0xc3058000, +0x70418f9000, +0x70238f9800, +0xe060cf9800, +0x6040871e00, +0xc14183cc00, +0x70e38f1c00, +0xe041068e00, +0x40c081030700, +0xe040cf8f00, +0x6041878c00, +0x7061841e00, +0x48183278800, +0xe0c1060f00, +0x20e043878900, +0xc0c1878c00, +0x4043870c00, +0x6027cf0900, +0x60e08f9b00, +0xe041060e00, +0x20e0638f8800, +0x2060438f8800, +0xe1c187df00, +0x61c68f0100, +0x6041820e00, +0xc040830e00, +0xc0c1838c00, +0x6041870800, +0xe041040e04, +0xf023cf9900, +0x60418f9900, +0x23e0870900, +0x60c3078800, 
+0x61c6830e00, +0x1c041858e00, +0x71e0871f00, +0x60274f0000, +0x70c38f1a00, +0x70438f0300, +0x101c60000, +0x1e0c1078f00, +0xe040c70c00, +0x31e1861c00, +0x1c0c1878e00, +0xe0418e8600, +0xc0408f8300, +0xe1c3870d00, +0xe1408e1f00, +0x1e040821f00, +0x1c081038c00, +0x60c1070c00, +0x30638f9800, +0xe041840f00, +0xe0438f9800, +0x30470e0200, +0x2060c38f8800, +0xe1c1878c00, +0x1e3c38fdf80, +0x7140861f00, +0xe0638b9900, +0x7022cf1800, +0x70e18f0200, +0x1e260820f00, +0x70c38f1b00, +0xe061060e00, +0xc3c3c7c000, +0xe166cf8e00, +0xc38f8000, +0x7021850c00, +0xf0208f1c00, +0xe041871e00, +0xf3208f1f00, +0xe041870f00, +0x106043870c00, +0x60438f0e00, +0xc0c1c78c00, +0xe0c1078f00, +0x61a0cf1e00, +0x1c0c0820f00, +0xe060871e00, +0x61e28f1e00, +0x206040870800, +0x71a0cf9c00, +0xc2c1038d00, +0x1c043c78c00, +0xc143878c00, +0x4040830fc800, +0x70678f9c00, +0x10f1c1871f00, +0xe040820c08, +0xe041860f00, +0xc0c1878980, +0xc1c187c000, +0xc04187cc00, +0xe1e3cf8c00, +0x30a3cf1800, +0xe16186cf00, +0x60a3871e00, +0x61c1860a00, +0xe0c18f9f00, +0x1e061860f00, +0x20e3cf9000, +0x83470400, +0xc040830c00, +0xe040870c00, +0xe240878c00, +0xe0438f1700, +0x20e041870a00, +0x87470000, +0xc140879800, +0x2063cf0c00, +0xc1c0c7cc00, +0x40408f8000, +0xc18387c000, +0x60418f8200, +0x102041870000, +0x20e0438b0900, +0x7022cf1e00, +0x10f0238f1d00, +0x70234b1800, +0xc0c1820700, +0xc081058c00, +0x60e0c38fc800, +0x3efc30700, +0x70e18f1100, +0x6041860e00, +0x1c18f8000, +0x1c0cf8800, +0xe061870900, +0x60478f8000, +0xe140870f00, +0x61c1860e00, +0x43c10f8000, +0x21e08f9a00, +0x3c3cf8000, +0xc043878100, +0xe1e0cf9c00, +0x6041060e00, +0xe063cb9800, +0xe340831f00, +0xc040878c00, +0x20e041870c00, +0xf120cf1c04, +0x61c1860a00, +0x30a08f1800, +0xe0438f8800, +0x30e38f9000, +0x60410f1300, +0xc040870c00, +0x61c105ce00, +0x6140830c00, +0x63c4838600, +0x70638f0200, +0x60c1060300, +0x40c081870300, +0xe020860e00, +0x1e1418f8f00, +0xc0c10f8900, +0xf0208e1b00, +0xe3c1028e00, +0x1c78f8100, +0xf1a3cf9900, +0x6043860600, +0xe041020e00, 
+0xe060c78e00, +0xc240470e00, +0xc1c30fcf00, +0x4040870e00, +0xe041820f00, +0x41c0c307cf00, +0x30e3cf1800, +0x61e08f9800, +0x20c38f1a00, +0x20e040870800, +0x60c0871800, +0xe141040f00, +0x2060608f9800, +0x7020861c00, +0x6042870c00, +0x40c1070800, +0x181078000, +0xc241078f00, +0x60410e0300, +0xe241820f00, +0xe0c08e9f00, +0x30208f1000, +0x70208f1800, +0xe041860f80, +0xc141820f00, +0x71e1cf3b00, +0x7041079200, +0x181078000, +0x1c0418f8100, +0xc081830c00, +0xe1608f9c00, +0x2040c1078800, +0x206041871800, +0xe0c1060f00, +0x20e160478e00, +0xe143cf0e00, +0xe340820f00, +0xe1c38f8f00, +0x6141820c08, +0x6063cf1c00, +0x60408f1800, +0xc3c0820e00, +0x20e043870d00, +0x2060418f9800, +0x60478f0300, +0xe060c78c00, +0xe1c107d800, +0xa040871800, +0x61c1878980, +0xe021cf9c00, +0xe140860a00, +0xe243c99f00, +0x6140870e00, +0xf020cf1800, +0xe041870900, +0x1c041060f00, +0xe041c78c00, +0x2060c30f8a00, +0x60c08f8000, +0x6140830c10, +0x20e0638f0e00, +0xe041871f00, +0x1c0438d8c00, +0xc0c30f8000, +0xc241870300, +0x60c30f0700, +0xc241878800, +0xe040860e00, +0xe040820e00, +0x70a08f9800, +0x6041860e00, +0x8307c000, +0xe040870e00, +0x1060430f8300, +0xe0618e9f00, +0xc3c107c700, +0xe0208f1e00, +0x60428f8800, +0x20e040879e00, +0xe141040f00, +0x6141860e00, +0x6140860c00, +0xe0418f9900, +0x2040860c00, +0x4181278800, +0xe0608f0c00, +0x70278f9800, +0xc0c107cf00, +0xe041870d00, +0xe1638f8000, +0xe041868f00, +0xf0208e1b00, +0xe0e1870e00, +0x60438f9900, +0x61e6830f00, +0x60608f1800, +0xe0438f8c00, +0xe340879c00, +0xe1e04f1900, +0x6041040e00, +0x6026cf1c00, +0x6063cb1c00, +0x61c38f8000, +0xe040c78c00, +0xe0238b9d00, +0x1e020870e00, +0x6141820600, +0x1c040870e00, +0x6041079000, +0x180c0810608, +0xe1c1070e00, +0x3027cf9c00, +0xe0408f8c00, +0x6141860e00, +0x31e08f9000, +0xe0608f9800, +0x1c0c1820708, +0x1c3c0878c00, +0x1c0c18f8000, +0xe0608f9800, +0x4040418f8800, +0x71e0cf9c00, +0xe140878c00, +0xc1c1870a00, +0x20e067cb1f00, +0xe0c1830f00, +0xc041860e00, +0xe141879f00, +0xc041c78e00, +0x40e042c78e00, +0x60438b1a00, 
+0xc1c187c800, +0x60410f9000, +0xf363841f04, +0x6140830c00, +0x18081030300, +0x6141868800, +0xe040820e00, +0xc381820700, +0xe040871800, +0xc0c1020f00, +0xe041020a00, +0xf061cf9c00, +0x6040870c00, +0xe1408f0c00, +0xe0638d1c00, +0x7040871e00, +0x7021cf1800, +0x1060408e1a00, +0x70638f1a00, +0x6341820f08, +0xc3c1078900, +0x2022cf0000, +0x20e041069f00, +0xe043c91f00, +0xe141068e00, +0x4040871800, +0xf161041f00, +0xe3e0c20c0c, +0x43c1820700, +0x30e18e1400, +0xf060861e00, +0xe041879800, +0xe061cf1c00, +0x60418f1b00, +0xe043870c00, +0xc040870e00, +0x4081078000, +0xe0c1058f00, +0xe0224f1e00, +0xe1c78f8000, +0x60c38f8d00, +0x2061cf1c00, +0xc041878c00, +0x60608f1c00, +0x70638f1000, +0xc0438f8200, +0xe240830f00, +0xe0438fc800, +0xe1c1878c00, +0x1e041870900, +0xc0c1879e00, +0x1c040878600, +0x60418e1e00, +0xe0408f9e00, +0xe060cf1c00, +0x1c281878580, +0xe041879800, +0x7041879800, +0x60c041078b00, +0xc080830c00, +0x1e3e1860f00, +0xe041820e00, +0xe3e0cf9f00, +0x206041070e00, +0x70a08e1c00, +0x40c0438f8580, +0x1030238b1a00, +0xc1c30f8000, +0xc040878c00, +0xf0608f1a00, +0x70e38f0300, +0xc141078000, +0x3c1cf8c00, +0xe3a6c30f00, +0x418107c000, +0x6041860e00, +0x107026cf9e00, +0xe0438f8800, +0x1c0c0878c00, +0xe240870f00, +0x61c3c79800, +0xc0c1870f00, +0x70a28f9800, +0xe043ce8800, +0xe3c1820f04, +0x7023cf1800, +0xe0430f8000, +0x6040871a00, +0x206041871f00, +0xf041861e00, +0xe061861f00, +0x20e0618f9c00, +0xe061840e00, +0x70a18f9900, +0xe040878c00, +0x1e023cf8c00, +0xe0438b9a00, +0x20e040831e00, +0xc0c18f9900, +0x8381e78000, +0x2027cf0000, +0x70e38f1b00, +0x418304ce00, +0xe1608f1f00, +0xc0c0820e00, +0xe0608e1b00, +0x4081078800, +0xf0618f9800, +0x60c08f8200, +0x6066cb1c00, +0x1e2628f9900, +0xe0418f0e00, +0x1e062cf9d00, +0xe040870c00, +0x71a08b1800, +0xc081020700, +0x70418f1000, +0xe0c3040f00, +0x808107c000, +0xe260879800, +0xf320cf1900, +0xe240871900, +0x1060408f1e00, +0x1c2c1078700, +0x40c10f8300, +0x1810f8000, +0xe023cf9800, +0x61c38f9c00, +0xc240830d00, +0xe040820e00, +0x204040870a00, 
+0xe060878c00, +0xe041c70c00, +0x43c1020700, +0x61478f8300, +0x60c1870e00, +0x61608f9900, +0x3027cf1000, +0xe060821e00, +0xe344831f00, +0xe0c1060f00, +0xe0c1070f00, +0xe041820c00, +0x1e0c3078f00, +0x181038000, +0x381078000, +0x60c38f1800, +0xe140871e00, +0x1c1c1878c00, +0x1c081020700, +0x2060264f8e00, +0x1c0c1060f00, +0xc0c1070b00, +0xe241c50e00, +0x2041860800, +0xe0c1870e00, +0x81c3c50c00, +0xe060871900, diff --git a/samples/digitrec/digitrec/data/training_set_3.dat b/samples/digitrec/digitrec/data/training_set_3.dat new file mode 100644 index 000000000..de11a687c --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_3.dat @@ -0,0 +1,1800 @@ +0xe041010e00, +0xc083810e00, +0xe225811e08, +0x6041810c00, +0x1e0c3808e08, +0x6343c08e10, +0x1c083808e00, +0xe0c1010e00, +0x61c3870e30, +0xe063899e00, +0x1e0c3891e00, +0xf0e3c11e00, +0xe043010610, +0x4041010e00, +0x1031e18d1e00, +0xf1e3871e10, +0xe081030c00, +0xe0c1c89e00, +0x7063809c00, +0x6061831c00, +0xc345808704, +0xe043810e08, +0x6043831c00, +0xe1c3c19e10, +0xe041030c00, +0x7023811c00, +0xe041830e00, +0x1e243858f00, +0x1c041010e00, +0xe0c1010e08, +0xe041810e00, +0xe1c3819e10, +0xc1e3819c00, +0x1e3e3c09f18, +0x8381030102, +0xc043030e00, +0xc283808f00, +0xc081810e00, +0x1c041810e08, +0x6041891c00, +0x20c1990c00, +0xe043810e10, +0xe38f0000, +0xe081818e00, +0xe0c3811e00, +0x6141891e00, +0x60c1811c00, +0x61c1818e08, +0xe0c18d8e00, +0x1e0c3c19e00, +0xc081810e00, +0x70e1821c30, +0xc0c1810e10, +0xc081810c00, +0x1e1c24ccf00, +0x18183808700, +0xe0e3891e00, +0xe0c3810c00, +0x1c3030204, +0xe341c89e00, +0x6041010c10, +0xe041811e00, +0x4181810c00, +0x1c383038704, +0xe043811e00, +0xc0c0c50e00, +0xc3c1838700, +0xe0c3850e00, +0x6041821e00, +0x70c1011e00, +0x1e1c3809f00, +0x61c1858e00, +0xc0c1020608, +0x1c041810e08, +0x40c3010608, +0x70e3870c10, +0x1c081011e00, +0xe0c3810e00, +0x60c1031c00, +0xe1e18b1e10, +0x6043030c00, +0x60c3810e00, +0xc0c1030410, +0x60e3811e00, +0xe0c3c88e00, +0xe0c1cf8f00, +0xc041010400, +0xe043810208, 
+0xe1c1810600, +0x1c043810e00, +0xe043810e08, +0x1c0c3818e00, +0x1e0c3818e08, +0x1e3c3c38f08, +0xe063810e00, +0x41c1860204, +0x61c3891e00, +0x1e0c7c48f08, +0xc08381020c, +0x1e3cf8000, +0xe0c3808f00, +0xe041891e00, +0x6041831c00, +0xf061811e00, +0xe0c3810e00, +0xf161020630, +0xe0c1810e00, +0xf1e3891e00, +0x70e1831c00, +0x6041010c00, +0x41e3890c00, +0xe0c3819e00, +0x70c3833c00, +0x61e3891c10, +0xf0c1cf1e00, +0x1e041819e10, +0x6043031e00, +0x7167811e10, +0xe0e3c19e08, +0xe043010e00, +0x1e3c3c08f00, +0xc043810400, +0xc0c3c19e00, +0xf1e3811e18, +0xe1c3850218, +0xe3c3818f08, +0x40c0810c00, +0xe0c3819e00, +0xc0c3818e08, +0xc043c89e00, +0x73e3cd9e10, +0x20c183ce8e00, +0x8083809300, +0x6041810c00, +0xe041c18e00, +0xe043010c00, +0xe141011e00, +0xc081810600, +0xe1c08d0e00, +0xc0c1818600, +0x1e243808f00, +0x1e0c3018f00, +0xe1c3811e00, +0x4081010400, +0x1c083810608, +0x2060c3898e00, +0x6061811c00, +0x1e041810e08, +0xe3c3c08f00, +0x1e0c3c08f08, +0xc043810e10, +0x70e3811c00, +0x1e343818f08, +0x60c3030600, +0x6041030c00, +0xc283808708, +0x7063021c20, +0x41c1030600, +0x6043890e00, +0xc041830610, +0xe0418d0e00, +0x71e1830c10, +0xe0c7818e00, +0x8181810208, +0xe1c1810e08, +0x4341c10600, +0xe041810e00, +0xc083818e08, +0xe0c1819e00, +0xe043819e00, +0x60c3811e00, +0x1e1c3818e08, +0x6043811e10, +0xc341818e20, +0x1e063811e08, +0xf041899e10, +0xc181818600, +0x4081810c00, +0xe1c3811e00, +0x61c3811e00, +0xe0c1810e00, +0xc041810e00, +0x71a18f1c10, +0xc1c3010608, +0x1e2c3818f00, +0xc083810e00, +0xe081810e08, +0xc1c1810e08, +0xe0c1810e00, +0x1c383c08700, +0x7043811c00, +0xc0c1030608, +0xc1c0409e00, +0xe043808a08, +0x7161831c00, +0x61c3811e10, +0xe241808e00, +0xe1e3899e00, +0xe043830e00, +0x70e3831c10, +0xc38380870c, +0x61c3c78e00, +0xe0c3810e00, +0xe2418f8e00, +0x18383818704, +0x4081020c00, +0x3043831c00, +0x1e043818f08, +0x7063831c00, +0xe143830600, +0xc2c0489e00, +0xc1c3810e00, +0x6043811c00, +0xc081010c00, +0x60c1810c00, +0x101c1810e00, +0xe0c1810c00, +0x60c1811e00, +0xe0c1890e00, 
+0x1c04381933c, +0x1c3c3c18f00, +0x60c3030c00, +0xe0c3830c00, +0x61c3859e00, +0xf063871c00, +0x61c3830c00, +0xe041818a00, +0xe043810e18, +0x2020c08f9c00, +0xf3c3819f10, +0x20e18b1800, +0xe041010e08, +0xc041810e08, +0xf3e3839e10, +0xe0c3811e10, +0x1e041878600, +0x1c0c3818f00, +0x4181810c00, +0x71e1851610, +0xe0c3858e08, +0xe0c3809f00, +0x1e221808f18, +0x6041810c00, +0x1e1c3c09f18, +0xe041811e00, +0xc081810600, +0xe0e3831e00, +0x1c183c68700, +0xe1c3870e00, +0x1e1c3c18f00, +0x6043810c00, +0x6143810610, +0xc2c3c88e00, +0xe1c3870e08, +0x70c3cf1c00, +0x30c38f1c00, +0x60c2811c00, +0xc081810e00, +0xc0c0810e00, +0x6041811c00, +0xc081810e00, +0x1e0c38d1e00, +0x1c041808e00, +0xc043070608, +0xe0c3810610, +0xe1c3030e18, +0x30e38b1c00, +0xc383808710, +0x70e1cf9e00, +0x20c1830c00, +0xe0c3c08e00, +0x1c0c3808e00, +0xe0c3010e00, +0xe1e3831c00, +0x4041020c00, +0x31e3810c20, +0x60c3010e10, +0xe1c3810e08, +0xf1e3c49e10, +0xc181810c00, +0xe041811e00, +0xc0c1810410, +0xc081810e00, +0x383878600, +0x60c3830c00, +0x60c3811c00, +0xe0c1810e00, +0xc083818608, +0x6080890c00, +0x1c181010200, +0x70c3811c20, +0xe3e38f0304, +0x31e3cb1c00, +0x6043010a08, +0xe1c3830e00, +0x7041811c00, +0xe041811e00, +0x6041030c00, +0x1c087c08f00, +0xc183808e00, +0x70c1833c00, +0x40c1810c00, +0xe043810e08, +0xe0c3870e00, +0xe343888f08, +0xc181808200, +0xe2c3809e00, +0xc0c3c08f00, +0xe1c3810e10, +0x3c3018300, +0x1c043818b04, +0x60c1830e10, +0x1c3c3c08f00, +0x1e1c3c48f08, +0xe101808e00, +0xc081010e00, +0xf061811c00, +0xc087808318, +0xe043810e08, +0x1e0c3858e00, +0xe0c3811a18, +0xc1c1850e00, +0x30e3831c00, +0x1c083808108, +0xc183818e00, +0xe0c3c89f00, +0xe3c3810e18, +0xe041810e00, +0xe0c3811e00, +0xf0e1811e00, +0xe041010410, +0x61c3811c00, +0xe0c2030e10, +0x7041811410, +0x1c18181860c, +0x71e3cd9e00, +0xc0c3030e00, +0xe041030e08, +0x6041890e00, +0xe0c3818e00, +0xe0c18d0e00, +0xc0c3810e00, +0xf163c19e00, +0x81c1c50608, +0xc0c1010c00, +0x1c083038300, +0xf0c1011e00, +0x1e1c3c19e10, +0xe141818e00, +0xc0e1030c20, 
+0x70c3831c00, +0xe143810e00, +0xc0c3408e00, +0x61c1020608, +0x60c1831c00, +0xe043810e00, +0xc087888f00, +0xc1c3838e00, +0x61e3810c10, +0x1e0c3808e00, +0xc083808e00, +0x6083810400, +0xe061831c10, +0xf1c3c19e00, +0xe041811e00, +0x40c2810e00, +0xc0c1830e00, +0x70c3813e00, +0xe1c3c09e10, +0x6043811e00, +0x70c1871e00, +0x60c3811c00, +0xe0c1010e08, +0xe1c1df8400, +0xe063811c00, +0x1e043818e10, +0xe0e3030c30, +0xe0c0c18e00, +0xe061819e00, +0x60c1811c00, +0xe3e1811e10, +0xe043810e10, +0x60c1810c00, +0x30e1891c00, +0xe0c1850e00, +0xc243c68f00, +0x60c3811e10, +0x2060c08d0e00, +0xe0c3818e00, +0xf0c1819e00, +0x6103810208, +0xe043891e00, +0x60c1811c10, +0x4003810400, +0xc083818600, +0xc0c1010600, +0xf1e1831e10, +0x1c3c3808318, +0xc0c1810e00, +0x1c083808e00, +0x60c3811c10, +0x1e3c3cc8f00, +0xc0c3c19e00, +0x61c1830410, +0xe0c389be00, +0x60c3811c00, +0xe2c3038f00, +0xe0e3c99e00, +0xe3e1c98f00, +0x60c1870c00, +0x1e2c7818f00, +0xe0c3870e00, +0x21c3850418, +0x1e0c7818f00, +0x1c083818e08, +0xc043810e00, +0x1c081810608, +0xe043809f00, +0xc243838600, +0xc0c3808e08, +0x6043031c00, +0xe3e3830e18, +0xc081808e00, +0x60c3810e00, +0x1e043809f08, +0xe143020a08, +0xe041810c00, +0x1c043809f00, +0xc081010600, +0xe0c3830e00, +0xc1c3030608, +0xe0c1810c00, +0xe0c1818e00, +0x2061c38d9e00, +0x6063811c10, +0x4181810e00, +0xf063811e00, +0xe141830400, +0x60c3810c00, +0xe0c1810e00, +0x60e1c3cf9f00, +0xc180810e00, +0xf0e3c99e00, +0x60c1831c00, +0x83c3808f00, +0x6141810e10, +0xe141819218, +0x60c1010c00, +0x6141811e08, +0x6041811e00, +0x61c3858e08, +0xe1c3818e00, +0xc083810e00, +0x4043010e00, +0x6080030c00, +0xc043010e00, +0x1e043830f00, +0x60c3830c00, +0xc0c3810e00, +0xc183810400, +0x60c1890e00, +0x60c3850410, +0x6041811c00, +0xe141070600, +0xc3c1808208, +0x60c1991c00, +0x20c18b1c00, +0xf0c3cd9e00, +0xe043020610, +0xe1c1890e00, +0xe043810e00, +0x1c081808e00, +0x4081810600, +0xe04181be00, +0x61c3811e18, +0x2060c18f0e00, +0x1c043808e10, +0x30e1811630, +0xe0c3808e00, +0xe180850e00, +0x1c181038608, 
+0xf0e1831e00, +0xe043810e10, +0xe041811c00, +0x61e1df1e00, +0xe063d19e00, +0xe0e38f9e00, +0x1e0c3808218, +0xe081810e00, +0xe043810e00, +0xc043010f08, +0xc082810e00, +0x1c083c48f00, +0xc081808218, +0xc183010608, +0xe143810e10, +0xe0c3810c00, +0xe3c3018218, +0xe0c0470c00, +0xe0c1010e10, +0xf061c99e00, +0xe1871800, +0x61c3861110, +0xe0c1811e00, +0xc043810208, +0xe0c1850e00, +0xe0c1030e08, +0xc183818600, +0x61c1811600, +0x30e48f0e00, +0x6041810c00, +0xc0c3010e00, +0xc183818708, +0xe1c7808f00, +0x61c1811e00, +0x4041030c00, +0x2182810e00, +0xe0c3c18c00, +0xe043808e00, +0xc243808600, +0x70c1811e00, +0x21e1c91e00, +0xc1c1830e00, +0x60c3850c00, +0xc081830c00, +0xc083818e00, +0xe0c3810e00, +0x60c1870c00, +0xc081810e00, +0x71c1831c00, +0xe041811c10, +0xe041811e00, +0xc081858e00, +0xe0c3811c00, +0xc3c3838708, +0x60c1811c00, +0x6041810c00, +0xe041891e00, +0x30e1851c10, +0xe143818e08, +0x1c183808e00, +0x1c183808700, +0x61c3810c00, +0xf361891f18, +0xc0c3870c00, +0x60c1831c00, +0x1c2c3c18e00, +0xf1618f1c00, +0x60c3811e00, +0xe1e1891e00, +0xc081810e00, +0xc0c3010600, +0xf3c3c08e00, +0x70a1821c00, +0xf063809e00, +0x70c1890c00, +0xc081808e00, +0x1c0c18d0e00, +0xc0c3830200, +0x2060c0870e00, +0xe0c1810e08, +0xe0c1899e00, +0xc043010600, +0xe341c78e00, +0xe0c3811e00, +0x6061831e00, +0x6041031c10, +0xc083810e00, +0x1c081010208, +0x6043c19c00, +0xe043808e00, +0xc040810a00, +0x1c083c08600, +0x63e1810630, +0xe0c1811e00, +0xf063811e00, +0x1e041809e00, +0xe141830e00, +0x1e3c3c08f00, +0xc1c3830e00, +0xe0c1810e00, +0x61c3811e00, +0x1070c0cb1c00, +0x4081810400, +0x6041811c00, +0xf061811c00, +0x60c3890e00, +0x21c0c99e00, +0x1c0c3c18e00, +0x6041020c00, +0x2060c3871c00, +0xe0c3010e00, +0x1c187808700, +0x60c0cb1c00, +0x6043811c00, +0x4182818e00, +0xc081818608, +0xe043811e10, +0xe1e1830610, +0xe1c3859e08, +0x18081808104, +0xc0c3810e08, +0xa082818e00, +0x40c1010400, +0x7041821800, +0xe043811c00, +0xe383818e00, +0xe043810e00, +0x6043811e00, +0xc041810e00, +0x73e0810e30, +0xc3c3828700, +0x4083010e00, 
+0x6143850600, +0xc083810e00, +0xe0c1811c00, +0xe0c0811e00, +0xe041830c00, +0xe0c3010e00, +0x101e1839e00, +0x7041831c00, +0x1c081808f08, +0xe0c3819e10, +0xe0c7818f08, +0xe0c3850e00, +0xe041811e00, +0xc041020600, +0x61c3810c10, +0x61c1850218, +0xe2c0810e00, +0xc383818e00, +0x7063831c00, +0xe0c1021c00, +0xc081848e00, +0x6181810e10, +0x41c1810600, +0xc0c1810610, +0xe1c3c18e10, +0xe0c38f8e00, +0x43c1030608, +0x70e3871c00, +0x21c3811c00, +0x1e1c3878e00, +0x101c1030100, +0x4181010600, +0x1c3c1030304, +0xc081010e00, +0x60c1010208, +0x6043811c00, +0x60c1811c00, +0xe1c1830400, +0x60c3810e00, +0x7043831c00, +0x61e3811c10, +0x1c083808700, +0x6061999e00, +0xe181010600, +0xe0c3810e00, +0xc1c3830600, +0xc2c1818e08, +0x7061cd1c00, +0xc043811e00, +0x61c3810410, +0xe063891c00, +0x1e1c1c89e00, +0xe043819e00, +0xc3c1810e00, +0xc0c1830400, +0x2060c3870e00, +0xe1c3811e00, +0xe1c3870608, +0xc0c3858e00, +0x10e5cf0c00, +0xe083889e00, +0xe041810e00, +0x60c1871c00, +0xe0c3810e00, +0x7040cd8e00, +0xc182810e00, +0xe343808f04, +0x1e043808e08, +0x50e3c99c00, +0xf0238f1e10, +0x2060c0850e00, +0xc043810e00, +0xe0c3c19e00, +0xe0c3030e00, +0x183c0838300, +0x60c3810c00, +0xc081830c00, +0x60c3811e00, +0xc081010200, +0xc380c18e00, +0x6361819e00, +0x4143010100, +0xe3c1c10e18, +0x18383818f00, +0x60c1011e00, +0x63c3818e00, +0xf0e3c99e10, +0xc041010c00, +0x1c181810e00, +0x206081070c00, +0xe243810e08, +0x6041020400, +0x1c1830204, +0xe041020c00, +0xc3c3878618, +0xe1c3818f08, +0x1c0c1878e00, +0x1e1e3819e00, +0x60c3060e00, +0x7141811e00, +0x1e0c1819e00, +0x7063831c00, +0x60c3419e00, +0xe243819f00, +0xf1e3c99e00, +0xe2c1848600, +0xc1c103020c, +0x1e2c1818f08, +0x40e0c71c00, +0xc3c1850204, +0xc081010300, +0xe363811e00, +0x60c3810e00, +0x61e3831e10, +0x60c1811c00, +0x63c3c08318, +0x60c1011e00, +0xc081810600, +0xe1c3811e00, +0xe041811c00, +0xe1c1c99f00, +0xe0c1810e00, +0x6141811e00, +0x1c083818700, +0xe1c3810e00, +0xf041811e10, +0x1e0e1c99e00, +0xe0c3819e10, +0xc083010e00, +0xe0c1811c30, +0x70408d0c00, 
+0x60c1811c00, +0xe043810600, +0xe041811c00, +0xe0c3818e00, +0xe041810e00, +0x60c0831c10, +0x61c3891c00, +0x60c3811c00, +0xe0c1819e00, +0x7161011c00, +0xe043830e18, +0x1c287808708, +0xe1c1818e00, +0xc083810600, +0xc083808e00, +0x61c1030600, +0x6041811c00, +0x6041830c00, +0xf0e18b1c10, +0x1c043810e08, +0xe3c3858608, +0x6081810e00, +0xe0c1830c00, +0xe0c3030700, +0x1e3c3c48f00, +0xe163871e00, +0x61c0cf0e00, +0x41c3820408, +0x1f1c3c19e00, +0x2060c1830c00, +0xc1c1870204, +0xc0c1810e00, +0xc041010600, +0xe043811e00, +0xe1c3810410, +0xe1c1819e00, +0xe043850e00, +0x18383838700, +0x4083810e00, +0x1c180808e00, +0x60c1830c00, +0xc343858718, +0xc083810600, +0xc1c1810e00, +0xe041810e00, +0xe041811e08, +0xc0c1830c00, +0xe1c2810e00, +0x1e0c3838e00, +0x71e3899e00, +0x30c1891c00, +0x1c283c08700, +0xe1c3879e00, +0x4081000400, +0xe0c3c18e00, +0xe083810e00, +0xc1c1811e00, +0xe0c1810e00, +0xe0c1818e00, +0x21c3810c10, +0xe0c3810e00, +0x31e3079e00, +0x2043010e00, +0xe1c3c99e18, +0x18081810200, +0xe0c3010e08, +0x1c0c3838e00, +0xe143813c20, +0x6041820400, +0xe1e3c99e00, +0xe1c3870e00, +0xc243808f08, +0x60e1851c00, +0xe041810218, +0x61c1831c00, +0x1e043819338, +0x1e0c1c09e00, +0xe083819e00, +0x1c0c3818704, +0x6043c1be00, +0x7063831c00, +0xc0c3010600, +0x60c1030e00, +0x60c7c19e00, +0xe1c3818f00, +0xe0c3030e00, +0x1e0c3c18e10, +0xe3c7818f04, +0x1e1e3c11e10, +0x1e041810d04, +0xe0c7870e00, +0xe0c3889e00, +0xc0c3870c10, +0xc081810400, +0xc043810400, +0x61c1848e08, +0xe0c3850e00, +0xe0c3810e10, +0xe041030e00, +0xf1e38f9e08, +0x40c1810e00, +0xe043811c00, +0x6081011c00, +0xe0c3811e00, +0x60c34d9e00, +0x60c1891e00, +0xe0c3cd9e00, +0xe081850e00, +0xc0c3810600, +0x63618e0608, +0xe0c3819e00, +0xe343858e00, +0xe063819e00, +0xe1c1c58e00, +0xe0c3818e00, +0xc043030600, +0x60c1811e00, +0xe1c3c58e00, +0x7061890e00, +0x61c1020e18, +0x3e3870410, +0x61c3011e10, +0x1c041810e00, +0x4081810e00, +0x71c3810c00, +0xe061851e10, +0xc083810600, +0x1c0c1818f00, +0x61c1810e00, +0x6080810c00, +0xe0c3811e00, +0x1c083808600, 
+0xc081010600, +0xe080811c00, +0xe0c3818e00, +0xe0c3819e10, +0x1e1c3810e08, +0xe043010e00, +0x61c1890e00, +0xe0418f1c00, +0x6041030c00, +0xe081c19e00, +0x1e2c3899f00, +0xe043819e00, +0x60c1891e00, +0x1c081818600, +0xe041818e10, +0xe0c7808e00, +0x1c1c0c88f00, +0xc0c3870e00, +0xe3c1c19e10, +0xe1c3858e00, +0x1f0c3809e00, +0xf063811e00, +0xe0c1010e00, +0xe0c1819e00, +0xe1c1010e08, +0xe081038f00, +0xc043810e10, +0xc3c3c08600, +0xe0c3818e00, +0xe1c3030e00, +0xe083810e00, +0xc0c0850e08, +0xe0c3819e00, +0x1e1c7c08700, +0x1c043808700, +0x6041810600, +0xe143030410, +0xe1c3850e00, +0xe0c3810e00, +0x61c3c58e00, +0x6061831c20, +0x206041891e00, +0x1c081848600, +0x181e7c78e10, +0x6041831c00, +0x1c0c3818e08, +0x18080838600, +0x7043060e00, +0xc141030608, +0x7063891e00, +0xc081810a00, +0xe243858618, +0xc041810e00, +0xe0c1818e00, +0x30c0891c00, +0xc0c1810618, +0xe041810e00, +0x7041011c20, +0xc041010e00, +0xc1c3858e00, +0xe061811c20, +0xf061cc9e00, +0xc1c3830e00, +0xc0c3810e00, +0x20e0e14f9e00, +0x1c283c08f00, +0xc1c2810600, +0xe0c3810e08, +0xc083818e00, +0x1e0c3809e08, +0xc1c1070608, +0x1c0c3808b00, +0x7040911c00, +0x6043011c00, +0x7041891c00, +0xe043811e10, +0xc0c7818e00, +0xe0e1811e10, +0x60c1810c00, +0xe1c1850e00, +0xe041819e08, +0x70e38f1c00, +0x30e3811e00, +0xe0c1870e00, +0x6041810610, +0xc043010e00, +0xc081810e00, +0xc081810e00, +0xf0c3811e10, +0x20e0618b9e00, +0x63c1838e00, +0xc081898e00, +0x4183810600, +0x1e043818e00, +0x61e3031c00, +0x60c38d0e00, +0xc381808600, +0xe0c3809e00, +0xe0c3c08e00, +0xe0c3830c10, +0xe1c38f9e00, +0xc081810600, +0x60e1811c00, +0xe0c3818e00, +0x70618f0e00, +0x6061831c00, +0xe1c0811c10, +0xc0c3818e08, +0xc0c1030608, +0x18383808708, +0xe347810708, +0xe0c3858e00, +0x73e3810e18, +0xe0c0810e00, +0x70e1819e00, +0xf021811e00, +0xe143811e00, +0x71e1820410, +0x6081010400, +0x40c1010e00, +0xe0c18b1c00, +0xc1c1808e08, +0xe1c3870e00, +0xe0c3819b00, +0xc041810a08, +0xe0c3819e00, +0xc083010600, +0x6043010e00, +0xe141818e08, +0xe0c1010e00, +0x60c1011e00, 
+0x60c0890e00, +0xe161811e00, +0x4080810c00, +0xe043810e00, +0xc0c3030e10, +0x70618f1c00, +0xe041891e00, +0xe0c3811e10, +0xe0c3810e00, +0xc081010208, +0xe041850e00, +0x60c3891c00, +0xe0c3819e00, +0xe041810e08, +0xe1c3c18e00, +0xc0e3810e00, +0x6041810c00, +0xe043811e00, +0x60c1830c00, +0xc0c3818e00, +0x1e0418f0204, +0xe0c0409e08, +0x70c3871c00, +0xc1c0408e08, +0x61c3811e00, +0xe081010e00, +0x1c0c3830700, +0x1c083c08600, +0xc081810e00, +0xe0c3c19e00, +0xe1e3c99e00, +0xe1c3818f08, +0xe1c1811e00, +0x60c1890e00, +0xc041030e00, +0x18381c28300, +0xe1c1810e08, +0xf161851c00, +0x70e18e1c00, +0x10183458400, +0x6043811c00, +0xc141030618, +0xf0c3811e00, +0x206083870e00, +0x1e043819f00, +0xc0c3808e00, +0xe1c3cf8e00, +0xc080800e00, +0x1e043818e08, +0xe043811e10, +0xe041810e00, +0x63c3c19e10, +0x4081810600, +0xe0c1810e00, +0x4041030400, +0xe0c3810c00, +0xc081810e00, +0x4081830c00, +0xe161cf9e00, +0x61c4820608, +0x60c1810e00, +0x381038200, +0xf0e1891e00, +0xe041811e00, +0x61c3c59e00, +0xc0c3010600, +0xe3e0818e18, +0x6141831c00, +0x1e083818f00, +0xc0c3810c00, +0x1c083808f00, +0xe043808e00, +0x6041821c00, +0xc043810e00, +0x6063811c00, +0x6141810e00, +0x6043870e00, +0xe341858f00, +0x4083010e00, +0x63c3878608, +0xc0c1030600, +0x1c3c3818608, +0x6041891c00, +0xe1e3851e00, +0xe083810c00, +0xf1e3899e00, +0xe041820c00, +0xe0c3810c00, +0x8181830200, +0x1c0c3808f08, +0xf0c3011e00, +0xe043818e00, +0xe1c3811e10, +0xe0c3858238, +0x70618b1c00, +0x4183c18e00, +0xc1c3858e08, +0xc183818e00, +0x1e2c1818f08, +0xe041811e10, +0x60c1890e00, +0x71e3850e00, +0x1e3c3830308, +0x1c083818e00, +0xe120870c00, +0x60c3811e00, +0x4141830e08, +0xe0c1899e00, +0xe080810e00, +0x1c3c3c78700, +0x6041010010, +0x6041010c00, +0x6041810c00, +0x1e1c0c89e00, +0x1c043809f00, +0x20e0c1c78e00, +0x1e081010e08, +0x6081010c00, +0x8383808600, +0xc083808e10, +0xe043810e00, +0x60c1810c00, +0x6080810c00, +0xe0c3010e00, +0xe0c1811e00, +0xe041818e00, +0xe041811e10, +0x4081810c00, +0xc1c1020208, +0xf0e1811e10, +0x30e18f1800, 
+0x60c3811c00, +0xe0c1831c00, +0xc081010200, +0xe081810e00, +0xc0c3810600, +0xe0c3870c00, +0x60c3830c00, +0x6043819e00, +0x6041050c00, +0x70c38f1c00, +0xe041810e10, +0x61c3870c00, +0xe0c3810e00, +0xe0c7c09e00, +0x1e043870e08, +0xe083810e00, +0xe1e1819f10, +0xc1c1030408, +0xc181010600, +0x1e043c08f00, +0xc1c3808e08, +0x1c2c3c08700, +0x2061c3cf9c00, +0xc1c1c70200, +0xf041c99c00, +0xe0c3818e00, +0xc183810608, +0x1c0c3418e00, +0x60e7819c00, +0x30e38f1e00, +0x1e0c1810e10, +0x60c18b1c00, +0xc0c3810e10, +0xf1e3891e10, +0xe0c1010e00, +0xe043819e00, +0x6181810410, +0xe341810e00, +0x6041810c00, +0xe163831e00, +0x1c0c3c40e08, +0xf361871c10, +0x1e041891e00, +0x1e1c3c08f00, +0x60c3030410, +0x7061831c00, +0xe1c1830e00, +0xe0c3810e00, +0x1030c3871c00, +0xf061811c00, +0xe0c3819e00, +0xc3c1810608, +0xe1c1c19e00, +0x70e3831c00, +0x60c1821c00, +0x60c1830c00, +0x1c041810e00, +0x60e1c90e00, +0x6083810e00, +0xe3c3810e00, +0xe041831e10, +0xc0c1030e00, +0xc183810e00, +0xe283c08f08, +0x7041891c00, +0x1e0c1810e10, +0x4081810c00, +0xe243888e00, +0x1e0c3818e08, +0xe0c3810e00, +0x3c3808f00, +0xe0c1891e00, +0xe1c3818e08, +0x60c1030e08, +0x1c081010e00, +0xf1c3871c00, +0xc083810c00, +0xc0c3850e00, +0xe043810e00, +0x7043811c00, +0x60c2810c00, +0xf0c3c49e00, +0x1e061899e00, +0xc081010e00, +0xc083808e00, +0xe043811e08, +0xe1c1830c10, +0x40c3010208, +0x3040891c00, +0xe0c1818e00, +0x6001850c00, +0x61c1811c10, +0xc141830600, +0x61c1820c00, +0x6141811e00, +0xc1c1830e00, +0xe341819e18, +0x61c3831c00, +0xe1c24c8e00, +0x3e3831c00, +0xe1e3819e00, +0xe0c1810e00, +0xe043819e00, +0xf1c3811e10, +0xe161821c10, +0xf3e3879e08, +0xe281858e00, +0xe1c38d8e00, +0x30e3990e00, +0x1c0c3cc8e00, +0x1c1c1878f00, +0x1e083038e00, +0xe163811e10, +0xe241018f08, +0xc181818e00, +0xe1c3810e00, +0xe0c3038c00, +0xe0c7818e00, +0xe181c78e00, +0x60c1811c00, +0xe243010a08, +0x4041020400, +0x7021911c00, +0xe1c3830e08, +0x1e081811e00, +0x6081810c00, +0x31c1811c00, +0x21e18b1c00, +0x1c0c3810e00, +0x30e18f1c00, +0xe0418b1e00, 
+0x1c041810208, +0xe061809e00, +0xc3c1870208, +0x60c3813c00, +0x1c3c3848f00, +0x7063811c00, +0xe0c3810c00, +0xe041811e00, +0xc043818e08, +0xc043010200, +0xc081810e00, +0xf0c3011e00, +0x61c3818e00, +0xc0c1810e00, +0xe043810e00, +0x60c0811c00, +0xe241810e00, +0x30278f0400, +0xe0c2808e00, +0x4181810600, +0xe1e1860608, +0xe3c3c19e10, +0x1c2c3808f00, +0xc0c3810e00, +0xf143819e18, +0x70c3831c00, +0xc180818600, +0x60c1830c00, +0xe0c1810e00, +0xe041858e00, +0x1c381838f00, +0x1e0c3810e08, +0xc083818f00, +0x60c1830c00, +0xc0c1030e00, +0x6041811c00, +0xe0c1850e00, +0xe0c3599e00, +0x1e1c0cccf00, +0x60c1810c00, +0x7021819e00, +0xc041030e00, +0xc043830e00, +0xc081810604, +0x70c1911c00, +0xc0c1830c00, +0x43c1838108, +0x61e3830e08, +0xe1c3c89e00, +0x1f3e3839f00, +0x4043810e00, +0x60c1811c00, +0xe043818e00, +0xe141810e10, +0x60c3811e00, +0xf0e1c89e00, +0xf0c3811e00, +0xc081810e00, +0xe0c1030e08, +0x206081811c00, +0xc081808e00, +0xc043870410, +0xe043810e00, +0x6081810e00, +0x60e3031200, +0xe143858e08, +0x8101810400, +0xe0c1830e00, +0x70e1891e00, +0x40c1830c00, +0x70c1891c00, +0xe0c1818e00, +0xe1c3818f08, +0x6061830c00, +0xe1c3838e00, +0x1e063811e10, +0x60c3c78e00, +0xc043810e00, +0xe183808e00, +0xe043810c10, +0x60c1811c10, +0xe1c1030618, +0xe0c1810e00, +0xf0e7819f00, +0x63c103020c, +0x60c1811c00, +0x1e261c1be00, +0xf1c3879f00, +0x7041030e00, +0xc0c3cf8e00, +0x20c0891c00, +0x60c3811c00, +0xc083818e00, +0xc0c3c78e00, +0x1c0c7808300, +0x1c1c3c58e00, +0x60c3858e00, +0xc081010e08, +0x6063811e00, +0x1c3c383870c, +0x61c3c10610, +0xe043810e00, +0x60c0810e00, +0xe341819f18, +0x1e041070e08, +0x70c0d19e00, +0x6041810e00, +0xe1c3811e00, +0xe0c3819e00, +0xe0c3819e00, +0xe3c1810e00, +0x60c1020c00, +0x60e48d0e00, +0x60c1819e00, +0x7323811e08, +0x1e2c3c18f08, +0xe0c0810e10, +0x1e181818f08, +0x1c1c3c08e00, +0xc081810e00, +0xe283c00700, +0xe0c3810e00, +0x1c083808700, +0x1e3e1819f08, +0xc043808e10, +0xe3c1830e08, +0x4181830400, +0x60c3810e00, +0xc041010e00, +0x1c1c3c78e00, +0xe083810e00, 
+0xe0c3810608, +0xf3c1809718, +0xc083810608, +0xe0c3cf1c00, +0xc2c1030708, +0xe3c3c08e18, +0xe0c3818e00, +0xe0c1818e00, +0xe0c3879e00, +0x70e38f1c00, +0x61c1830e00, +0x70e3831c00, +0xe183808e00, +0xc081818e00, +0x18183818700, +0x30e3891c00, +0x1e0c3888f00, +0xe043078e00, +0xe043890e00, +0x6043820408, +0x4181010600, +0xe0c1811e00, +0xc083010e00, +0x40c1020400, +0xe1c1870c00, +0xe141810e00, +0x1c083818200, +0x70e3811c10, +0xe0c1030600, +0x20e1821c00, +0x70e1811c00, +0xc081818e00, +0xe1c1810e00, +0x1c0c3c08e00, +0xe161811e00, +0x31e1831c00, +0xe1c1891e00, +0xe0c3011e00, +0xe081850600, +0x81810400, +0xf3e19f9e00, +0xe0c1810c00, +0xc283810608, +0xc081810c00, +0x60c3810e00, +0xc081010600, +0x40c1810e00, +0x7041891c00, +0xe0c3811e00, +0xe0c1810400, +0xe1c0878e00, +0x71e3c91c00, +0x60c2c70c00, +0xe1c7811e10, +0xe1c3810e08, +0xe3c1810238, +0xe1c3830e00, +0xe081850e00, +0xc043888e00, +0x6061831c00, +0x6141811e00, +0xe021c88e00, +0xe0c3808b08, +0x1c0c1030e00, +0xc1c1020408, +0x18383878600, +0x6341030608, +0xe0c0810e00, +0xe1e1831e08, +0xe0c3870c10, +0x60c3811e00, +0x6043810c00, +0xe041010e00, +0x7041811c00, +0x20e1871e00, +0xe0c3819e10, +0x1e1c7c18e10, +0xe1e3831e10, +0xe0c1800e00, +0x60c1891c00, +0x70a1859e00, +0xc043810e00, +0xe0c3811c00, +0xc043810a00, +0xc001010e00, +0xc0c3810e00, +0x60c3831c00, +0xc0c1010c00, +0xf0e3819e10, +0x1e043818f08, +0x1c083808700, +0xe0c3810e00, +0xe1c3819e00, +0xc043810e00, +0x1e0c1818f00, +0x63e3c19e00, +0x60c1830c00, +0x60c1850c00, +0xe1c1818e08, +0xc081808e00, +0xe041810e10, +0x60c3831c00, +0xc181810e00, +0x70c1813e00, +0x6043810410, +0xe1c2498e00, +0x60c3811c00, +0x1c081808704, +0x61c1810e00, +0xc083810e00, +0xe1c3c58e00, +0x70e5999e00, +0x1e061c89f08, +0x71a1811c00, +0x61c3811c00, +0xe0e3870e00, +0x60c1830c00, +0xe043810c00, +0xe0c3810e00, +0x1c083808e08, +0xc0c3c08e00, +0xe0c3818e00, +0xe141810618, +0xe0c3848e00, +0x4081810e00, +0x61c3811c00, +0x4043010c00, +0x1c083808600, +0x60c1810c00, +0xe041810e00, +0xe043818e10, +0x1c041010e00, 
+0xc083810410, +0xe0c38d0e00, +0x1e041848f08, +0x3e3839c00, +0x40c3060400, +0xc0c1818e00, +0x6081810e00, +0x71c3819e00, +0x1e3c3838f00, +0xf1e1811e10, +0x60c1cf0e00, +0x30e7891c00, +0x1e383818e00, +0x60c1850e00, +0xe0c3010e00, +0xe043831e00, +0xe0c1859e00, +0x1c1c3818700, +0x40c3810e00, +0xe041030e00, +0x60c1821c10, +0xe041808e00, +0xe043830e00, +0x61c3810c10, +0xc081810e00, +0xe261811e00, +0x2041050c00, +0x1c383c18e00, +0xc0c1810e00, +0x1e2c3c58e00, +0xc1c3810610, +0x6041030410, +0x4081030400, +0xc083810e00, +0x41c1808600, +0xe141810e00, +0xe0c1850e00, +0x60e3819e00, +0x61c1810e00, +0xe0c1810e00, +0xe041811e00, +0x6143810e10, +0xe041810e00, +0x60c1030e00, +0x60c1821800, +0xc0c1810600, +0xc0c1810e00, +0xe0c1810e00, +0x181c1ce8300, +0xe0c1810e00, +0x60c3830c00, +0x3e7c08f00, +0x6041011c00, +0xe1e3030e18, +0x61e1c31c10, +0xf161871e10, +0xe1c3ce9e00, +0xc081810600, +0xe0c3890e08, +0xe081810610, +0x30638f0c00, +0x60c1070608, +0xe141810e00, +0x6143811e00, +0xc081818e00, +0x1e0c3850f08, +0xe041031c00, +0xc043810200, +0xe343c19e00, +0x60e38f1c00, +0xf043851e00, +0x7143851e00, +0x2060c1810c00, +0x7043811e00, +0x30618b1800, +0x63c1809e00, +0xe041831c00, +0x30c3811c00, +0x6041810c00, +0xe1c3010608, +0x1e0c3819f08, +0x6143810400, +0x183c3010700, +0x4081010600, +0xc245c08e00, +0xc3c3c08f00, +0x6143810e00, +0xe1c3898e00, +0x6043810c00, +0xe3c3810f00, +0x1e041009e00, +0xc083808f00, +0xc181808f00, +0xc183808e00, +0xe0c3811e00, +0x6041010410, +0x43c1808e00, +0xe0c3c19c00, +0x60c3811c00, +0x4043891e00, +0xe183889e00, +0x71c1891c00, +0x1e243c09e00, +0xe0c3810e00, +0xf0c3811e00, +0xc081810e00, +0x1c1c3818708, +0x4041030c00, +0x7061831c00, +0xe043819e00, +0xf163831e20, +0xc083c08e00, +0x1e0c3818f00, +0xe0c1870e00, +0x70c1833c00, +0xe241c48f00, +0x70e1891c00, +0x1c0c3808f08, +0x61c1830e00, +0x61e1890e00, +0x6041810c00, +0x1c1c383870c, +0xe0e3819e10, +0x1e1c3818f08, +0xe143810e08, +0x1e3861000, +0xe0c1811e00, +0xe0c3850e00, +0x1c081808104, +0xc3c1818708, +0x1e063831e20, +0xc1c3808e00, 
+0xe0c3811e00, +0x60c3850c10, +0xe0c7c08e00, +0x1e0c1808f00, +0xc1c3818e00, +0xc3c3819e00, +0x6041010c00, +0xe061c31e00, +0x61c3891e00, +0xc0c1810e00, +0x1c081010608, +0x60618a0c00, +0x6041831800, +0xc041811e00, +0xe041810e00, +0x1e0c3030e00, +0x8103818600, +0x70e3861c18, +0xc0c3030600, +0xe1c3c39e00, +0x1e18f1000, +0x60c0819e00, +0xe0c3811e10, +0xe041811e00, +0xc081010e00, +0xe0c3811c00, +0xc240810208, +0xe0c1819f08, +0xc181808e00, +0x7021891e00, +0xe1c3810600, +0x6143810e00, +0x70c3831c00, +0x20c0831c00, +0x1c080808e00, +0xe0c1811e18, +0xc1c3038304, +0xc080810e00, +0x4343808e00, +0xe081000e00, +0xe141811e08, +0x7063811c00, +0x40c1020e00, +0x1e043808f08, +0xe041810e00, +0x3c1818600, +0xe0c1811e00, +0x70c3811e00, +0x1e023819c00, +0x63c38c8218, +0xe041811c20, +0x61e3899e00, +0x60c3030c00, +0xe1c3809f00, +0x1c081030e00, +0x60c1850c00, +0xc1c1810c00, +0x8183018300, +0xc083818e00, +0x30c3cd0c00, +0x60c3810e00, +0xe1c3819e18, +0x1c08101810c, +0x70e1811c00, +0x60c1010c00, +0xe041050e00, +0xe043810e08, +0xe243810e08, +0xc083010400, +0x71e1831c00, +0xe0c1010e08, +0xe3c3819e00, +0x70618f1c00, +0x60c1830c00, +0x61618b1c00, +0xe1c3858e00, +0x60c3858e00, +0x180c3848700, +0xe081818a08, +0x61c3030c30, +0xc001030e00, +0x1c083808f00, +0xe043850e00, +0xe0c3d19e00, +0x1e083c08e00, +0xe143870600, +0x60c3010e00, +0x7063891e00, +0xe341818f00, +0x61c1c10c20, +0x60c3811e00, +0x21c1810e10, +0xe0c3030610, +0xe041810c10, +0x1e063809f18, +0xe041030c00, +0x1c0c3030f04, +0xe1c0858e00, +0xe043810e00, +0xe043031e00, +0xf3c1811e00, +0x60e3831c00, +0x7061821c00, +0xe1e3858e00, +0xe1c3839e18, +0xe041010e00, +0x7063c19e00, +0xe041890e00, +0xe041889e00, +0xe043878e00, +0x6041810c00, +0xe041830e00, +0xc081810e00, +0x71a1889e00, +0xe041830c00, +0x6041010c00, +0x182c0408700, +0xc381c0870c, +0xf1e1cf1c00, +0x6143870e00, +0xc041810408, +0xc1c3818e08, +0x41c3830618, +0xe0e3811c00, +0xe1c3c99e00, +0x1e043c09e00, +0x70e18b1c00, +0x6043891c00, +0x1c2c1808600, +0xe061c91e00, +0x7041811c00, +0xc081810e00, 
+0x3c3010204, +0x1c083030208, +0x60c1830c00, +0x60c0891e00, diff --git a/samples/digitrec/digitrec/data/training_set_4.dat b/samples/digitrec/digitrec/data/training_set_4.dat new file mode 100644 index 000000000..4183b1fcd --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_4.dat @@ -0,0 +1,1800 @@ +0x8122870400, +0x267c78000, +0x1167870400, +0x13274f8100, +0x60c3830200, +0x9327cf0200, +0x2343830608, +0x2247c98300, +0x81c3870200, +0x51e2870c00, +0x11e7cf0600, +0x4183c70600, +0x2143c30400, +0x11e78b0c00, +0x41a78f0408, +0x10e78f0c00, +0x1127830200, +0x2143830600, +0x51e3830c00, +0xa143c78100, +0x21c3c70400, +0x61c2870200, +0x51a3cf0400, +0x20c6870200, +0x10228e0400, +0x2347c70400, +0x10267c78100, +0x20c3c70600, +0x20c3cf0600, +0x60c3cf1400, +0x167c20c00, +0x20c3830400, +0x143870200, +0x107c20000, +0x2143c70600, +0x1127c10408, +0x3147870400, +0x8143810600, +0x1122870400, +0x1278f0608, +0xe143878200, +0x4163870800, +0x2143810200, +0x1327810200, +0xa347830408, +0x247810204, +0x1367c70608, +0x2183820400, +0x1c3c30200, +0x8123c30400, +0x10c3860400, +0x8142830200, +0x4183810200, +0x2146c70600, +0x81c3830600, +0x2042870000, +0x1163820400, +0x20c3860400, +0x1143830400, +0xa142830200, +0x1143820400, +0x3143860400, +0x91248f8200, +0x91468f8200, +0x11e3cf0400, +0xc167c18300, +0x5167c70c10, +0xa142870b00, +0xc3830400, +0x10a2820800, +0x2143c60408, +0x61c38f0200, +0x11a7c21800, +0xa142870200, +0xc1c7c70600, +0x10a3820800, +0x1163820800, +0x61c3870200, +0x4162870400, +0x8143830400, +0x2042870200, +0x18438f0800, +0x41c3870200, +0x51e7c70400, +0x243810200, +0x8103830200, +0x40a3820400, +0x4122870400, +0x4143c50200, +0x10243810100, +0x10e38a0c00, +0x10e38e0400, +0xa143830200, +0x8143830200, +0x2043870400, +0x1163830400, +0x2142870400, +0x147c30608, +0x8143830600, +0x1c1810204, +0x8347850204, +0x142870200, +0x4183830400, +0x8326470200, +0x2147c70400, +0x4183c20400, +0x127c10400, +0x10c3870400, +0x20c3cf0408, +0xc1c3870600, +0x8143c20400, +0x20c3c70400, +0x20c3830400, 
+0x9127cf0200, +0x9367cf0204, +0x143030408, +0x10345cf8300, +0x41c7c78400, +0x4183830c00, +0x1167870400, +0x1067860800, +0x1142870600, +0x41c3cf8200, +0x264870200, +0x4083838400, +0x63c7c30600, +0x11a6870400, +0x51a7c30c00, +0x143820408, +0x11678e0400, +0x207c88100, +0x8143810204, +0x11c3860c00, +0x10247878100, +0x40a3820400, +0x2143870200, +0x41e3870408, +0x8367860c10, +0x51e3c20c10, +0x9a7870c00, +0x83c7ef8200, +0x9167830408, +0x2448f0200, +0x1c2cf8400, +0x21c3820400, +0xc3c7810204, +0x10c3860800, +0x31c78f8400, +0x60c2870600, +0x1a7c78400, +0x50e3860810, +0xa143870200, +0xa347830608, +0xa167cd0200, +0x2147890200, +0x12244870100, +0x5163830400, +0x8103c30400, +0x3143870400, +0x11e3830c10, +0x2142830600, +0x4102830400, +0x1e2860c10, +0x1c3c30400, +0x2147810200, +0x2247c18300, +0x8143830200, +0x2143870400, +0x3147c10200, +0x6142870200, +0xa147850200, +0x81448f0200, +0x41c3830600, +0x1143870400, +0x3143830c00, +0x43820408, +0x21c7870400, +0x9163870400, +0x4182870200, +0x81c3810200, +0x2143830200, +0x41468f0200, +0x8143830300, +0x40c2870400, +0x2347cf8200, +0x2147830400, +0x40c2cf0200, +0x4082870600, +0x9122cf8200, +0x2143870600, +0x146870408, +0x22c7830200, +0x3246870400, +0xa146870400, +0x8143870408, +0x2143870400, +0x61c3878200, +0x8143850204, +0x2142870200, +0x41a3c70400, +0x4127810400, +0x23468f9a00, +0x125850400, +0x2143c70400, +0x4383c30604, +0x2043860800, +0x163830408, +0xa147870600, +0x41a3830400, +0x2183020400, +0x3146870600, +0x2142830200, +0x367c30600, +0x2147c70400, +0x11478f8800, +0x2143070400, +0x2042870400, +0x1142860800, +0x9324cf0200, +0xa1478d0200, +0x147810200, +0x2146878200, +0x8364878200, +0x2143820400, +0x243810200, +0x2347838300, +0x167cf8400, +0x8143810204, +0x1127850400, +0x2143870400, +0xc7810200, +0x41b7c70400, +0x10e38f8800, +0x8103c38200, +0x1047850200, +0x8126870400, +0x27820800, +0x8163830400, +0x10e3860c00, +0x2142c70200, +0x9143820400, +0x5123820c00, +0xc1428f8600, +0x20c3820400, +0x20c28f0400, +0x40c2870600, +0x11628f0400, 
+0x30c3cf0400, +0x20c3870200, +0x143810200, +0x3147c70600, +0x4103898200, +0x10438e0c00, +0x11428f8400, +0x2143860400, +0x4102810200, +0x2043870200, +0x9367c30400, +0x8103810204, +0x91638f0600, +0x127830600, +0x2142830200, +0xb147c70400, +0x11e7cb0400, +0x2247870204, +0x2147c28400, +0x1124cf0600, +0x2142830600, +0x11c3860400, +0x8127c98102, +0x11470e0800, +0x4102878400, +0x1167c20400, +0x11a7cf0600, +0xa143870200, +0x247810600, +0x1c3830604, +0x21c3830408, +0x8122830200, +0x1163820800, +0x20c38e0c10, +0x2143c20410, +0x8143810200, +0x20c3ce0400, +0x2143870400, +0x21c3cf0400, +0x1043820800, +0x4082830200, +0x50e3820c00, +0x146830200, +0x40c38f8200, +0x143830600, +0x2147870200, +0x41e3830400, +0x2147860400, +0x51e3c70600, +0xe3860800, +0x4082870200, +0x1327cf8200, +0x61c2870200, +0x41c3c70600, +0x81c3850200, +0x10a38f0410, +0x21e1c20c10, +0x2247850200, +0xc7ce0800, +0xa143810200, +0x23c7830304, +0x13267cf8100, +0x1163830400, +0x1127c70c00, +0x11a7cf0c00, +0x20c3820400, +0x11e28f0400, +0x1167cf0c00, +0xc3870400, +0x2143820400, +0x8143870400, +0x4122860c10, +0x10a28f0400, +0x42448e8100, +0x2142870600, +0x49a7860800, +0x1147870400, +0x40c3870600, +0x9266c70200, +0x60c3850200, +0x143820408, +0x2142070400, +0x3042870400, +0x2143870200, +0x11c7878000, +0x143810200, +0x61c38f0600, +0x167cf0400, +0x8143870200, +0x51e7c70c00, +0x143830200, +0x10c38f0400, +0x1142870400, +0x5163870400, +0x2047870200, +0x11638f0400, +0x143830408, +0x4143870200, +0x20c3820400, +0x8142870200, +0x143830600, +0x11e3820800, +0x23c3830200, +0x20c38f0600, +0x1043cf0400, +0x1a7c70c10, +0x2142870400, +0xa1c3878600, +0x1a267c70300, +0x51a6c70c10, +0x1047c60800, +0x4103830200, +0x8142c70200, +0xa142878600, +0x2043c70400, +0x31c7cf8400, +0x11c3820400, +0xa3870800, +0x9364cf8300, +0xa143830400, +0x6147870200, +0x1043860400, +0x41c3830600, +0x9147cf9200, +0x51e3860c00, +0x2143830600, +0x61a3c30400, +0x1227c10200, +0x2143830400, +0x8167c10600, +0x1146cf0400, +0x143810600, +0x9167830600, +0x11648e0400, 
+0x227c08100, +0xa3448f0300, +0x143830608, +0x2147870600, +0x50c2870400, +0x9162cf0600, +0x61c3860c00, +0x1123870400, +0x4143860400, +0x2147ca0400, +0xa147830600, +0x50c3870400, +0x10e38e0800, +0x20c38f0400, +0x27820000, +0x1143870c00, +0x8127cd0300, +0x143810200, +0x1c3820400, +0x10a7860800, +0x1c3810200, +0x4183030200, +0x81c3830600, +0x10c3870800, +0x8307c18300, +0x2267830604, +0x143810400, +0x2143830200, +0x81c2cf8300, +0x2147870600, +0x4182820400, +0x31478f0600, +0x8367c78100, +0x11e3870400, +0x5167820c00, +0x20c2870200, +0x4122830408, +0x2147870300, +0x41c3870800, +0x2147850204, +0x2143810204, +0x342830200, +0x21478f0204, +0x21c3c60400, +0x126870400, +0x1167c30400, +0x1127830400, +0x41c3830600, +0x2143830400, +0x143c20410, +0x10c3860400, +0x127810200, +0x11e6cf8200, +0x41e3cf0608, +0x1123870400, +0x8323c18200, +0x2142830200, +0x51c3860400, +0x8163830400, +0x41c7cf0200, +0x11678b0400, +0x1167cb8400, +0x41c3c78200, +0x2142870204, +0x142860400, +0x21c38f0400, +0x2143830200, +0x19678e0c00, +0x1142870400, +0x20c3870400, +0x4167810300, +0xa3ce0c00, +0x11678f0600, +0x2043820400, +0x2143830600, +0x2043820400, +0x1c3870200, +0x4124cf8200, +0x21c3870600, +0x10e38e0c00, +0x41c3870600, +0x20c3820408, +0x127810400, +0x11648f8200, +0x2344878100, +0x1127c70400, +0x21c68f0600, +0xc3830408, +0x3147860400, +0x21c3860400, +0x21c3830408, +0x143870400, +0x1143830800, +0x30c38f0400, +0x142860400, +0x23c7c70600, +0x4143870400, +0x2143830600, +0x8143830600, +0x2142870400, +0x1062860400, +0x163830408, +0x143810400, +0x143830200, +0x8327cf8300, +0x6146870400, +0x4183870400, +0x183810200, +0x2043810200, +0x143800600, +0x61c2cf0200, +0xe3860810, +0x11a2870c00, +0x2247810204, +0x12247870300, +0x18367c70600, +0x2143870400, +0x2143860400, +0x1143860400, +0x8a3870800, +0x21c3830608, +0xe3861800, +0x102c7c10300, +0x2042870600, +0x10e3860800, +0x2347c70400, +0x4182870200, +0x12347c58300, +0x2042878400, +0xc143830600, +0x61c3cf0600, +0x4102838400, +0x51e38f8400, +0x2143870400, +0x3147870400, 
+0x147810400, +0x1042878400, +0xa1438f0200, +0x2143870200, +0x2143820400, +0x2247810200, +0x8143820400, +0x8326470200, +0x21c3830c00, +0xa1c3870600, +0x21c7810200, +0x21c3820c00, +0x8367cd8200, +0x10c3820400, +0x8103810200, +0x12343870300, +0x2144870200, +0xa7c30800, +0x2147c70200, +0x141810200, +0xa147810600, +0x1167870400, +0x1c3830400, +0xc142870200, +0x8143810200, +0x11e7c30c00, +0x1163830c00, +0x8143870200, +0x8143810600, +0x1c3870400, +0x21c7870600, +0x83830408, +0x8143830200, +0x2246c78100, +0xa343830600, +0x40e38a0410, +0x1143c70400, +0x91244f0200, +0xc103c78300, +0x50c3860400, +0xe3861800, +0xa143820408, +0x11e78f8c00, +0x10247818100, +0x11478f0400, +0x1127870600, +0x2247830200, +0x10247c18100, +0x2143830400, +0x10c3820800, +0x8143810300, +0x41c3c20400, +0x10e3870800, +0x9147850200, +0x8143810200, +0xa143830200, +0x186878200, +0xe166870200, +0x61c3870400, +0xa3820410, +0x31c3870408, +0xa143870400, +0x1163820810, +0x83020000, +0x41c3810200, +0x2147850600, +0x2143870c00, +0x8146878600, +0x10c3cb0400, +0x2247c70200, +0x10240878100, +0x10c3860400, +0x3147cf0400, +0x1042860800, +0x20c3870600, +0x2147870400, +0x107810000, +0xc1c3878200, +0x1226870600, +0x143c50000, +0x2267cf8000, +0x51c7e78c00, +0x10a38a0c00, +0x8107810600, +0x1c3810400, +0x11228f0200, +0x2343870200, +0x20c3870400, +0x1c3820400, +0xa28f0000, +0x3147870400, +0x40c1870200, +0x1346870400, +0xc3830600, +0x40a38f0200, +0x1127890600, +0x243c30200, +0x11e7830c00, +0x51e68f8c00, +0x41a2870400, +0x20c28f8200, +0x11a2860c00, +0x126878400, +0x70e38f0200, +0x367c78300, +0x2043830200, +0x1143820810, +0x81a7cf0600, +0xa3830c00, +0xa142870200, +0x4187850204, +0xa166870200, +0x8163810200, +0x23478f0200, +0xe142870200, +0x367870400, +0x30a3830c00, +0xc3810200, +0x8143838600, +0x21478f0600, +0x2143810200, +0x11438a0400, +0x2147830400, +0x142820400, +0x21c3820400, +0x1142870200, +0xe1c3870400, +0x1063860400, +0x246830200, +0x41c3830600, +0x41c3c20400, +0x8143820400, +0x4123860810, +0x41438d0200, +0x146830200, 
+0x1e7870400, +0x21438f0400, +0x8103810200, +0x43870400, +0xc2870200, +0x10246c68000, +0x20c3820800, +0x8167870600, +0x4143860400, +0x1163810600, +0x40c7830200, +0x343810204, +0x243810200, +0x21c3ce0400, +0x142830600, +0x2343870200, +0x41c7838600, +0x2143820400, +0x11e6870c00, +0x143830600, +0xa143820400, +0x2142870600, +0x1c3870c00, +0x27820c00, +0x1167860800, +0x1c3850200, +0x8224c70200, +0x167810408, +0x1124cf0200, +0x42820400, +0xa147c70600, +0xc3870400, +0x43820000, +0x4122870200, +0x4183830400, +0x21438b0200, +0x4163830400, +0x1167878000, +0x2143c60400, +0x4082860400, +0x11678f0400, +0x1123cf0400, +0x1143830400, +0xa142830200, +0x21c7838200, +0x41c38f8400, +0x2347870200, +0x2142870200, +0xc103c78300, +0x1042870400, +0x10c38e0400, +0x2347c50204, +0xc3020408, +0xa343810200, +0x1143860408, +0x2143830600, +0x142870400, +0x21c3c70800, +0x20c3ef8000, +0x20c3840800, +0x2143cf8400, +0x1126870400, +0x11a7830408, +0x1143870c00, +0x143c60408, +0x11a38f0400, +0x8143810200, +0xc3830400, +0x143830200, +0x4082870400, +0x343c20400, +0x12266478100, +0x20c3870400, +0x2142870200, +0x20c38f0400, +0xa1428f0100, +0x60c2870200, +0x247830400, +0x10a3820400, +0x2143870400, +0x1127c30200, +0x20c2860000, +0x1c7820c00, +0x143830400, +0xa143870200, +0x2143870400, +0x1163870408, +0x143850204, +0x1367870608, +0xa142870600, +0x10e3870800, +0x367c70200, +0x2347810200, +0x2367cf8304, +0x3142870600, +0x8143830600, +0x8346878200, +0x2143820400, +0x12343830200, +0x41a7830c00, +0x2143830204, +0x23c7c78600, +0x40a3cf0400, +0x2147830600, +0xa3c78f8a00, +0x61c3870200, +0xa143830200, +0x1163870400, +0x60c38f0200, +0x3166c70400, +0x1164cf0200, +0x61e3cf8300, +0x1143070800, +0x5163c70400, +0x4127830400, +0x341810204, +0x1043860400, +0x146870400, +0x4143870400, +0x2146870200, +0x2347c70600, +0x8367870300, +0x2347c10200, +0x20428f8400, +0x20c2870600, +0x1167870608, +0x8143810200, +0x123830c10, +0xb164cf0200, +0x20c78f0400, +0x367c30200, +0x2143850600, +0x8143830200, +0x143810400, +0x2143870200, 
+0x8143c58300, +0x2143870408, +0x2143830400, +0x163850200, +0x2347830204, +0x1164878400, +0x41e78f0408, +0xc1c3830200, +0x51e38f0400, +0x30c3860c00, +0xa1c3870600, +0x1b3c70c00, +0x20c3870200, +0x142870200, +0x2147c90200, +0x1a7c60800, +0x327810600, +0x2347870300, +0x2346870200, +0x1147870600, +0x18383810302, +0x51c3860400, +0x4143820400, +0x12243838100, +0x4083830400, +0xc3860400, +0x4142878200, +0x4083810400, +0x1163830c00, +0x127c90200, +0x1040860000, +0x10a3870810, +0x1c3830400, +0x2043820400, +0x2147c30608, +0xa147c70400, +0xa344cf8300, +0x1127870400, +0x21c3c70400, +0xc3820400, +0x1167870400, +0x142870400, +0x12448f0200, +0x1147820400, +0x10e3820c00, +0x1043860400, +0x5127870400, +0x2247810200, +0xa143830600, +0xc3c60800, +0x4081030200, +0x20c3860c00, +0x4143810200, +0x4083820400, +0xa143810400, +0x61c38f8400, +0x8343c78300, +0x51c7c30c00, +0x20e28f8200, +0x1043860800, +0x2047830200, +0x21c28f0400, +0x122870800, +0x20c28f0200, +0xc1e7cf8300, +0x1147c60400, +0x20c3820400, +0x10678f0600, +0x3082860600, +0x1127870600, +0x2147830600, +0x11678f0400, +0xa347830200, +0x4122870400, +0x2042860000, +0x147e70400, +0x127820800, +0x42860400, +0x41a2c60400, +0x2143820400, +0x2143820400, +0x2146870200, +0x2143830200, +0x11c3870400, +0x5147c38c00, +0x126830400, +0x8143810200, +0x2343810300, +0x2143860800, +0x4183c70600, +0x20c28f0400, +0x10c3860400, +0x4183830600, +0x147810200, +0x8143870400, +0xc082830200, +0x143830600, +0x1144870200, +0x41a7c70c00, +0x21c3870c00, +0x2146870600, +0xa3c7c70200, +0x8143830400, +0x61c3820408, +0x143ee0400, +0x1142830400, +0x1147c20c00, +0x5167830400, +0x8163c78600, +0x41a3c70400, +0x2247870200, +0x2142830400, +0x40c3030400, +0x8143830600, +0xc1820800, +0x20c3820400, +0x31c3820400, +0x41a78b0400, +0x1127c70200, +0x9163cf0400, +0x2042860400, +0x21c3870200, +0x30c7878c00, +0x1147870c00, +0x11e3820410, +0x1142870200, +0x1e3830400, +0x20c3830204, +0xa3820c00, +0x4083850200, +0x11e3c20800, +0x30c38f8c00, +0x4923870c00, +0x4103830600, +0x8347830200, 
+0x1124cf0600, +0x3c7810302, +0x41c2870200, +0x163830600, +0x143cd1200, +0x8143810200, +0x2146870200, +0x143820c00, +0x41c3878600, +0x6142c70200, +0xc1820800, +0x11428f8400, +0x6147870200, +0x3142860400, +0x8146870200, +0x11e7cf0408, +0x9167850600, +0x8142830600, +0x4122c70400, +0x2147870608, +0xa143e70200, +0x5163830400, +0x1c7810204, +0x8143810200, +0x143c60400, +0x2047830200, +0x20c3860810, +0x1162870800, +0x1123830400, +0xc1e6cf0200, +0x2143870400, +0x41a7830c00, +0x146870408, +0x2142870400, +0x8143c10400, +0x147c20410, +0x2142870000, +0x2247858100, +0x1146860400, +0xc3830400, +0x1042870800, +0x8143820400, +0x2142870200, +0x8142870200, +0x13678f0204, +0x40c2870200, +0x8143830200, +0x61e3830c00, +0x9367870400, +0x12247870200, +0x2142870200, +0x2246878300, +0x2142870200, +0x11478f0400, +0x2247810300, +0x6142870200, +0x91668f0700, +0x1a7c70400, +0xc183810200, +0x2143830600, +0x2143cd0200, +0x1147c78800, +0x963820c00, +0x41e3830408, +0x41c3870400, +0x143810400, +0x1167830408, +0x11a68f8400, +0x8142870200, +0x1167830800, +0x1c6830200, +0x11e3890400, +0x10e3830410, +0x9123cd9100, +0x143810600, +0xd163830408, +0x18265c48100, +0x4083820400, +0x4162870600, +0x2143020800, +0x1167ca0400, +0x51e78f0400, +0x2143830400, +0xa1478f8200, +0x4183810200, +0x41c3870200, +0x8143810200, +0x41c3810200, +0x143830604, +0x41c3830400, +0x1163870400, +0xc122cf8200, +0x8324c70200, +0x4141878400, +0x1127870400, +0x1142830400, +0xc3870600, +0x102830400, +0xa2860400, +0x822860800, +0x12247878100, +0x4083810200, +0x2147810200, +0x2143870200, +0x1628f0200, +0x8347c10600, +0x2144878100, +0x11e3830800, +0x1a78f0400, +0xa143870400, +0x2143830600, +0x1142860400, +0x2143830200, +0x4182870200, +0x11678f0600, +0x8143810600, +0x1123860c00, +0x143020408, +0x143830400, +0x2246870200, +0x4083030000, +0x10628f0400, +0x10326478300, +0x8147870204, +0x8143810204, +0x41c3810400, +0x2083820400, +0x8224c78200, +0x2342870200, +0x2142870600, +0x9324cf0600, +0x11c68f0200, +0xc3830200, +0x13366c78300, +0x40c3820400, 
+0x2043820400, +0x2142870200, +0x9344cf0600, +0x1143820c00, +0xa143c48100, +0x21c7830204, +0x20c3c60800, +0x41e3870608, +0x30478f0c00, +0x9367830408, +0x10c38e0800, +0x1167870608, +0x51e7870400, +0x243850202, +0x2142878200, +0x21c3870200, +0x2346870200, +0x50e2870400, +0x1123820c00, +0x51e7cb0400, +0x2043860400, +0x1a7830400, +0x264870200, +0x10438f0c00, +0x1162870600, +0x1163860400, +0x147838608, +0x12267c98300, +0x2147870400, +0x1127c90200, +0xa346c78300, +0x127cf0200, +0x4182870200, +0x51a28f0200, +0x1122860400, +0x4183850200, +0x2343870200, +0x247cc8100, +0x2143830600, +0x143c30408, +0x10c3860810, +0x10a3860400, +0x1e3820410, +0x8145850200, +0x61c3878600, +0x2247810200, +0x50c28f0400, +0x11e78e0e00, +0x11e3820c10, +0x81810400, +0x41b7870400, +0x11e68f0c00, +0x20c68f8600, +0x8122870204, +0x4083850200, +0x8143810204, +0x8343830200, +0x1c3830600, +0x2143830408, +0x12267810200, +0x123478f8300, +0x5127c70600, +0x2143870200, +0x2343810200, +0x142830400, +0x1122c70400, +0x238f0400, +0x10e3820800, +0x4143830400, +0x9126c70400, +0x2143810600, +0x2347850200, +0xa1468f8600, +0x1367cf8300, +0x4126c30400, +0x22478f0200, +0x1c3820c10, +0x41a7c20400, +0x51a6cf8400, +0x247830408, +0x123c30408, +0x1042870400, +0x2143810200, +0x11e7878c10, +0x143830c10, +0x2143860000, +0x30c38e0c00, +0x1142830000, +0x8103810200, +0x2143870600, +0x11668f0000, +0xc3820400, +0x8147870600, +0x2143830400, +0x41a7c70c10, +0x2247c90200, +0x21c3870408, +0x2343810400, +0x51e7870408, +0x20c7830400, +0x20c3820c00, +0x9126870400, +0x4167870608, +0x18143810300, +0x2143870400, +0x8327ce8102, +0x13678f0600, +0x8167c70408, +0x5163820400, +0x11638e0c00, +0x247c68100, +0x123820000, +0x810448f8000, +0x147830408, +0x164cf0200, +0x20c3870600, +0x6347cf8400, +0x2143870800, +0x143830604, +0x41c3860400, +0x1143860400, +0x21c3c60c00, +0x2146870200, +0x40c28f0200, +0x143820408, +0x1043860000, +0xb1628f8600, +0x181850200, +0x8162c70200, +0x8162870200, +0x1147c70800, +0x41e3830408, +0x41e2870400, +0x10c3820c10, 
+0x30c28f8400, +0x183820400, +0x1c3830204, +0x21468b0200, +0x127810200, +0x40c3870400, +0x4082870200, +0x2043820400, +0x143820800, +0x41c3830400, +0x1c3830600, +0x2143820400, +0x1124cf0600, +0x2083830400, +0x2043860800, +0x4083070400, +0x10e3c60800, +0x9366cf0200, +0x10243810200, +0x8143830608, +0x2147cf0200, +0x8143870604, +0x1143820800, +0x5143c70408, +0x204083870000, +0x8143c78100, +0x4147c28c00, +0x9167c78608, +0x8367c70600, +0x1a7878400, +0x8143810204, +0x8147830400, +0x247c08102, +0x4143820400, +0x10c2870400, +0x8146878200, +0xa144cf0200, +0xa147c78300, +0x1327cf8000, +0x2247830200, +0xa142870200, +0xc2870400, +0x20c7cf0400, +0x10e78f0400, +0x143820400, +0x143830400, +0x1a3890200, +0x9a7870400, +0x3167870600, +0x8107810200, +0x1c3830604, +0x2143870600, +0x1c3820c00, +0xa347c78c00, +0x40c3870408, +0x2347c60400, +0x4142870400, +0x11a38f0400, +0x1167c20400, +0x41c2870200, +0xc3820400, +0x12347870304, +0x1c6cf0000, +0x20c28f0200, +0x2342870200, +0x8367c50300, +0x2143830200, +0x2147c70608, +0x47810200, +0x50c3860c00, +0x2147890200, +0x21c6830200, +0x4122860c10, +0x8162860600, +0xa347cf8300, +0x2143c60400, +0x2143c30400, +0x2147870200, +0x10c3860800, +0x21c7ce8400, +0xd167cf0400, +0x143810200, +0x41a7860c00, +0x2143820400, +0x147810200, +0x83658f0200, +0x8183c30600, +0x2143870200, +0x4143c70400, +0x2147830600, +0x1143c60400, +0x8143810200, +0x2142870200, +0x142820400, +0x41a3870600, +0x1126cf0c00, +0x8127c08100, +0x10a2820c00, +0x40c2820400, +0x11678f0800, +0x1146830400, +0x1126870400, +0x183820400, +0x10e3820810, +0x2267810600, +0x1a4860400, +0x8142830200, +0x3147830400, +0x1167830400, +0x2147810300, +0x4183c30600, +0x2142870400, +0xa147810204, +0x8144878400, +0x8327c70600, +0x4142870200, +0x2142870400, +0x10267810200, +0x1142860400, +0x4143830200, +0x2347cf0200, +0x8143810200, +0x1c3830408, +0x1042870400, +0x3043870400, +0x1176fc38300, +0x2147cf0400, +0x107810200, +0x143820400, +0x143830400, +0x143870400, +0x2144870200, +0xa347870300, +0x3c3830200, +0x10c3820400, 
+0x1043070800, +0x2448f8100, +0x2147c70600, +0x8147c78200, +0x2143820400, +0x147cf0400, +0x10e7870800, +0x2147870400, +0x41a3870410, +0x1167820c00, +0x163810400, +0x21c7830600, +0x41e3860c00, +0x143810400, +0x10e38f0800, +0x8127cd0204, +0x1147c60400, +0x20c3870200, +0xc364cf8300, +0x1678f8800, +0x43c3830200, +0x2143870200, +0x41a2870400, +0x143830200, +0x40c3870c00, +0x167870608, +0x2142870400, +0x61c7cf0200, +0x21c3870400, +0x21c3830200, +0xe1418f0200, +0x41e3810408, +0x10c7830400, +0x2043c70800, +0x1143830400, +0x40c7870200, +0x9143820600, +0x2143c60400, +0x2143830400, +0x143810200, +0x41e3870408, +0xc3870400, +0x10e38f0c00, +0x1167890200, +0x20c68f0408, +0x4183830200, +0x41c3820800, +0x1162870800, +0x60c38f0200, +0x1163820400, +0xc2870400, +0xc3830408, +0x1163870800, +0x11448f0200, +0x8123c70600, +0x21c7c78400, +0x827870800, +0x2243830200, +0x83870400, +0xe2878c00, +0x1043860800, +0x2081020400, +0x2143cf0400, +0x4083830400, +0x11268b0200, +0x1327810200, +0x146870400, +0x41c3838c00, +0x10a3870400, +0x2183870200, +0x2147870200, +0x40c2870200, +0x4123820c10, +0x2142870400, +0x1144870400, +0x41e7830c10, +0x2147810600, +0x11628f8400, +0x142870100, +0x4123830408, +0x8143810200, +0x1043860800, +0x40838f0200, +0x2246c78300, +0x8143810204, +0xa1c3c30608, +0x51e2cf0400, +0x143820400, +0x11e2830400, +0x1126cf0200, +0x21c3c70400, +0x2143810200, +0x10c3860400, +0x143820400, +0x247810200, +0xc2820400, +0x163870608, +0xa147870600, +0x4143810200, +0x8367c10608, +0x61c3cf0600, +0x1127830400, +0x2143820400, +0x41c3870608, +0x8143850200, +0x2082870400, +0x22830000, +0x142870400, +0x20c3870400, +0x9167870600, +0x30c7ce0800, +0xa346878300, +0xa142870200, +0x143860400, +0x2147c70400, +0xc0c38d8b00, +0xa366870200, +0x10c2860400, +0x1147850200, +0x21c7870408, +0x21c3830400, +0x2147810200, +0x1e38b0408, +0x81268f8200, +0x11678f0400, +0x9267c70200, +0x20c3030400, +0x8307c68100, +0x31c7cf0600, +0x1147830c00, +0x2147870200, +0xe3820400, +0x31c38f0400, +0xe142870400, +0xa3448f0200, 
+0x8143830608, +0x167c30410, +0x3147878400, +0x10c3070800, +0x8123c50200, +0x2247878300, +0x1143830400, +0x10c3870400, +0x8327c10200, +0x41a7c70600, +0x2147830600, +0x20c3830400, +0x4347830400, +0x1167870400, +0x367870400, +0x41c7cf0000, +0x2147830400, +0x2143830400, +0x41a3c20c10, +0x41c3830400, +0xc3820400, +0x341810600, +0x2047810200, +0x8347c70204, +0xc3860400, +0x41c2870400, +0x143810400, +0x2167850600, +0x2247870600, +0x143820400, +0x4123c20c10, +0x2267870204, +0x1143860400, +0x40c3870800, +0x2143870200, +0x1142860400, +0x1143860400, +0x1c3870200, +0x1c3860c10, +0x2143c70400, +0x1163860400, +0x61c68f8400, +0x2142870400, +0x5166cf0600, +0x2264c70200, +0x2143820408, +0x2043820800, +0x8226c70600, +0x143830400, +0xc142870200, +0x4383030200, +0x8103850200, +0x1167c78c00, +0x10327cc8100, +0xa143830600, +0x8147870400, +0x10c3860400, +0x10c2870400, +0x41c2cf8600, +0x1163861800, +0xc28f8200, +0x10a3870800, +0x20e28f0c00, +0xa1c6878600, +0x2043830200, +0xa142860400, +0x147c38400, +0xa366cf8300, +0x8127850204, +0x2247878300, +0x2147870400, +0x2143820400, +0x11227c10200, +0x11e2870c00, +0x9a38f0800, +0x51a3850400, +0x9127cf0400, +0x2143820400, +0x11e2cf8400, +0x51a7820c10, +0x51e7cf0400, +0x8127c10408, +0x1c3820408, +0xa3820800, +0x8103810200, +0x127c30408, +0x10c3870800, +0xc3cf0200, +0x10c3820800, +0xa143870604, +0x3147870400, +0x9167890600, +0x8224cc8100, +0x2146830200, +0x1065860400, +0x61c7870200, +0x127820800, +0x1063820800, +0x5163820c10, +0x23c7c30600, +0x21e0830820, +0x142870400, +0x143820400, +0x2143c70200, +0xd367830c00, +0x4383010200, +0x6143830800, +0xa7cf0800, +0x8126870600, +0x2243810200, +0x8103830200, +0x33678f0600, +0x2061e28f8300, +0x2147810200, +0x11e68f0e00, +0xa347870200, +0x4083870400, +0x31c28f0400, +0x147830600, +0x183870400, +0xa143870200, +0x2143810200, +0x50e3860800, +0x6143820400, +0x27820000, +0x2042860400, +0x40c2cf0200, +0x2142830400, +0x143820400, +0x1167ce0400, +0x83e7830408, +0x1142830400, +0x20c38f0200, +0x10e3820800, +0x143850200, 
+0x4183870600, +0x10c3060800, +0x2147830200, +0x61c78f8600, +0x166830408, +0xa1c3820400, +0x8143850200, +0x143810200, +0x10e3820800, +0x91678b0400, +0x143820400, +0x3c7c30408, +0x61c3820400, +0x41c3820c00, +0xa347870200, +0x2143860400, +0x1142860400, +0xa346870200, +0x63cf0000, +0x11678f8c00, +0x23c7870200, +0x8347850300, +0x1127830400, +0xc102870200, +0x2247830600, +0x2147830408, +0x1123830410, +0x40c2870200, +0x9167830600, +0x2143870200, +0x127c70400, +0x8143870200, +0x1325cf8000, +0x20c78b0c00, +0x3142870400, +0x1e3c70408, +0x103810200, +0x143850200, +0x162870400, +0x1163860400, +0x10a3c60800, +0x167810200, +0x2143860400, +0x4125c70204, +0x2347870204, +0x1438d0204, +0x10c3830c00, +0x1043860800, +0x2143860400, +0x41c3878400, +0x123810408, +0x1163870600, +0x2142860400, +0x11e38f0c00, +0x2143870200, +0x41c3830400, +0x8142870200, +0x147850200, +0x147c60c00, +0x142860408, +0x10c28f0400, +0x21c6830200, +0x9127c70400, +0xc3cd0000, +0x8141810600, +0x20c3830400, +0x1e3861800, +0x21c7c70c08, +0xa143830600, +0x11e7cf0408, +0x30438f0800, +0x8343c10204, +0x40c3c60800, +0x1163820400, +0x41c7c10200, +0x8124870200, +0xc102830200, +0x20c3830400, +0x11668f8400, +0xc7c60000, +0x8143850100, +0x1c3820200, +0x1162870200, +0x2143830408, +0x11c3830400, +0x3043878c00, +0x9143830400, +0x19367cf8200, +0x4146870300, +0x9167870600, +0x4147810200, +0x10c3860400, +0x8147850200, +0x19324cf8100, +0x41c3850608, +0x1a2c70c10, +0x20c3820400, +0x2143830400, +0x2042860400, +0x127830400, +0x20c3820400, +0x1147c78800, +0x8102c78300, +0x2347870200, +0x41c3820408, +0x2047810200, +0x10226478300, +0x30c3870c00, +0x8163c20400, +0x20c3860c00, +0x20c78f0200, +0x1228f0200, +0x143830400, +0x142830200, +0x2142870200, +0x2243830200, +0x4083820400, +0x2043870400, +0x143810204, +0xe2820800, +0x1c3830408, +0x10a3820800, +0x81c3c30600, +0x8103810200, +0x41e3830c00, +0xa147e70600, +0x4141820800, +0x1163870400, +0x8102830200, +0x2146870200, +0x142870200, +0xe3830c10, +0x40c38f8200, +0x2143830400, +0x2142870600, 
+0x20c3870800, +0x143870400, +0x9367810600, +0xa143870200, +0x10e3830400, +0x12247870300, +0x20c28f8200, +0x61c3870200, +0x21c3830600, +0x8126830200, +0x2247c78300, +0x21e7cf0c00, +0xc143870200, +0x1146870400, +0x2143830600, +0x143830400, +0x51e3820c10, +0x10244c70200, +0xc3020400, +0x147810200, +0x10c3860400, +0x125c70400, +0x9367c78400, +0x41e3870810, +0x11278b0400, +0x144870200, +0x2147cf0400, +0x1067860400, +0x8143820400, +0x123820400, +0x8143810200, +0x21c7870408, +0x11668f0400, +0x21c2870200, +0x92244f8100, +0x167cf0200, +0x1127cf0400, +0x20c3810200, +0x21c3870600, +0x2143870400, +0x3166c70200, +0x11a68f0400, +0x10c7c61800, +0xa147870600, +0x9163830608, +0x143820400, +0x8167cf8300, +0x8107810300, +0x60c6cf0200, +0x41e7c78c00, +0x10c3820400, +0x1123830400, +0x2142870204, +0x4082870000, +0x8147830400, +0x2143870600, +0x4103810600, +0x4163850200, +0x2143820400, +0x8126c70200, +0x11c2870400, +0x21c3830600, +0x60c3cf0200, +0x2347830604, +0x4167830600, +0x10e28f0400, +0x4083820400, +0x2147810300, +0x147c30408, +0xc3c20400, diff --git a/samples/digitrec/digitrec/data/training_set_5.dat b/samples/digitrec/digitrec/data/training_set_5.dat new file mode 100644 index 000000000..02ff008fc --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_5.dat @@ -0,0 +1,1800 @@ +0x7183899e00, +0x1f2020c00, +0x71818f1e00, +0xe3021800, +0x10e30e0000, +0x10c3030410, +0x19c3030c00, +0xe103010e00, +0xe2060000, +0x6181010600, +0x20c1021c00, +0x7103818e08, +0xf1c3819e00, +0x2181030400, +0x2083811c00, +0xe180811e00, +0x71c3810e00, +0x7187810e00, +0x7103010e00, +0x61c2819e00, +0x7181030c00, +0x11e3031c00, +0x39e6060608, +0xc183c08e00, +0xf3021c00, +0x3183890e00, +0xf38e0800, +0xf3060000, +0xc1c3810e00, +0x3081811c00, +0xf102038f08, +0x31e3070418, +0x7183c99e00, +0x1e3e3078f0c, +0x6103810e00, +0x11e3011c00, +0xe3020800, +0xe1c1c99e00, +0x18e3060c00, +0xe3860800, +0x10e3071c00, +0x21f38d0e00, +0x3183810c00, +0xe3010c00, +0x71e3031e08, +0xe2040c00, +0x31e30b0c00, +0xe3c3c08f00, 
+0x61c1010618, +0xf1e2070608, +0xf2021800, +0xe1c2488f00, +0x7103810e08, +0x10c3021800, +0x4163010c00, +0x3182811c10, +0x1e3890c00, +0xe103818e00, +0xe1020800, +0x3183050608, +0x6181010400, +0x11c2020c10, +0xe2070c00, +0x70e3890e00, +0x6103899e00, +0xe307808700, +0x3183020e00, +0x70830f0e00, +0x4081030c00, +0x43c2408700, +0x61c3c10c00, +0x3083810400, +0x6083030400, +0x3947020c00, +0x3080080c00, +0x71c7811618, +0x1f3030c00, +0xe183808700, +0x6183010e00, +0xc1e3010e00, +0xe183850e00, +0x10e3021800, +0x6081830c00, +0x31e3031c00, +0x71c1031c00, +0x10e10e0c00, +0xf383010700, +0xe30f0000, +0x1e3090c00, +0x7183811e00, +0x6003000400, +0x7083070e00, +0x41c1020e00, +0x7103cb1c00, +0x1f2020c00, +0x50c3020e00, +0x6103010600, +0x6081810c00, +0x70c1871c00, +0x7103810c10, +0x610a0000, +0x71c3818e10, +0x7183819e00, +0xe1e7809e00, +0x30e3021c00, +0x4103819e00, +0x3903030c00, +0xb1e3038e00, +0x6183808f08, +0x71e1811e00, +0xe103c08e00, +0xe10e0000, +0x6083811410, +0x31c7810e08, +0x30e30f1c00, +0xe2020800, +0x61c3010208, +0x70c3070c00, +0x38810f0c00, +0xc203808e00, +0x7181090e00, +0x30c3811c00, +0xe143810e00, +0x187480200, +0xf3021c00, +0x11e3020c00, +0x71e3070c00, +0x4081810c00, +0xe207c08f00, +0x6186070300, +0x6183850e00, +0xf3e3030e08, +0xf107808f08, +0xf187800218, +0x10c10b0800, +0xc1c3010600, +0xe103010e00, +0x6183810e00, +0x1060c08f0c00, +0x61e3030e18, +0xc3060800, +0x1c3020208, +0x7183850e00, +0x41c3031e00, +0x11e2021410, +0x79c3878e00, +0xe30b1c00, +0x11c2030c00, +0x7183010e00, +0x6183810600, +0x71818b1c00, +0x6083810c00, +0x40c3810c00, +0xf1e3070e18, +0x10e1890c00, +0x6103808e00, +0x11e7810e00, +0x6081810e00, +0x60c3810e00, +0x7183818e00, +0x6183c58e00, +0x3183011e00, +0x2181030600, +0xf102070708, +0xe103810e00, +0x3183010400, +0x7183810e00, +0xe1c3c18e08, +0x31c3090e00, +0x6101030600, +0x10e3821c00, +0x41c3030604, +0x2183030204, +0xe183838300, +0xe143810e00, +0xe107818600, +0x4083810c00, +0x6181c10c00, +0x7183070e00, +0xe103810608, +0xf3020c00, +0x6181850e00, +0x7181831c00, 
+0x41e3020c10, +0xe1c3858e00, +0x31e3010e18, +0xe2040800, +0x31f7020e08, +0xe10e0800, +0x31c3070400, +0xc103810e00, +0xc301808f00, +0x610e0800, +0xe30a0c00, +0xe103818e00, +0xe103810e00, +0x60e2020c00, +0xf1c3c89f00, +0x63c781830c, +0x6181811e08, +0x10c3021c00, +0x6083010410, +0x65060000, +0xe101010e00, +0x71c3030c10, +0x3103030c00, +0x6181810e00, +0x4183010200, +0x1f3070c00, +0xc3030410, +0x638e0000, +0x6182020600, +0x6081010c00, +0x6183810e00, +0x20e2031e00, +0x1c3c7c08f00, +0x7183030e00, +0x6103030600, +0xe303810e00, +0xf3c1010e08, +0x71c3810608, +0xe103018e00, +0xe10e0800, +0x71c3031c00, +0xe2020800, +0x78c2030c00, +0x40c3060200, +0x71c206020c, +0x39c38d1e10, +0xc3c3c08e00, +0x7183050e00, +0x7387c08f08, +0x18e1021c00, +0xe307c08f00, +0xf1e781020c, +0xf103818e00, +0xf307848f00, +0xe38f0000, +0x61e1810e08, +0xf3060c00, +0x6103808610, +0x6081810c00, +0x6183818e00, +0x3982020c00, +0x11f7070c00, +0x7183010e00, +0xf103818e00, +0x3083090c00, +0x7183c39c00, +0x7083030c00, +0xf3e7070e18, +0x70c2060c00, +0x10e0890c00, +0x3083031c00, +0x1e3811c00, +0x71c3858e08, +0xf1c3c50e00, +0xf103c08f00, +0x7182070c10, +0x7183891c00, +0x71c2070e00, +0x10e18f0800, +0x7181811c00, +0x7183810e00, +0x7103808e00, +0x21e6020608, +0xf3070800, +0x30c38f0800, +0x10c3020408, +0x1e2021800, +0x19c2060c00, +0x21c6000200, +0x1f2020c00, +0x3081810c00, +0x6101010c00, +0x6102070204, +0x6183810c00, +0x30c3010e00, +0xf103810e00, +0xe181030208, +0x71c3811e10, +0x30c7810e00, +0x10c3821c00, +0x31c3891e00, +0x6183810c00, +0x7183891e00, +0x3083050a00, +0x7183c19c00, +0x7183030600, +0x7183030e08, +0xe1060c00, +0xf3040c10, +0x7183011e00, +0xc100808e00, +0xe100888f00, +0x1f20a0c00, +0x6103878e00, +0x6103810c00, +0xf303c19e00, +0x10c3040c00, +0x1e3010c00, +0x10c10e0c00, +0x7983030c10, +0x7183889e00, +0x61e2020e08, +0x1c3cc8000, +0x7183808e00, +0x71c2020418, +0xe183818e08, +0x6183018e00, +0xe1c3818e00, +0x7083870c00, +0x4101810200, +0x3183010e00, +0x1e385020c, +0x11c38b0c00, +0x6183810c18, +0xe183038e00, 
+0x39c2020e00, +0xe30e0000, +0x3983890e00, +0x7103c19e00, +0xe3020c00, +0x7183030c00, +0xe18f0400, +0x7183810600, +0x1f3020e00, +0xf3070410, +0x3183890c00, +0x30e3811e00, +0x7183891e00, +0xf1e3831e10, +0x6180810e00, +0xe1c6409f00, +0x71c1030c00, +0x10e3811c00, +0xe1c3878e08, +0x10e3070c00, +0xe1c7810e18, +0x3387818700, +0x30c1031c00, +0x70c30f1e00, +0xf183810e00, +0xc181c78f00, +0x31c3890e00, +0x7103818e08, +0xe103010600, +0x20e38f1c00, +0x30838f0400, +0x1081050c00, +0x2102070100, +0x7103810e00, +0xe3030400, +0x7182031e00, +0x71c3851e00, +0xf103810e00, +0x11c3811c00, +0x7103810e00, +0xe30e0800, +0xf1c3870e00, +0x6103819608, +0xf183c08e00, +0xf187c08f08, +0xe181818e00, +0x70c3021c00, +0x7083850a08, +0x60c10a0c00, +0x1e303808f00, +0xc103810e00, +0x2081870c00, +0x6183858e00, +0x6181850e00, +0x63020c00, +0x1e3c0c08e00, +0xf183050e08, +0x71c3831e00, +0x7183890e00, +0x71c3071e00, +0x18830e0800, +0x61c3c19e18, +0xf3020c00, +0xf101898f00, +0xe3010c00, +0x6183810e08, +0xe08f0800, +0xf2060c00, +0x7183811e18, +0xe081810e00, +0x71e7818e08, +0x7187850c00, +0x31e3850e10, +0x1e1021c00, +0x7183810e00, +0xc183c4810a, +0xe103c08e08, +0x7102020410, +0x7103030e00, +0x30818b1c00, +0xf102070700, +0xe307c08608, +0xf1e3859e08, +0x1e3c2488f00, +0xe2040800, +0x2183050200, +0x7081811e00, +0x7883031e00, +0x60e3070608, +0x7181811e00, +0xe3890c00, +0x6183850e00, +0xf103819e00, +0x71c3810e00, +0xc107c38700, +0x6103818200, +0xe183810608, +0x6103809e00, +0x2081810c00, +0x70c3811e10, +0x6101010600, +0xe307cc8700, +0x7143011e00, +0x6183810e00, +0x31e3811e00, +0x3387819e00, +0xc143810e00, +0x1f2020c00, +0xf183011e00, +0x43e7818700, +0x3083870c00, +0x71838c8e00, +0xe182808f00, +0x7081031e00, +0xe103848e00, +0x73e3030e18, +0x6180898e00, +0x31f2060c10, +0xf303850e00, +0x2181010208, +0x30c1831800, +0xe1c0489e00, +0x70818d0e00, +0x11e2031e00, +0x6183070304, +0x7181010c10, +0x6081890e00, +0x3081020410, +0xf1c3c48f0c, +0x30c3010e00, +0x11e3031c00, +0xf30e0c00, +0xf183810e10, +0xe3090800, +0xe1c3810e08, 
+0xf143819e00, +0x60c3030e00, +0x6183810e00, +0x71c3010618, +0x6181010600, +0x41e2070200, +0xf30a0c00, +0x71c3878e00, +0xc103808e00, +0xb101819e00, +0x1f3010e00, +0x2081030c00, +0x4103810600, +0x31c3810e08, +0x2183810e00, +0x1e7810c00, +0x30c18e1c00, +0xe1021c00, +0xe103848f00, +0xe1021c00, +0x63c7c58f08, +0xe3c3c58608, +0x30c10d0e00, +0x30c38b0c00, +0x7081031e00, +0xe10e0800, +0x1e2020c00, +0x71830b0c00, +0xf101030e08, +0xf3e3039e18, +0x4343808b08, +0xe103818f00, +0x10c3020c00, +0x40c2060200, +0x61e2060608, +0x40c1030c00, +0x6103810e00, +0x6181811e00, +0x11c2010a08, +0x3883011c00, +0xf3021c00, +0xf103819e00, +0x7183811c00, +0xe103818e00, +0xe103850200, +0x11e2020c00, +0x10c3031c00, +0x3083831c00, +0x30e30e1800, +0xe30e0000, +0xe2020c00, +0x79c202060c, +0x11c20f0700, +0x71c7070e00, +0x7183811c00, +0x3083090e00, +0xe3070000, +0x3183020410, +0xe103c08e00, +0x60e1011e10, +0xe103810600, +0x7183810e00, +0x18387c08700, +0x7183890e00, +0x71c3831e00, +0x71838b0e08, +0x1f2030410, +0xe3c3c89f00, +0x6103810e00, +0x7103810e08, +0x1e2020c00, +0xe100810a00, +0xe30e0800, +0x70c1c11c00, +0xc180810600, +0x60c0810e00, +0xf107809e10, +0xf3020800, +0x7183890c00, +0x7187811c10, +0x7183810600, +0x7183810618, +0xe183808f00, +0xe1831c00, +0xf1c3850e00, +0x1e1011c00, +0x30c3831c00, +0x73c7818e00, +0x11e38f0c00, +0x71c3c19f00, +0x7183818e00, +0x10e2020410, +0x1e7010218, +0xe1c2c89310, +0xe3c3c48f00, +0x618e0000, +0x7183818e00, +0x61c7c18e00, +0xe1e3010e08, +0x1c4040604, +0x61f3030e00, +0x7183850400, +0x13c6060608, +0x7183031e00, +0xe3020800, +0x79c1031c00, +0x7183811c00, +0x1e3831c00, +0x7103810e00, +0x6103010c00, +0x6103810e00, +0x6083810c10, +0x1060c7870c00, +0xc103808700, +0xf303808b0c, +0xe3c3810600, +0x7103810e10, +0xe2c6488e00, +0x7983030e00, +0xe30f0000, +0xf103810e00, +0xe2020c00, +0xc1c260cf00, +0x7183010e00, +0xe181810e00, +0x7183810e00, +0xe107850604, +0xe287c48700, +0x7183070e10, +0xe103010e00, +0xe103010e00, +0x11e3870c00, +0x10c0040c00, +0x31e3811c00, +0xf307808f00, 
+0xf20a0000, +0xe103818e00, +0x20c38b0800, +0x61080800, +0x3183030600, +0xc3c7c48e00, +0x11e2060c00, +0x71f3030e00, +0x71c3011e10, +0xe307818f08, +0x7101890e00, +0x6183858e00, +0x6183818e00, +0xe1e3409e00, +0x6183030600, +0xc101810600, +0xe2020c00, +0xe103818e00, +0x31e3890e00, +0x10c3011c00, +0x41e3810c18, +0xe10a0c00, +0xe107808700, +0x31c3030c00, +0x3183898e00, +0x6183810608, +0xe2041800, +0xe30e0800, +0xf1e3010e00, +0x1e303808f00, +0xe1e3031e00, +0x1e2020c00, +0x62060800, +0x7183850c10, +0xf183030600, +0xe2060800, +0x71e3071e00, +0xf3e3859f08, +0x7183810e00, +0xe187810e08, +0xf3870800, +0xe3021800, +0xe207c08f00, +0x71e3031e10, +0xe180899e00, +0x7182030e10, +0xf1c384811c, +0xe103818e00, +0x1e207808f00, +0x3083011c00, +0x60e3870c00, +0x7183810e00, +0xe103010e00, +0x31c3811e00, +0x21c3030608, +0xe38f0000, +0x71e3811e00, +0x3183020c00, +0x31c3010c10, +0x41c2030e00, +0xe183858e00, +0x7081020c00, +0x71c3899f00, +0xe2021c00, +0x71e3011e00, +0xe103818f00, +0x6183870e00, +0xe103810e00, +0x1e7810e00, +0x1e3871c00, +0x70c3831c00, +0x6183810e00, +0x71c1811c00, +0x1e303808700, +0x1e3831c00, +0x6183808e00, +0xe30e0000, +0xe1c2818e00, +0x83181011c00, +0xf183899e00, +0x10c2020410, +0x7083811c00, +0xe7870c00, +0xe307c08e00, +0xf183c48e08, +0x31c2030c00, +0x10e3020c00, +0x3083810e00, +0xe1c303060c, +0x7183850e00, +0x7183810c10, +0x1f207818f08, +0xe103810e00, +0xe103828600, +0x6183818e00, +0xc183878600, +0xe103810208, +0x3e2060608, +0x18c2070e00, +0x6183810e08, +0x71e3890c00, +0x10c3021c00, +0xc307038700, +0xf103848e00, +0x31c7818a18, +0x61c3849e00, +0x6183818e00, +0xc103808600, +0xe107ed9e00, +0x7183831c00, +0x6183810c00, +0xe3070800, +0x70830e1c00, +0x6183810c00, +0x1c2040400, +0x7187810630, +0x6183878608, +0x61c3810e00, +0x4181818e00, +0x71e3031c00, +0x20e1021c00, +0x6307c08e00, +0x10e3830800, +0x71c3011e00, +0x60c1020c00, +0x61f2060c00, +0x30c38b0c00, +0xf103c09f00, +0xf101011f00, +0x7183031e00, +0x7183848e00, +0x71c3c99e00, +0xf1e7818e08, +0xf30b0c00, +0x7163810e00, 
+0x79e3071c00, +0x7381890e00, +0x1e3031c00, +0xf182870e00, +0x6183010208, +0x18c1020c00, +0xe181898f00, +0xc143870600, +0x71e3831e10, +0x7181c18e00, +0x71e3011c00, +0xf38f0c00, +0x6181810e00, +0x610e0800, +0x71e3091e00, +0x6183818600, +0x20e30b0e00, +0xe181c88f00, +0xe3c7c88f0c, +0x3183811c00, +0x71c2030608, +0xe38f0000, +0x61c2418e00, +0x6103810400, +0x7383c88f00, +0xe307c18e00, +0xf1c3858e00, +0x1e3031c00, +0xc181850e00, +0x7182070e00, +0x6183030608, +0x6103810e00, +0xe3c7c18e08, +0x6003010600, +0xe3871c00, +0x70818d1e00, +0x3183891e00, +0x7104070700, +0xc103808600, +0xe103808104, +0xe2030800, +0x71e3031e00, +0x30c3811e00, +0xe103810608, +0x70e10f0c00, +0x41e3030410, +0xe307818700, +0xb902010e08, +0xe180810c00, +0x70c30f1c00, +0xe183810608, +0x61c3818e00, +0x11e3870c00, +0x6103070e00, +0xe3890c00, +0x7081850c00, +0xe18e0000, +0x71e3010618, +0x19c2020c10, +0x6081830c00, +0xe30b0c00, +0x61021800, +0x7103810e00, +0x71e3010e08, +0x1e7030c00, +0x21c3c09f00, +0x7183810e00, +0xc101810c00, +0xf107818e08, +0x30418e0c00, +0x7a07818e00, +0xe107818e00, +0xe1c3858e00, +0x1e3c88200, +0x3083811c00, +0xe307818f00, +0x7183070600, +0xe20a0c00, +0x4081010c00, +0x40e3030c00, +0x7107810e08, +0xe183810e18, +0x6103810e00, +0xc103010600, +0x78e10e0600, +0xe38e0800, +0x6181810e00, +0xe30e0400, +0xe307c08704, +0x7103808610, +0xe28f0400, +0x40c2020208, +0x11e1890e00, +0x78c1071c00, +0x31e2020c00, +0xe1020800, +0x7983811c00, +0x6103809e00, +0x6183818e00, +0xf3070c00, +0x7183811e00, +0x7183010c00, +0x41e3070e00, +0x6103010e00, +0x11f3810c00, +0xc103818e00, +0x7183011e00, +0xf3070800, +0x10e10e0c00, +0xf3030c00, +0x1e30b0c00, +0x10e3020c00, +0xc102030e00, +0x71e30b0e08, +0xe101818e00, +0xf103050e00, +0x6183010600, +0x1e7010600, +0x10e30e0800, +0xe143838708, +0x10e3021800, +0x61e2020c00, +0x1e3030c00, +0x71c3091e00, +0x7183810e00, +0x3182020408, +0x7183898e00, +0x10c3020c00, +0xf20a0800, +0xf103808e08, +0x7083810e00, +0x7183811c00, +0x20e3890e00, +0xe3020410, +0xf1c3830e00, +0x7183070600, 
+0x7107809f00, +0x30c3821800, +0xe3c3808f04, +0x7082020c00, +0x30c1010c00, +0xc101810600, +0x7083030e00, +0x7183011b00, +0x31e3070c00, +0x11e2020418, +0xe3060c00, +0x7107810e00, +0x40e38f0c00, +0xf106078300, +0xe183810e00, +0xc182818f00, +0x40e3020410, +0x3083810c00, +0xe3060400, +0x11c10b0c00, +0x6081020600, +0x6081010400, +0x1e38f0c00, +0x3183c19e10, +0x30838f0c00, +0xf181810e20, +0x6183810e00, +0x6103810600, +0xc1c0810c00, +0xf1e1810e08, +0x6083810e00, +0x71838f0e00, +0x630e0000, +0xe1c0c11e00, +0xf103878e00, +0x71c0891e00, +0x7103010e00, +0xc183810102, +0x33e7030e08, +0x6183810e00, +0x31c2060608, +0xe103010e00, +0x61c3851e00, +0x11e3830c00, +0x7183850c00, +0x2083010c00, +0x71e3810e00, +0xf103018f00, +0x71c3020410, +0x6083070600, +0x41c0810e00, +0x11f30f0c00, +0x71c3810e08, +0xf102838c00, +0xc101898e00, +0x61c3070e00, +0xf103070e00, +0xe383818e00, +0xe3030800, +0x3083810e00, +0xe101810e08, +0x6000000e00, +0x7103010e00, +0x7103810e00, +0x6103010a00, +0xf183811e18, +0x6101810e00, +0x6183010e00, +0xe1e3818f00, +0xe307c08f00, +0xe3020800, +0xe38f0c00, +0x71c38f1e00, +0xe3020c00, +0xf183c09f10, +0x71e30f0c00, +0x618f0000, +0x71c3031e00, +0xe10a0800, +0x6183c48e00, +0x61c2810e00, +0x30e38f0c00, +0xe101810208, +0x3983030c10, +0x11c3050e00, +0x3082060208, +0xe183838e00, +0x61c3cf8e00, +0x6102878e00, +0x7081020c00, +0xe1a1811e00, +0xe3c440890c, +0xe3c3818e00, +0xf301810e00, +0x60c3810c10, +0x7103010610, +0x3083020c00, +0xe30e0400, +0xe180810600, +0x79e3030e00, +0xf1e7c0811c, +0x6103c58e00, +0x10630e0c00, +0x1c2420408, +0xf383c08f00, +0x71c18f1c00, +0x6183811e00, +0x710e0800, +0x3083020c00, +0xe30b0c00, +0x10c2020410, +0x1081c31c00, +0x7303c08e08, +0x6103010e00, +0xe207c08f04, +0x10c3870c00, +0x6182850e00, +0x71c3810e08, +0x71e6070c00, +0x7183890e00, +0x18301828700, +0x7183811e00, +0x41e3811e10, +0xe303808f00, +0x11c2020618, +0xf18040df00, +0xe2021800, +0x7102030e00, +0xe103c19e00, +0xf347878e00, +0x6083810e00, +0x3947810e00, +0x71e38f1e00, +0x71c3811e10, +0xe103818e00, 
+0xe2020c00, +0x7102030608, +0x7107010e00, +0x7181011e00, +0x4101810400, +0xe10a0c00, +0x3187810e10, +0x6162060e08, +0xf2020c00, +0x6103810200, +0x1f3020c00, +0x61c3030c18, +0x6183810e00, +0xe3811c00, +0xe103818e00, +0x410e0000, +0xe101010218, +0x6103818e00, +0x71c3020600, +0x71c3850e08, +0x1c3850204, +0x60c1810c00, +0x2083810c00, +0x3083811c00, +0x30e1021c00, +0x6183810e00, +0x31810f0c00, +0xe387c08f00, +0x20c1000400, +0x4103810e00, +0x1f3070c00, +0xe1c0818e08, +0x6180810e00, +0xe38f0c00, +0x7183811e08, +0x7183c09e00, +0x10e3061c00, +0x6183810e10, +0x6103818e00, +0xe3cf8000, +0x30c3031e00, +0xc1c2408e00, +0x7083891e00, +0x11e3021c00, +0x39e3071e08, +0x61c0919e00, +0xf107c19e00, +0x39e3031c00, +0x7083891c00, +0x6183810e00, +0x60e3831c00, +0x79c2020e00, +0x6183010600, +0x1e1c3c08f08, +0xe3821c00, +0xe301808f00, +0x7183011c00, +0xf1078d8e08, +0xf1e7890e08, +0x7183810e00, +0x3081010c00, +0xe1020800, +0x6083810e00, +0x1e7808200, +0xe38e0800, +0x30c1090c00, +0xf3c3c18e08, +0x6181810e08, +0x73c7850e00, +0xf1c3010e00, +0x71e10a0c00, +0xe383838600, +0x18e3060c00, +0x18e3810c00, +0x71e78d8e00, +0x10e3811c00, +0x7183810c00, +0x31c7850e00, +0x7183811e00, +0xc203c08700, +0x60e30b0c00, +0x6183870e00, +0x7383818f08, +0x73c3030f00, +0xe183810e08, +0x1e7811e00, +0x1f3070c00, +0x3182020e00, +0x61060000, +0x61f2020418, +0x7183010e00, +0x70c3811e00, +0x4181010c00, +0xe30b1c00, +0xe103058700, +0x6103810600, +0x3982020c00, +0x11e30f0000, +0x6183810e00, +0x6103810e00, +0xf187819610, +0xe101010e00, +0x30c3870c00, +0xf1e3031e00, +0x73e3031e18, +0xe107848708, +0xc3c7878704, +0x6081020e00, +0xd9e3010a08, +0x10e30f0c00, +0xe3020c00, +0x6183810e00, +0xf187c08f08, +0x71c3851e00, +0x31e3021c10, +0xe3840800, +0x10e1090c00, +0x3083870800, +0x60c1020c00, +0x31e3831e10, +0x10e18f0000, +0x6183810e00, +0x61c3810e08, +0x11e3071c00, +0x7183090e00, +0xe181810e00, +0xe3060c00, +0x7183010200, +0x1e3030c00, +0xf107c19e08, +0x71c7810e08, +0x6183810e08, +0x30c38a1800, +0x7103010e00, +0xe183c48f00, 
+0xe183810a08, +0x21c2020608, +0xf101011e00, +0x7181810c00, +0xc103810608, +0x7183858f00, +0xe3811c00, +0x7103811c00, +0xe103808f00, +0x71e3070e00, +0x7083010e00, +0x23cc0000, +0x6103878700, +0x30c5cf0c00, +0x71c3030e10, +0x7183021c10, +0x4103818204, +0x7107808e00, +0x1e3c1808f04, +0x163020418, +0x10e30e0800, +0xe103808f00, +0x7081891c00, +0x6182818e00, +0xe103818e00, +0xe383808700, +0x10e1021c00, +0x1e2020410, +0x10c3030e00, +0x18180c08e00, +0x1e2060c00, +0xf183810e08, +0x7187810e00, +0x38c3811c00, +0x30c3071c00, +0x7183010220, +0x31e3850e10, +0x7183819e00, +0xf123810e08, +0xc101810600, +0xc1e3808f08, +0x1e3c3c48f00, +0x21e3010c00, +0xf183818e00, +0x7102030e08, +0x11e3899e00, +0xe38f0c00, +0xf3060c00, +0xf183c89e00, +0x2183010408, +0x71e3811e00, +0x6183838600, +0xe3c2848700, +0x70c38f1c00, +0x6183810e00, +0xe103810e00, +0xe1c3858e00, +0x7103010e00, +0x7081811c00, +0x31c3030210, +0xc183c18600, +0x6103810e00, +0xe10d0400, +0x6081030600, +0x7181810e00, +0xe183818e00, +0xe183858e00, +0xa1c0818e08, +0xc101810e00, +0x7183809e00, +0x1e181c88f08, +0xe1c3818f00, +0x6182030600, +0xe1c3808f08, +0x7103810e00, +0xe3811c00, +0x6182808e00, +0x31c38f0c00, +0x7183030e08, +0x7083811e00, +0xc101808f00, +0x71c1030c00, +0x31c3891e00, +0xe101810e00, +0xf1e3850e00, +0x1f2060c00, +0x10c7010400, +0x7183010e10, +0x6183810e00, +0xe3070c00, +0x70c3831c00, +0xe103810a08, +0x71e3871e00, +0xf181091e00, +0x61e3030e10, +0xe3821800, +0x7083811c00, +0xe3031c00, +0x1e303808f00, +0xf103819e00, +0xe183c08f00, +0x71c3011e00, +0x3083811e00, +0xe2021c00, +0xf101c98e00, +0x30818e0800, +0xf30f0800, +0x7183830e00, +0x30c2060608, +0xf1c3819e00, +0x31c3810e00, +0xe347488308, +0xe2020c00, +0x71818f0e00, +0xc183808f00, +0x1f2060408, +0xc30a0800, +0x7183050e00, +0x71c3811c00, +0x6183c18e00, +0xc103818600, +0x6103030218, +0xc247c48f00, +0x61e3070e08, +0x1e3810c00, +0xe38e0000, +0x6101810600, +0xf3021c00, +0xf081850e00, +0x70e3031c00, +0x7183810c00, +0x7103811e08, +0x71c3031e10, +0xc1e0488e00, +0x71c1850e00, 
+0x1e1031c00, +0xf3060c00, +0x71a7c18e00, +0xe3020800, +0x71e3050700, +0x6103030c00, +0xe103810c00, +0xe347818f04, +0x10e30e0c00, +0x30c3071c00, +0xc103808e00, +0x6083010400, +0x11e3890c00, +0x11e3060c10, +0xe3080c00, +0x1e3031c10, +0xf103030e08, +0xf181818e00, +0x6183030600, +0x61e3021c10, +0x71e3091e00, +0xe3c3c18e08, +0x6181030c00, +0x7183810e00, +0xe183010e00, +0x81f20f8100, +0xfbc7010e08, +0x71e3031e00, +0x11c3030600, +0xf30e0800, +0xf307818704, +0x6181850e00, +0x71e3811e00, +0x21e3810c10, +0x30c3811c00, +0x7022060408, +0xe3c2c78e00, +0x7081031c00, +0x30818d0c00, +0x31e3010e08, +0x61e3830c00, +0x7103010e00, +0x73c3c88f00, +0x6183810e00, +0xe1c2c18e00, +0x23840000, +0xe3021c00, +0xf102408f00, +0x30e38f0c00, +0x1e3810c00, +0x38c3071c00, +0x71020b0e10, +0x61c0409e00, +0x71e3811e00, +0x3081010400, +0x71c3020608, +0xe10f0400, +0xe103838e00, +0xc3c7848700, +0x6102070600, +0xc180810e00, +0x6083850c00, +0xe3890e00, +0xe1e38f8e00, +0x31e18f0400, +0xf1c3811e00, +0x7183810c00, +0xc207c08600, +0xf3060800, +0x30e18f1800, +0xe10a0c00, +0x3083031c00, +0xf103850e00, +0xe181c48e00, +0x31c2070610, +0x7183811e10, +0x1f2060608, +0x1e7010e00, +0x71e3810c10, +0xe103818e00, +0xe3020800, +0xe303848f00, +0x41c3810610, +0x19e30b0c00, +0x20a3020c00, +0x30c3810e00, +0x41e7810c30, +0x7101010e00, +0x11e3071c10, +0x3946060e00, +0x6181898e00, +0x6103808e00, +0xf982020e08, +0xf181cf9e00, +0x30c3070c00, +0x38e7010e00, +0x4303808600, +0xe307c08f00, +0x8b183871e00, +0x30a3070c00, +0xc3c780830c, +0x18c3011c00, +0x6181810c00, +0xf383c09f08, +0x7187811e00, +0x3983011e00, +0xe2010000, +0x6307c08e00, +0x79838f8e00, +0xe1811c00, +0x7183031c00, +0xe3020800, +0xf103850e00, +0xf103899f08, +0x6183810e00, +0x61e3050e08, +0x31c38f0c00, +0xe103858600, +0xe3030c00, +0x41c2070600, +0x7183030e08, +0x7081020c08, +0x51c1811e00, +0x31c3018e00, +0x71c2070618, +0xe183848a00, +0x3083c89e00, +0x10c1810c00, +0xe2021c00, +0xf387c48e08, +0x3082020c00, +0x7183818e00, +0x10c3870c00, +0x7180899e00, +0x6083030410, 
+0xf1061800, +0xe103818e10, +0xf3060800, +0x1e2020418, +0xc101810e00, +0xc1c3010104, +0xe30e0800, +0xc1c3030e00, +0x61e3818f00, +0x71c3871e00, +0xe1c3850e00, +0x7181050e00, +0x70c1821c00, +0x3083010c00, +0x6183808e00, +0x6183010e00, +0x6183850e00, +0x31c2020c00, +0xe38f0800, +0xc102838e00, +0xf103809e10, +0x7883811e00, +0x1e48f8700, +0x23841000, +0x4081810200, +0xc103c08600, +0x2083820c00, +0xe10e0000, +0x6183070e00, +0xc1020800, +0x71e38f1e00, +0x71030b1e00, +0x4101030600, +0x3183810c00, +0xe2020c00, +0x33c3819e00, +0x11e7070400, +0xe1c3418e00, +0xc1e3810e18, +0x70830f0e00, +0xe181898e00, +0x31e38b1e00, +0xf103810e00, +0xe183808e08, +0xc1e3848e00, +0xf203010f00, +0x38820f0c00, +0xe103808e08, +0x6083010e00, +0xe30e0c00, +0xe30e0000, +0xf103811e10, +0x6081810600, +0x3083090c00, +0xf303899f00, +0x71e3090e00, +0xe103810e08, +0xe3020800, +0xe163818e10, +0xe183810e00, +0xe18e0000, +0xe183808700, +0x31c3030e00, +0x71e3030e10, +0xe103810618, +0x6081810c00, +0x60c3c70e00, +0x630e0000, +0x71e3031e00, +0x7103811e00, +0x6103810e00, +0xe3091c00, +0x18e3020c00, +0x38818d0c00, +0x23c7878304, +0xe3830c00, +0x31e3011c00, +0x6183810e00, +0xe3070000, +0x1f3021c00, +0xe3060800, +0x7103858600, +0xe303808700, +0xe103808700, +0x1c7010204, +0x173020e08, +0x38c30a0c00, +0x6103818e00, +0x3f7070208, +0x61c3010600, +0x10c3031c00, +0x71e3078f00, +0x30e3001c00, +0x30c1021c00, +0x7983030410, +0xe3060000, +0x60c0810e10, +0x7082060410, +0x31c2060c00, +0xe183808f00, +0x1f3e7c18f08, +0x62040000, +0xe1c7c08e08, +0xc181810e00, +0xf3c3818f0c, +0x3083010c00, +0x1e3090e00, +0x70c50d0e00, +0x7103898e00, +0x6183010e00, +0x10818f0c00, +0xf183810e00, +0x11e3890e00, +0xf3c3878f00, +0x61c2810600, +0x7102070600, +0x7081850e00, +0x7103810e00, +0x100810400, +0xe1c781810c, +0x11e3011c00, +0xe183850600, +0x71e38d1e00, +0xe18f0800, +0x1f3021c00, +0x610e0000, +0x6183850600, +0x7107818e00, +0x1e7010e00, +0xe30e0800, +0xe2020c00, +0xe103810e00, +0x3883010c00, +0x11e30b0c00, +0xc103c08e00, +0x4000010c00, +0xe103810e00, 
+0x73040800, +0xf3060000, +0xc1c0c19e00, +0xc103810e00, +0x6181010e10, +0x61818d0e00, +0x1e7c89200, +0xe103810600, +0x6083010e00, +0x1e3050600, +0x71c2811c00, +0x7101010600, +0x338603030c, +0x7182810e00, +0xf3c3819e10, +0x1f3030e00, +0xc183808e00, +0xf383808338, +0x71c3811e00, +0x61061800, +0x6081810c00, +0xc182c08e00, +0xf303818f00, +0x3081030c00, +0x70e58f0600, +0x3183030e00, +0x6161010e00, +0x71a3810e00, +0x1163010c00, +0x38c2020408, +0x40e3811c00, +0x6183811e00, +0x7081031800, +0xe30e1800, +0x3182870600, +0x31e3811e00, +0x1c380808f00, +0x3983021c00, +0x3c7818100, +0x7003810e00, +0x1e3011c00, +0xf1e3031e1c, +0x4103810600, +0x79c3031e00, +0x1f3030c00, +0xf303899f00, +0x6143850600, +0x11e3030c00, +0x79c3070e00, +0xe183808f0c, +0xe180810e00, +0x1e3001e00, +0x7183030e00, +0x11e30f0c00, +0x73e3818f18, +0x7182031c00, +0x7103850e08, +0x20e3831c00, +0xe2060c00, +0x41e1830c00, +0x61c3c58f08, +0xe180848e00, +0x31e3891e00, +0x6103010400, +0x71e3850e10, +0x30c38b0c00, +0x31c2021c00, +0x81f6070600, +0x71c3811e00, +0x30e3031800, +0x71830b0e00, +0x61e3030e08, +0x71810e0e00, +0x41e6078600, +0x1e387838700, +0x10e3021c00, +0xe182810e00, +0x6081070600, +0x1f3831c00, +0xe0c0858e00, +0x4102030000, +0x6103810e00, +0xc183818e00, +0x8101848700, +0x11e3060c00, +0x71e78f0e00, +0x6101810600, +0xe1c3810618, +0x6103808b00, +0x3182021c00, +0xe307c08f00, +0xc103808208, +0x21e38f0c00, +0x40c2060c00, +0xf3060800, +0xe1020800, +0xe3020c00, +0x71c3810e00, +0xf1c3811e10, +0x1f7070e00, +0x7183810630, +0x6383818e00, +0xe3870000, +0x30c38b1c00, +0x7103010e00, +0x61e3030418, +0xe10e0800, +0x7183011e08, +0xf2020c00, +0x71838d1c00, +0xe3061c00, +0xe183878f00, +0x6182818e08, +0xe1c3810e00, +0x6103810e00, +0xf103010e00, +0xe101010e00, +0x6103808e00, +0x6183810200, +0x1e30b0c00, +0x30c3020c00, +0xe143810e08, +0xf303010e00, +0x71c1850c00, +0x6183850600, +0x10e3890c00, +0x3123031e00, +0xe3060000, +0xe147c08f08, +0xe163070e00, +0x6183810600, +0xe3020800, +0xf307808f00, +0x6183018104, +0x1e203848f00, 
+0x6183899e00, +0x30c38b1c00, +0x610e0800, +0x1f2020c00, +0x71e2020e18, +0x30c1021c00, +0x6103810e00, +0xe7090600, +0x6183810e00, +0xf103810e00, +0xf1a3010a18, +0x6083810c10, +0x1c303858700, +0x6081810c00, +0xe6030800, +0xe103810e00, +0x7107808f08, +0xe3c3818f08, +0x61e0810c10, +0xc101818e00, +0x30e2031e00, +0x2083810e00, +0x6083810e00, +0xe3080c00, +0x6103010800, +0x10e3071c00, +0x1e30a0800, +0x1e1810e00, +0x1e3021c00, +0xe20a1c00, +0x7083891c00, +0x6101011e00, +0x7103811c10, +0x7103811e00, +0x1e00f0000, +0x6103808608, +0x4101810600, +0x6183c39c00, +0x7183811e00, +0x70e30b1e00, +0x6180810e00, +0x70818d1e00, +0xf2020c00, +0x7103810c10, +0xc103808e00, +0x72020800, +0xe103818e00, +0x11e2070218, +0x11e3011c00, +0x30c3820c00, +0x10180818e00, +0xe1c3c78700, +0xe182808e08, +0x7187808e18, +0x3e3011e00, +0x30810b1c00, +0x2001010c00, +0x30c3891c00, +0xe30b0c00, +0xe103838608, +0xf1e3810e18, +0x7386070600, +0x71c3030e08, +0xf1c0cf9c00, +0x7103811c00, +0xe1e2070208, +0xf1e7810e10, +0x7307010708, +0xe3021c00, +0x19c3870400, +0xe103810600, +0x41e3010400, +0xe10e0000, +0xe101819e00, +0xc103848e00, +0x7183899e00, +0x6181010600, +0x870818f1c00, +0xe30b0c00, +0xe30e0c00, +0xe3020800, +0x10c1810c00, +0x10e3061c00, +0x3883030e00, +0x7083811e00, +0x31c18f1800, +0x7083091e00, +0x1c387c08700, +0x30c3810c00, +0xe103810600, +0x2081020408, +0x3183811e00, +0x61c2c99e00, +0x7182811c00, +0xc10381020c, +0x7182030e00, +0xe307c09f00, +0x31c3031e00, +0x1e3c7c18f08, +0x31e1020c10, +0x10e3021c00, +0x6101810208, +0xf183810e00, +0x2181010e08, +0xe180c09e00, +0x7383810618, +0x11e2060418, +0xe307c48700, +0x60e1020418, +0x61e3050218, +0xe387c08700, +0x71838d0e00, +0x7103070600, +0x7183030e08, +0x4101010208, +0x70818b1c00, +0xf2040800, +0x60e38b1c00, +0x6183818e00, +0xf1c7010608, +0x39e3070c00, +0x11e2020c10, +0x2081021c00, +0xe3c7c0cf00, +0x20e3011e00, +0x3083010c00, +0x10e30e0800, +0x7183030e08, +0xf181891e00, diff --git a/samples/digitrec/digitrec/data/training_set_6.dat 
b/samples/digitrec/digitrec/data/training_set_6.dat new file mode 100644 index 000000000..aab6a00c3 --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_6.dat @@ -0,0 +1,1800 @@ +0x6083850c00, +0x41c3c48e00, +0x204183c78c00, +0x6182c58e00, +0x6183c58e00, +0x106083870c00, +0x3083850c00, +0x20c103c78600, +0x3083cd8e00, +0x102083870400, +0x4083870400, +0x6081870c00, +0x106183870400, +0x1020c30f9d00, +0x20c107c78700, +0x6083870e00, +0x418367e78e00, +0x61c2cb9e00, +0x6182c78e00, +0x4102c58c00, +0x4103868600, +0x4102c70400, +0x104102830c00, +0x2083870c00, +0x4083870400, +0x4082850600, +0x2083c70c00, +0x20c103c78600, +0x6187cecf00, +0x1861838f8c00, +0x4103870400, +0x102083878400, +0x106083870c00, +0x3083cf9c00, +0x100083870c00, +0x20c102c78600, +0x106183c70e00, +0xe367cf8600, +0x6124cb8c00, +0x30c3c78e00, +0x204182c78e00, +0x4083e7478c00, +0x104102868600, +0x4082850400, +0x2081060400, +0x18267e78400, +0x1060820f0e00, +0x6083878c00, +0x204083870400, +0x204183870400, +0x2082878600, +0x6182878e00, +0x1020c3cf1c00, +0x4083870c00, +0x4102cb9c00, +0xc103c78600, +0x104183cf0800, +0x4183cf8e00, +0x106182c78600, +0x106107898e00, +0x4102878c00, +0x104083858c00, +0x4083870c00, +0x6183878600, +0x306183c78e00, +0x6186cf8c00, +0xc307c4c700, +0x104082870c00, +0xc102878600, +0x104082870c00, +0x30c78f8e00, +0x2083850c00, +0x204183c70600, +0x20c107c78c00, +0x106083870c00, +0x4102878600, +0x104083870400, +0x204103cf8c00, +0x204182c78400, +0x3c70000, +0x4183878400, +0x106182c70c00, +0x6183850c00, +0x6183cf8e00, +0xc1e74d8e00, +0x408326c78400, +0x102083870c00, +0x204183870400, +0x204102c78400, +0x2083858a00, +0x408122c78200, +0x8143468600, +0x8102c78600, +0x30c3870c00, +0x102001060c00, +0x61e3cf8e00, +0x204183c78400, +0x2083070a00, +0xc106e78000, +0x8143c70c00, +0x4103c78600, +0x1060838f0c00, +0x61c7858e00, +0x4182c78400, +0x204183cf8e00, +0x2041c3cf0c00, +0x6102868600, +0x8142c70c00, +0x4107c68600, +0x4102868600, +0x102083870c00, +0x3083070c00, +0x6082070600, +0x4083850e00, 
+0xc103c48600, +0x4102c78600, +0x6182070600, +0x106183c78600, +0xc167c78f00, +0x20c103c78600, +0x4102c78600, +0x4102078600, +0x204183878600, +0x3082870e00, +0x6183870c00, +0x8365c78600, +0x204102870600, +0x3083870c00, +0x6182070600, +0x2083070600, +0x6083070e00, +0x30e307ce8e00, +0xc1e3c78600, +0x104182c70c00, +0x104083c70400, +0xc126c98c00, +0x204183c78e00, +0x6082870400, +0x20c102c78600, +0x71c3cf9c00, +0x820868f8c00, +0x106183870c00, +0xc183c78600, +0x104103cf8e00, +0x20c143c68f00, +0x208102c78200, +0xc183c68600, +0x4102cd8e00, +0x208102c70400, +0x204083870400, +0x4103c70e00, +0x6183848700, +0x4183850600, +0x8326cf8600, +0x8366c70400, +0x106081070400, +0x408367e78e00, +0x4183070400, +0x204106c78400, +0x418307c7c200, +0x4083870400, +0x6183878600, +0x2041c3c70c00, +0x8343c68600, +0x204143c70400, +0x20c147c78c00, +0x6082040600, +0x20c106ce8e00, +0x102082078600, +0x106082c70400, +0x61c3850e00, +0x106083c78000, +0x20c103cf8600, +0x4081870c00, +0xf102868e00, +0x20c102878600, +0x208103c78600, +0x4083830400, +0xc347cf8600, +0x204104cf8400, +0x7103c70600, +0x31c3cf9e00, +0x4183c78600, +0x4183c78e00, +0x102083c70c00, +0x608267ef8400, +0x8102c68600, +0x8142c78600, +0x4083030000, +0x4103c78c00, +0x41c3070600, +0x6083cf8e00, +0x4107cf8e00, +0x8265c98e00, +0x60c3cf8c00, +0x4103870400, +0x4143c70c00, +0x8102870600, +0x10265ca8e00, +0x4083c70c00, +0x104183878600, +0x8126cf8e00, +0x3083078e00, +0x106083870400, +0x86186cf9e00, +0x2081850400, +0x4083860600, +0x104182478400, +0xc127ef8c00, +0x2083870c00, +0x104106cf8e00, +0x8176cf8c00, +0x2081870c00, +0x106183c78e00, +0x4183870c00, +0x4183870400, +0x6183848e00, +0x8224cf8000, +0x61878f8c00, +0x104183870400, +0x6183070600, +0x6083850c00, +0x20c103c78600, +0x2081070400, +0x204102870600, +0x60c3870600, +0x6182850e00, +0x6183850600, +0x106183cf8e00, +0x2082870e00, +0x106083870c00, +0x6082870400, +0x4082870400, +0x103870400, +0x4182c70600, +0x6187cd8e00, +0x4183848600, +0x106083cf0c00, +0x20c106c6c600, +0x408106c78e00, +0x2083870c00, 
+0x410205c78200, +0x208107c78600, +0x4002830400, +0x10224cf0000, +0x4103850600, +0x104103868600, +0x30c3050c00, +0x20c122c78e00, +0x4183c78600, +0x106083870400, +0x6183870600, +0x20c102878600, +0x41c3470400, +0x820c7cf8800, +0x4182078600, +0x6107cc8e00, +0x104182078600, +0x12082c78e00, +0x4103c70400, +0x106082878400, +0x204183870400, +0x204103878600, +0x6183070600, +0x106183878600, +0x4182cb0c00, +0x4083c70c00, +0x106183cf8c00, +0xc123c78e00, +0x41c3c70e00, +0x30c7cf9c00, +0x61c3878600, +0x70c38f8e00, +0x4183870400, +0x6083050c00, +0x820838f8c00, +0x6183870600, +0x30c38f0e00, +0x8142c48600, +0x4083c70800, +0x104103878600, +0x6083c70c00, +0x106083c78c00, +0xc103870600, +0x6182858e00, +0x106083c70c00, +0x8102c68600, +0x6183870e00, +0x40c3870800, +0x4143478c00, +0x61c7cd8e00, +0x6187cf8e00, +0x4081870400, +0x6081870600, +0x6083070400, +0x106083c70c00, +0x408102878600, +0x418367ef8c00, +0x82083870c00, +0x204103c78200, +0x6083030400, +0x6106c78e00, +0x104103878400, +0x6183850600, +0x8143c68600, +0x4083e7c78700, +0x4103878600, +0x6183878e00, +0x204103c78600, +0xc103830600, +0x2083870c00, +0xc107c78e00, +0x8103828300, +0x4102c68600, +0x20c103c78600, +0xc102c78e00, +0xe103878700, +0x204183878600, +0x82003840c00, +0x6083850600, +0x40c3c50e00, +0x6187cf8c00, +0x4102c70c00, +0x6083850c00, +0x6183878600, +0x6183c70c00, +0x4083850c00, +0x4083870800, +0x7102070600, +0x6186cf8c00, +0x41c3870400, +0x4083850600, +0x6183c78e00, +0x4183878c00, +0x104082c70c00, +0x82654b8c00, +0xc1c3c78600, +0x2081870800, +0x104182878600, +0x6183870600, +0x6083870600, +0x6083850600, +0x2083070e00, +0x6083870600, +0x2081070c00, +0x1061828f8e00, +0x86183c78400, +0x4142050400, +0x30c38d1c00, +0x6183c78e00, +0x106083c70c00, +0x30c30f0c00, +0x4103c78400, +0x82083c78e00, +0x104182870400, +0x30c3cf0c00, +0xa082c78c00, +0x4182878600, +0x208102c78600, +0x4081850400, +0x2083870800, +0x4162860400, +0x4183848e00, +0x6102870600, +0x204183870600, +0xc386efcf00, +0x81c3448e00, +0x3083060400, +0x106183c70c00, 
+0x6083870c00, +0x60c3c50c00, +0x204183030600, +0x82082cf8c00, +0x182083870c00, +0x3083850c00, +0xc103c68600, +0x4183850600, +0x104183870400, +0x6083c78e00, +0x106083870c00, +0x4082870c00, +0x408167470400, +0x106182050600, +0x106183878600, +0x2082cf8c00, +0x208103c70400, +0x8347ef8f00, +0x8103c78400, +0x4082c70400, +0x4083870600, +0x4102c70c00, +0x4183848600, +0x4102e7478600, +0x71820f8e00, +0x102083070400, +0x8143c48e00, +0x20c306c78e00, +0x102081070e00, +0x40c3c78e00, +0x106083870600, +0x83187cf8800, +0x2083c58e00, +0x6183c78c00, +0x204083870400, +0x204103870400, +0x4083070600, +0x8143468600, +0x8103870400, +0x8264c78400, +0x1061828f8e00, +0x204102870400, +0x20c3450c00, +0x106183cf8c00, +0x1061838f0c00, +0x106183cf8c00, +0x8143c78600, +0x206183870e00, +0x8102c78400, +0x4083030600, +0x6081870400, +0x6183870e00, +0x2081060400, +0x4105cf8000, +0x2083870c00, +0x20a3cf0000, +0x7186cf9c00, +0x4083870c00, +0x408102c78200, +0xc305cf8e00, +0x4183870600, +0x10e30e1c00, +0x20c102878600, +0x20c3c70c00, +0x2081850c00, +0x204103878600, +0x204102c70c00, +0x106187cf8e00, +0x4102870400, +0x20c107c78600, +0x124cf0000, +0x106082870c00, +0x6103858e00, +0x102082850400, +0xc304c78200, +0xc102c78400, +0x4182850e00, +0x2083870c00, +0x2082870000, +0x8103870400, +0x18343c78600, +0x4083870400, +0x20c102c78400, +0x204183c78e00, +0x204102c78600, +0x30c3870c00, +0x4083070600, +0x2082cf8c00, +0x208106464700, +0x7182870600, +0x300102c68000, +0x4103c68600, +0x820838f0c00, +0x6183858600, +0x20c103c78600, +0x4183c78600, +0x6083850c00, +0x21838f8e00, +0x418306c78600, +0x8107c78600, +0x8103c78200, +0x2082870c00, +0x30c3070e00, +0x8102c70400, +0x3083871c00, +0xc204eece00, +0x2083870c00, +0x6183848700, +0x20c143c68600, +0x20c102c78000, +0x81c3458600, +0x204143870400, +0x4182c78600, +0x2083050400, +0x4081850600, +0x208106c78600, +0x204146cf8e00, +0x87187cf8e00, +0x20c103c70400, +0x4103c78600, +0x82083878600, +0x10c103cf8400, +0x4183870600, +0x4103c78800, +0x8102830600, +0x4102850600, +0x7122070e00, 
+0x204182078700, +0x4083654f8c00, +0x4103850400, +0x6183c78e00, +0x1061c3cf8c00, +0x18c38f0c00, +0x2083c78c00, +0x4083c70400, +0x6083870400, +0x4083870400, +0x106107ce8e00, +0x6082c78e00, +0x204102878400, +0x204102c70400, +0x6081070600, +0x18367e68600, +0x4183870600, +0x3083060c00, +0x41c3c78600, +0xc102878600, +0x30c38f1800, +0x6183850e00, +0x104183c70c00, +0x106083870c00, +0x4183850600, +0x106182c78000, +0x204103870600, +0x20c102870600, +0x106082cf8c00, +0x4183830600, +0x104083870400, +0x104183c70c00, +0x20c102c78600, +0x2083850c00, +0x6187cf8c00, +0x104082868e00, +0x8143c70600, +0x8103830400, +0x71c7cf0e00, +0x106083c78400, +0x4083870600, +0x40c103870600, +0x820838f0c00, +0x204103c78600, +0x4183c70400, +0x4082070400, +0x4083850400, +0x106081070400, +0x6083070600, +0x821838c8f00, +0x106182c78600, +0x4082070400, +0x81e3470400, +0x4083870400, +0x60a3060c00, +0x18347c78600, +0x106102078400, +0x102083870c00, +0x8167ce8f00, +0x2083850c00, +0x20c3cf9c00, +0xc103878600, +0x4083850600, +0x6182870400, +0x2081850400, +0x1020838f0c00, +0x106183c68700, +0x104081850400, +0xc205c78400, +0x20c103c78600, +0x20c187cf8e00, +0x8103c78400, +0x4102858600, +0x6083cf8e00, +0x20c187cf8e00, +0x2082870000, +0x8183878600, +0x4083870400, +0x61c2870c00, +0x106182cf0c00, +0x20a3cf0c00, +0x6083870400, +0x4102ce8e00, +0x6183c78e00, +0x208104c68600, +0x71830f8e00, +0x204103878600, +0x20c106cd8e00, +0x30c3850c00, +0x204081870600, +0x204102c78600, +0x81654b8e00, +0x4183870600, +0x408122c78600, +0x204183c78c00, +0x2083870c00, +0x106182cf8c00, +0x20c106c78000, +0x204183870400, +0x83083cf0c00, +0x86183cf8600, +0xe102870600, +0x4082060600, +0x106083870000, +0x204183c78600, +0xc143870400, +0x6081030600, +0x6183850e00, +0x61828f8600, +0x102081870800, +0x2083870c00, +0x2083870c00, +0x2083870800, +0x408103c70400, +0x8107ce8600, +0x208307c78600, +0x106082050c00, +0x204103c78e00, +0x6083870c00, +0xc163c68600, +0x204083878600, +0xc102c70600, +0x1020e3cf0c00, +0x204103c78600, +0x4183c70400, +0x4083870600, 
+0x102082cf0c00, +0x104183c70c00, +0x106082cf8c00, +0x4183870400, +0x204102c78600, +0x4182870600, +0x8205c78400, +0x2081060400, +0x8102c70000, +0x204102c78600, +0xc325ef8400, +0x4083078600, +0x106183cf8e00, +0x6082070400, +0x204103c68600, +0x106083c78c00, +0x6103870600, +0x41c2c70c00, +0x6083070400, +0x6081050600, +0x4083850c00, +0x20c3c70c00, +0x408143468600, +0x6082870e00, +0x204103870600, +0x6183cf8e00, +0x106183878400, +0x8143870400, +0x2082850e00, +0x8143468600, +0x2041c3c70400, +0x2041c3c78c00, +0x4103858600, +0x106183870400, +0x2083c58e00, +0x6183878e00, +0xc162c78c00, +0x4183e7ef8e00, +0x106187cf8c00, +0x8143870400, +0x8363e78600, +0x106083870c00, +0x204103c78600, +0x20c103c78600, +0x6083c50400, +0x418307c78600, +0x4183870e00, +0x2081870400, +0x408103c78600, +0x4083870400, +0x6103870600, +0x408102c78600, +0x106083870c00, +0x6183858e00, +0x20c106c78400, +0x4081830000, +0x41c3c70600, +0x2083850e00, +0xe304cfcf00, +0x6183cf8e00, +0x6103870e00, +0x20c104c78600, +0x4102c70400, +0x2083850600, +0x104083870c00, +0x4083870400, +0x20c107c78600, +0x820c3870c00, +0x4102070600, +0x2081070e00, +0x20c183c78400, +0x20c102c70200, +0x4182cf0c00, +0x204103c78400, +0x208103870400, +0x6182cf8e00, +0x8122cf8e00, +0x4003850600, +0x204102c78c00, +0x10c3cf8800, +0x30830d0e00, +0x106083870400, +0x81e2448f00, +0x106182c78c00, +0x6083870600, +0x4083850e00, +0x6183c78400, +0x40c2c50c00, +0xc163c78e00, +0x20c3860c00, +0x4143870400, +0x6083850600, +0x3187cf0800, +0x2081870c00, +0x2083850c00, +0x204cb0000, +0x2041a3cf0c00, +0x6083870e00, +0x6182878e00, +0xc163c78c00, +0x106103878c00, +0x6183878e00, +0x408367c78600, +0x6082060600, +0x102083870c00, +0x31838f8e00, +0x204102878600, +0x8103c78600, +0x200103870400, +0x106083c70400, +0x3083870e00, +0x4103c70c00, +0x6183878e00, +0xc106cf8e00, +0x106083870c00, +0x830838f8c00, +0x6183078600, +0x18367c78600, +0x2083070e00, +0x4183c70c00, +0x820838f0c00, +0x104183870400, +0x4102860600, +0x6183c50c00, +0x4143c70e00, +0x104183870400, +0x3183cd9c00, 
+0x6183078700, +0x8163470600, +0x6183870e00, +0x2083070600, +0x104102868600, +0x1061e7ef8c00, +0x8264cb0c00, +0x4083870400, +0x104183cf8c00, +0x2082870800, +0x204103870400, +0x10142c70600, +0xc103c78600, +0x2082c60600, +0x82083870c00, +0x2041c3c70400, +0x408102c78400, +0x6082470400, +0xc207c7c600, +0x204183870600, +0x6082070a00, +0x20c1060c00, +0x2083c78e00, +0x106183878600, +0x4183850e00, +0x2041e2c70c00, +0x4143c78600, +0xc102c78600, +0x82083c70c00, +0x4103870600, +0x2083050c00, +0x6106858f00, +0xc1c7c78600, +0x4183870600, +0x8163478600, +0x6081030600, +0x106183878e00, +0x204102c60c00, +0x104104cf8f00, +0x2083870c00, +0x8103c68600, +0x8143c70600, +0x408103c78600, +0x6083c70c00, +0x104183870c00, +0xc307ce8f00, +0x20c103868600, +0x4183c78600, +0x4081030600, +0x820c3870c00, +0x4183870e00, +0x6083870c00, +0x408307c78600, +0x41c7cf8e00, +0x4083850c00, +0x7183848f00, +0x106183870600, +0x6083850400, +0x20e104c68e00, +0x106083c70c00, +0x2083870c00, +0x408103c78600, +0x41e3cf0c00, +0x204183c78600, +0x6183870e00, +0x204103cf0c00, +0x6147c48e00, +0x6183850c00, +0x8122c50e00, +0x3083050c00, +0x4103870600, +0x6083070600, +0x2083858600, +0x102083870c00, +0xc147cd8c00, +0x1061a7cf8c00, +0x71a2070e00, +0x206183cf8e00, +0x6083870600, +0x41c3c70400, +0x6183070600, +0xc183878600, +0x20c3050e00, +0x20c147c78e00, +0x408102c78400, +0x408367cf8c00, +0x82083870c00, +0x20c106c78600, +0x30c3cf1c00, +0x104107cf8600, +0x4083850400, +0x208306ef8400, +0xc103c58e00, +0x830c78f8e00, +0x4183870400, +0x20c123c78400, +0x104082870400, +0x6103850e00, +0x6182870600, +0x4082870c00, +0xc307c6c700, +0x830c38f1c00, +0x4103c48600, +0x8326cf8400, +0x8143468600, +0x30c3870c00, +0x4102c70c00, +0x4183c58e00, +0xc102878e00, +0x83083cc9c00, +0x3083850e00, +0x20838f0c00, +0x4182c70400, +0x408126cf8400, +0x4102c48600, +0x8102c70200, +0x408103c78600, +0x182c0478600, +0x82083c68e00, +0x2083858c00, +0x104081870400, +0x6183870c00, +0x204183878600, +0x408103c68600, +0x6082870600, +0x6081870c00, +0x6103868e00, 
+0x6183878600, +0x4183850600, +0xe187c7cf00, +0x8265068e00, +0x4183870e00, +0x2083870c00, +0x7083070600, +0x4103c68600, +0x6083850600, +0x8103c78c00, +0x1060c3c70c00, +0x82083870e00, +0x104106cf0c00, +0x4126ef8c00, +0x8166c78e00, +0x2083850400, +0x106182c78e00, +0x4183c48600, +0x2041c6cf0c00, +0x4103c68600, +0x61e3050704, +0x2082870400, +0x208103c78400, +0x8367cf8c00, +0x106187cf8c00, +0x4083870c00, +0x4102c70000, +0xc103c68700, +0x2083870c00, +0x106083870c00, +0x82083cf9c00, +0x106105cf9c00, +0x41c3c70c00, +0x4103868600, +0x830c38f9e00, +0x106083870c00, +0x4083870400, +0x6103870600, +0x6081070400, +0x8163c68700, +0x2081870c00, +0x2083848e00, +0x6083c58600, +0x20c3070e00, +0x6083850400, +0x2083870c00, +0x208345cf8e00, +0x20c3850c00, +0xc386c7c600, +0x4102870400, +0x104083878600, +0x4142c70400, +0x2083050c00, +0x4103850e00, +0x20c105c68600, +0x20c102c68600, +0x20c103c78600, +0x6183878600, +0x82083870c00, +0x1061c3c70c00, +0x6083870400, +0x6083050c00, +0x60c3c70c00, +0x20c147efce00, +0x8122c78400, +0x20c122cf8c00, +0x104102cf8c00, +0x4002070600, +0x20c103c78600, +0x81e3448e00, +0x4102c48600, +0x4183850e00, +0x2083078e00, +0x6182878600, +0x7187cf8e00, +0x4083870c00, +0x86182870c00, +0x3083870c00, +0x2083c78c00, +0x20c127cf8c00, +0x6183870e00, +0x4082010400, +0x4183030600, +0x4143870400, +0x2080840c00, +0x20c3870c00, +0x408245c78400, +0xc143870600, +0x6183070600, +0x20c3cf0800, +0x204103878e00, +0x104102c78c00, +0x8103c68600, +0x106083870c00, +0x2083c78c00, +0x2083870c00, +0x20c105cf8400, +0x104183c78600, +0x208162c58600, +0x20c102c68600, +0x4182078400, +0x4183850c00, +0x106103848e00, +0x204182c78e00, +0x7182cf8e00, +0x71a3850e00, +0x106083878400, +0x4083850c00, +0xc107c78600, +0x106082870400, +0x418302c78200, +0x30c3cf1c00, +0x20c106c78e00, +0x2041e3cf8c00, +0xc145cf8600, +0xc1c3c78e00, +0xc107c78e00, +0x4083870c00, +0x204102c68600, +0x4103870600, +0x6083878e00, +0x4083870400, +0x204082078600, +0x204083870400, +0x40c102c78600, +0x106183870400, +0x4183c78c00, 
+0x204102878600, +0x4183870600, +0x83654e8e00, +0x8127c68600, +0x4143c78c00, +0xc143c78600, +0x204083830600, +0x30c3060c00, +0x102081870400, +0x20838f0800, +0x20c107c78600, +0x106083c78c00, +0xc107ef8600, +0x30c38f0c00, +0x6082c70400, +0x4082870c00, +0x2081050400, +0x20c147c78600, +0x82183c70c00, +0x6183870c00, +0x4182878c00, +0x8104c78400, +0x4183850400, +0x102083870800, +0x6103070600, +0x6183c58e00, +0x208103870400, +0x8365cf0000, +0x4187cf8e00, +0x2083050400, +0x4183870600, +0x6183c70600, +0x6103878600, +0x4182c78400, +0x30c3870e00, +0x6183cf8e00, +0x408183c78600, +0x6182c70400, +0x20c38f0c00, +0x6183878600, +0x6183878e00, +0x104cf0000, +0x106083cf8e00, +0xc126cf8e00, +0x4083850400, +0x20838f0800, +0x2041c3c78600, +0x106083870c00, +0x106083c78c00, +0x1020c38f0e00, +0x6083070400, +0x204183c78600, +0x2041c3c78600, +0x204186cf8200, +0x6103848e00, +0x20c103c78e00, +0x6183c68e00, +0x20c103878600, +0x20c107c78e00, +0x18306c78600, +0x4143850600, +0x107104098f00, +0x6082070400, +0xc143c78c00, +0x204182cf8e00, +0x20c107878600, +0x4002850400, +0x6083870c00, +0x6082c70c00, +0x2083cf8c00, +0x6083070600, +0x41a2cf0e00, +0x6083870800, +0x4102870600, +0x41e3cf0c00, +0x2083870c00, +0x10c106cf8400, +0x8103c78600, +0x408143c78600, +0x20c104ca8e00, +0x2041024f8600, +0x3083070e00, +0x408167c78600, +0x106183c78600, +0x6081870c00, +0x30c107878600, +0x204102c68600, +0x41c3c78e00, +0x6083c78e00, +0xc102c78700, +0x104102c78c00, +0x4083c78c00, +0x104083c70c00, +0x418224c58600, +0x408367c78400, +0x4083870c00, +0x81c3c58600, +0x208304cf8e00, +0x8103c68600, +0x104107cc8e00, +0x6083870600, +0x4083870400, +0x3083050600, +0x4083850400, +0x4103850600, +0x6081870400, +0xc103c78600, +0x2081850c00, +0x3183cf8c00, +0x81c7478600, +0x2083cf8c00, +0x3083060c00, +0x102083c78c00, +0x2083870c00, +0x4183870c00, +0x204102c78400, +0x4183870600, +0x4183cf8c00, +0x8142c78600, +0x408367cf8600, +0x41c3870c00, +0x30c3870e00, +0x41c3870400, +0xc143c78600, +0x8124c98c00, +0x61828f0c00, +0x8143c70400, 
+0x4083c70000, +0x6083030400, +0x104186cf8e00, +0x125cf0000, +0x20c102ed8e00, +0x6183878600, +0xc103c68600, +0x4183850600, +0x6083870c00, +0x104082870600, +0x4103c70c00, +0x106083878c00, +0x106183878600, +0x2083070e00, +0x104143870400, +0x30c3070e00, +0x30c38f1c00, +0x104082070400, +0x4183c78600, +0x4103870400, +0x41c3c50c00, +0x4083050600, +0x10e107cf8600, +0x10c3060800, +0xc183078600, +0x6183cf8e00, +0x6103808e00, +0x2081860c00, +0x104103cf8c00, +0x2083870c00, +0x2083c70400, +0x102083870c00, +0xc103870600, +0x4083850400, +0x208103c78200, +0x4102c78c00, +0x20c38f0c00, +0x102082870c00, +0x204103878700, +0x81674f8c00, +0x204182870400, +0x2083060c00, +0x204182c78400, +0xc182c78e00, +0x40c3870c00, +0x104083850400, +0x408306e7c600, +0x3082060c00, +0x6083460c00, +0x2081850c00, +0x20c306c6c700, +0x6083070400, +0x20c103c78600, +0x20c103c78600, +0x7183050e00, +0x8102c78600, +0x4082030600, +0x104005848700, +0x4081850400, +0x6083060600, +0x2083850400, +0x871828b8c00, +0x4183878600, +0x6083870400, +0x204183ce8e00, +0x104183870c00, +0x2083870c00, +0x4082870400, +0x106083870000, +0x61a7ef8c00, +0x2082010400, +0x3083050600, +0x3083858e00, +0x6305cf8600, +0x6083c78c00, +0x8167cf8e00, +0x1060c3870c00, +0x6083850c00, +0x7102070b00, +0x41c3c70c00, +0x81c3870400, +0x20c102878600, +0x6083870e00, +0x106083870400, +0x106083cf8c00, +0xc347c78600, +0x8167c78e00, +0xc182478300, +0x6083850c00, +0x30c38f0c00, +0x106082870c00, +0x6183c78600, +0x2083070c00, +0x106083c78c00, +0x6083870600, +0x408224c78400, +0x104106cf8e00, +0x204103870400, +0x2083870c00, +0x8162c58600, +0x408307c7c000, +0xc103870600, +0x408325c78e00, +0x6081070c00, +0x2082070c00, +0x204102c70000, +0x6083870400, +0x106183cf8c00, +0x2081060400, +0x41c3c70600, +0x4082070400, +0x106183870800, +0x4083850400, +0x8307c7c200, +0x102083870400, +0x4083c78c00, +0x4183870600, +0x1248f0000, +0x4083850c00, +0x2083050e00, +0x6081030600, +0x204102c78400, +0x6103858e00, +0x104083870c00, +0x6182850e00, +0x8147c78e00, +0x6304cf8e00, 
+0x1061a2cf8c00, +0x20c106c78400, +0x4083860600, +0x104102c78400, +0x2083870400, +0x204183878600, +0x6082870600, +0x18103c78600, +0x61864f8e00, +0x6102878600, +0x106183850600, +0x2083870800, +0x2083c50c00, +0x30c1050c00, +0x4182c78600, +0x6103878e00, +0x6183cf8c00, +0x104102870400, +0x4143870400, +0x104182870400, +0x106083870c00, +0xc136cf0e00, +0x4103c70600, +0x82083870c00, +0x4103cf8e00, +0xc102c58600, +0x4183870600, +0x104103c48e00, +0x20c3cf0000, +0x8102c78600, +0x4083870c00, +0x2082850c00, +0x8102078600, +0x204103c78600, +0x204103878600, +0x6183070600, +0x6081060400, +0x61c3c78c00, +0x104083878400, +0x8106c78400, +0x4163c58600, +0x2082070600, +0x6081070600, +0x6103858600, +0x2081870400, +0x30c38f0c00, +0x2083870c00, +0x106083cf0c00, +0x2083cd8e00, +0xa041030400, +0xc103870400, +0x6081830400, +0xe107c78e00, +0x2082c78800, +0x4083870600, +0x4083850c00, +0x4083070600, +0x6183878600, +0xc1c7c78700, +0x20838f0c00, +0x106183870c00, +0x102082cf8c00, +0x81870400, +0x2083878c00, +0x10207c7c300, +0x104103c78600, +0x104082070400, +0x2083870e00, +0x2083c70c00, +0x20e103c78600, +0x4102c68600, +0x106083878600, +0x82083870c00, +0x8366268a00, +0x106083070600, +0x2081060c00, +0x2083870c00, +0x4083c70000, +0xc106878600, +0x104126cf8c00, +0xc183038200, +0x204102870600, +0x4104cf8e00, +0x4082038400, +0xc106c78600, +0x4000020c00, +0x4183c70c00, +0x106083c70c00, +0x104187cf8e00, +0x6083878e00, +0x61e3c78e00, +0x60c3c50e00, +0x8183c3c78600, +0x30c3070c00, +0x20c304c78600, +0x20838f0c00, +0x86186cf0c00, +0x41828f0c00, +0x4083870400, +0x4103c78e00, +0x106183c48e00, +0x106183870c00, +0x1060828f0c00, +0x4083830600, +0x2082070c00, +0x2082870c00, +0x6083078e00, +0x8102c68600, +0x8306c78600, +0x4082870c00, +0x106182cf0800, +0x4183870c00, +0x104187cf8e00, +0x106083870c00, +0x41c3870800, +0x4081870600, +0x8302c78400, +0x4103c48600, +0x4083870400, +0x204083030600, +0x820838f0000, +0x4102c70400, +0x204143c78c00, +0xc103c68600, +0x4183c78c00, +0x4182c78600, +0x408307c68600, +0x6082070600, 
+0x4182c70400, +0x106083870400, +0x6083870c00, +0x2082060400, +0x6103c78e00, +0x106083870400, +0x10264cd8400, +0x4103878600, +0x106183878600, +0x6083870400, +0x6183870600, +0x106183878600, +0x61c3c58e00, +0x2083c71c00, +0x106183c78e00, +0x6083858e00, +0x4083870600, +0x61c3cf0e00, +0x4103c70800, +0x4083050c00, +0xc325cf8000, +0x6182c70e00, +0x204183870400, +0x8102c78600, +0x30c3870c00, +0x20c307cf8e00, +0x6081870c00, +0x8365cf8400, +0x30c3078c00, +0x106183c78c00, +0x106083870c00, +0x4183878600, +0x2083870800, +0x107082870c00, +0x104082c70400, +0x4081c3c78600, +0xc307e7c300, +0x2083848e00, +0x106182c70400, +0x6083858600, +0x20c106c78e00, +0x104083870600, +0x8103c48700, +0x104083c78400, +0x6083870400, +0x20c143c78600, +0x6183070600, +0x4143870400, +0x104103c78c00, +0x81c3c70600, +0x4183868200, +0x20c1870c00, +0xc103870600, +0x61c3cf8400, +0x3083870c00, +0x82082870c00, +0x102083870c00, +0x4102c70c00, +0x106083c78400, +0x208306c78600, +0x4083870600, +0x41c3870400, +0x41c3c58e00, +0x6083870e00, +0x4183c78e00, +0x40c103c78400, +0x6083850400, +0x4083850e00, +0x4083870400, +0x7083070600, +0x418367e7c600, +0x106183cf0c00, +0x4102c70c00, +0x6183070600, +0x106083070c00, +0x204106eb8e00, +0x6102070600, +0x2041c7c78600, +0x106083070600, +0x30838f1c00, +0x182e54f8f00, +0x204182c78e00, +0x20c106c78600, +0x4102850600, +0x6081070800, +0x6083850c00, +0x82083cf8e00, +0x20c107c78600, +0x8102c68600, +0x4082870000, +0x6103c78e00, +0x6183878600, +0x820c3870c00, +0x104182470400, +0x41c3850c00, +0x4083870400, +0x6103850600, +0x204182078600, +0x4183870600, +0x4103870600, +0x408102c38000, +0x31c7cd9e00, +0x8103c68600, +0x4081870000, +0xc143c68700, +0x4183830800, +0x6082840e00, +0x20c103c78600, +0x4183070600, +0x106083070600, +0x4182c78400, +0x4183c78e00, +0x2083c48e00, +0x4183c78400, +0x4106c7c400, +0x204183878600, +0x106082c70c00, +0x104000870000, +0x82083cf8c00, +0x106183878e00, +0x104104cf8e00, +0x20c1c3c78700, +0x40c3c70c00, +0x4183c78400, +0x6083850c00, +0x208307c78600, +0x8167c78e00, 
+0x6083070400, +0x60c3870c00, +0x4182878600, +0xc2656e8600, +0x6083850c00, +0x8102c78600, +0x208106478000, +0x61c7cf8e00, +0x8167c78600, +0xc1654c8e00, +0x4103850e00, +0xc167078c00, +0x82082c70c00, +0x20c307cf8600, +0xe103878e00, +0x81810c00, +0x2081850c00, +0x4183870600, +0x204103850400, +0x4083cf0800, +0x6081870400, +0x4083070600, +0x41a3cf8c00, +0x208147c68600, +0x41e3478c00, +0x106187cf8e00, +0x408106cf8c00, +0x208103c78400, +0x7083858e00, +0x104102070600, +0x4081830000, +0x408103c78400, +0x204103870600, +0x104082070400, +0xc143c78600, +0x2083c70c00, +0x208123c70000, +0x6081030400, +0x204182c68e00, +0x204083850400, +0x30c3cd8e00, +0x4123c70800, +0x2083850e00, +0x20c143c70400, +0x4183c70600, +0x20c107c78600, +0x61c3cf9e00, +0x106083870c00, +0x204102858e00, +0x8143470400, +0x20c1070400, +0x3083850c00, +0x208102c78700, +0x4107cb8c00, +0x6183cf8c00, +0x6083070600, +0x20c124cf8c00, +0x4167cf8c00, +0xc103c70600, +0x30c3060c00, +0x104182870600, +0x82083c70800, +0xc143c78e00, +0x2082cf8c00, +0xc167cf1c00, +0x208103c68600, +0x4183c78600, +0x20c3cf0c00, +0x6083070400, +0x2083870c00, +0xc1674f0e00, +0x104083870600, +0x8342c78400, +0x4083050c00, +0x8167c78e00, +0x106082c70c00, +0x41c3470c00, +0x2081850c00, +0xc102c78e00, +0x4081850400, +0x20c103878200, +0x4083870600, +0xe305c68e00, +0x4103c70400, +0x6083070400, +0x4082850c00, +0x6083870600, +0x106083c70800, +0x106183cf8e00, +0x106187cf8e00, +0x6083c50c00, +0x106182c78e00, +0x204102c70000, +0x104183070600, +0x2083850c00, +0xc103c78600, +0xc183c78600, +0xc1e3468600, +0x204102870600, +0x6083870600, +0x408265ef8600, +0x6182070600, +0x4081070400, +0x104083c68a00, +0x102083870c00, +0x102082c70c00, +0x2083870c00, +0x4083850c00, +0x7183070e00, +0x20828f0800, +0x408363c78600, +0x418245cf8400, +0x2083ce8e00, +0x104103870600, +0x2083c70000, +0x4082cb0c00, +0x106187cf8e00, +0x4102c78400, +0xe187c78f00, +0x106182878e00, +0x3083060c00, +0x4183c50c00, +0xc307c7c600, +0x104103c60700, +0x6083870c00, +0x6081870c00, +0x618302c7c300, 
+0x60c3850e00, +0x4083850c00, +0x6083850c00, +0x4143c68e00, +0x8102c78400, +0x4081030400, +0x204103c78400, +0xe103848700, +0x4081030600, +0x4183878600, +0x2081060c00, +0x106183c70400, +0x106183c70c00, +0x20c3c70c00, +0x2081070400, +0x2083070c00, +0x204082878600, +0x4183870600, +0x106083878e00, +0x4183cf0c00, +0x6083878c00, +0x20c102c78600, +0x6081070c00, +0x4103c48600, +0x104106cf8c00, +0x4081050400, +0x408102870200, +0x4187cf8e00, +0x6083850e00, +0xc107ec4e00, +0x4081870400, +0x102083870400, +0xc163c70400, +0x6183c48c00, +0x4103850400, +0x106187cf8e00, +0x2083870c00, +0x2082070e00, +0x8325c98c00, +0x4182c70400, +0x8143c30400, +0x408102c78600, +0x2083c70c00, +0x2083070c00, +0x4083830600, +0x4083850c00, +0xc324cf8400, +0x4182c78400, +0x4083870400, +0x408142c78600, +0x6143878e00, +0x204183c78e00, +0x104083870400, +0x20c3070400, +0x8003830400, +0x408307ef8400, +0x30c38f0c00, +0x204104ce0c00, +0x6083870c00, +0x2083c70c00, +0x102083070e00, +0x204183c70c00, +0x204102870400, +0x20c102c78600, +0x1061828f0c00, +0x4082870400, +0x6183070e00, +0xc306ef8e00, +0x104183878e00, +0xc122c70600, +0x8165470c00, +0x2083850e00, +0x30c3cf8e00, +0x4102078600, +0x204183c78600, +0x204183870400, +0x41c3c70c00, +0x2083878c00, +0x4183878600, +0x61c3878600, +0x6083070600, +0x2083850c00, +0x20c103c78400, +0x8162c70c00, +0x4102850400, +0x208103c28400, +0xc143c78400, +0x4183830600, +0x102083870800, +0x2083850600, +0x104083c70c00, +0x204102c78c00, +0x4082868600, +0x61c3cf0c00, +0x106183878c00, +0x2081850c00, +0x104182870600, +0x10c107c68600, +0x4103c68600, +0x4183878600, +0x6182878e00, +0x4183c78600, +0x208265cc8e00, +0x8347e78200, +0x8103870000, +0x106183878c00, +0x6183c78e00, +0x2081050c00, +0x4183c68e00, +0x106083070600, +0x61e3cf0c00, +0x6081070400, +0x4082050600, +0x4103870400, +0x20e3cf0c00, +0x4182870400, +0x6083870c00, +0x20c103cf8600, +0x6083850e00, +0x20c3870c00, +0x6183870e00, +0x2083858e00, +0x2083850600, +0x102081870400, +0x4143c70600, +0xc167cf8e00, +0x106182cf8c00, +0x8163450400, 
+0x204083878600, +0x6081850400, +0x408367cf8600, +0x830c3070e00, +0xc186c78e00, +0x3083870400, +0x204103c70400, +0x204183878600, +0x104182c70c00, +0x208103c28600, +0x6082870400, +0x4183870400, +0x104106cf8c00, +0x204183c70400, +0xc083858600, +0x2081070400, +0x104002070600, +0x30c38b1c00, +0x408103878200, +0x2183cf8c00, +0x4183878600, +0x106082870c00, +0x6183878600, +0x10f305ce8f00, +0x8102c50600, +0x104102c78200, +0x4103848600, +0x4102068600, +0x106183c78e00, +0x106083870400, +0x4183870400, +0x104106cf8e00, +0xe143868700, diff --git a/samples/digitrec/digitrec/data/training_set_7.dat b/samples/digitrec/digitrec/data/training_set_7.dat new file mode 100644 index 000000000..56f54a602 --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_7.dat @@ -0,0 +1,1800 @@ +0x23c3c70604, +0xc041020400, +0x61e3820c10, +0x3c4810304, +0x1e3e0830408, +0x60c1820400, +0x40e0820c10, +0x3e0830c18, +0x1c1820810, +0x41e6c10608, +0x3e2c30418, +0x1e2820c10, +0x51e6c30c10, +0x21c3820400, +0xe3c1838400, +0xc3c3c78600, +0x1e1820400, +0x81c0820c10, +0x21e2830c18, +0x23c0830600, +0x1c2820410, +0x3e0c30400, +0x43c0830608, +0x3c0810600, +0x1c0820400, +0x43c0810204, +0xc2810408, +0x60c1860800, +0x1c2830608, +0xe3c1820408, +0x43c0830408, +0x1e1820c18, +0x61c0810408, +0x6344810608, +0x83e6830618, +0x1c1820c10, +0x1e0820c10, +0x1e2830608, +0xf061871820, +0x3c0820408, +0x11e7c30c10, +0x3e4c10408, +0x10e2820410, +0x3e0830608, +0x3c0830400, +0x3e0c10418, +0x81c2810204, +0x21c1830410, +0x1c0820410, +0xe141860400, +0x1e2820c10, +0x1e0830c10, +0x3c0810304, +0x63e4830c10, +0x3e0c30408, +0x40c1820c00, +0x23c0830408, +0x1e4830410, +0x43c1838600, +0x4181830408, +0x1e6830410, +0x63e0c30c10, +0x1e7408200, +0x60c3860400, +0x60c0820410, +0x41c1830600, +0x1c1820810, +0x1e0c61800, +0x1e6c30c00, +0x3c6c10208, +0x1c1830408, +0x83c0810608, +0x6142820400, +0x3c0830408, +0x3e0870400, +0x43e4830608, +0x1c4820408, +0x103c0c18618, +0x4381030600, +0xe260830418, +0x61c0830410, +0x1e6820c10, +0x1c2810204, 
+0x7e0418204, +0x61e0c30c10, +0x1c1820408, +0x3c4890204, +0x23c4810408, +0xc040820c10, +0xc1c0830408, +0x1e0820410, +0x61c1c70400, +0x1e0820830, +0x1c0820408, +0x61c1820c18, +0x81c1820c10, +0x41c4810408, +0x1e0820c10, +0x21c1c60810, +0x1c1c0830408, +0x1c0810608, +0x3c0c30408, +0x61e0820810, +0x1e6c10608, +0x1c0820800, +0x61e3830c10, +0x43c4810200, +0x41e1820c10, +0x180c0830400, +0x1c0820408, +0x1c1c70600, +0xe3e0830c18, +0x61e2830408, +0x3e7c10204, +0xc3c1830604, +0x1c0820410, +0x3e0830c10, +0x1e0820410, +0x3e4c10608, +0x41c0830418, +0x20e3820410, +0x43c6c1820c, +0x21c1030408, +0xc3e0c61820, +0x41e2810410, +0x3c0810408, +0x3c0810608, +0x41e1860820, +0xc0820400, +0x3c0810204, +0x1c2830408, +0x7e0c10608, +0x1c0820408, +0x3e0830408, +0x1c0830c10, +0x3e4c30400, +0xc3c6830600, +0x23c1830408, +0xe3e0c30e10, +0x1e2820408, +0x3e0c30810, +0x3c4810204, +0x3ecc10204, +0x3e481060c, +0xc3820c10, +0x1e060830c10, +0x1c68b0408, +0x1c1820408, +0x41e2820c10, +0x7e0c10608, +0x23c0830408, +0x3c7c10204, +0x1c2810008, +0x3e0820410, +0x1c0820410, +0x3e0830408, +0x3c0810408, +0x3c0810408, +0x1e0820810, +0x1c2830408, +0x81c0820400, +0x1c0810208, +0x61c1070600, +0x23c3870408, +0x83c1810204, +0xc3c0820408, +0x3c1830608, +0xe6c30c30, +0x7c0c10200, +0xe083038400, +0x1e0830c10, +0x1e6810408, +0x1e287c600, +0x3e0c30c10, +0x3e4830204, +0x21c0820810, +0x23e0830410, +0x1e1820c10, +0x1c0830408, +0x41c0830c10, +0x61c0830c10, +0x1c0820408, +0xe2820810, +0x80c0820408, +0x3c1820408, +0x1c0830408, +0x3e0410608, +0x3e0830c10, +0xc1c0830408, +0x41c6cd020c, +0x21c0820410, +0x61e0810408, +0x3e0c10608, +0x61e0830400, +0x1e0830410, +0x4041820c00, +0x41e6890608, +0x3c0820408, +0x61e0c71800, +0x1e4830418, +0xe3c183060c, +0x3c0830608, +0x21c0830418, +0x3c0820408, +0x3c0830408, +0x41c0820408, +0x3c0810302, +0x21c1030600, +0x1e2820c10, +0x3c0810410, +0x1e6830408, +0x23e0c30c10, +0xc3c0810608, +0xb1e0830c10, +0xe1e0830c30, +0x3c0830408, +0x20c1820800, +0x3e0c10408, +0x1e0820c10, +0x6041861820, +0x3c4408208, 
+0x3c0830408, +0x1e0820830, +0x1c0810200, +0x83c3830608, +0x1e6c10608, +0x41c0830608, +0x3e0820408, +0x81e0820410, +0x41c0820408, +0x41c0820408, +0x103c0810204, +0x1c0820410, +0x3e4c50204, +0x41c2810204, +0x43c0820408, +0x1e3c20820, +0x81e3c30810, +0x43e0c30c10, +0x3c0810608, +0x4181038400, +0x3c0830408, +0x1c2820410, +0x1c2810400, +0xc1c0830408, +0xc183c60c08, +0x3e0c30408, +0x81e0c20810, +0xd1c3870c00, +0x11e4c10408, +0xc041870400, +0x3c0830408, +0x33e1820c00, +0x41e2820800, +0x4040800800, +0x3c3c20408, +0x7c1c30608, +0x21e0820800, +0x1e2820408, +0x3e2878408, +0x83c0830408, +0xc040820c10, +0x3e4490200, +0x23c4830408, +0x3c0830408, +0x1e1820820, +0x3e0820408, +0x3e0820810, +0x3e3c30400, +0x3e0830418, +0x1c0810408, +0x6040820810, +0x1e2820c10, +0x43c0810608, +0xc3c3c78604, +0x1c0820408, +0x41e0820c10, +0x1e1820800, +0x43c0830408, +0x3e0830408, +0x3e0c30408, +0x61c3c78810, +0x3e3c30c10, +0x41c2830604, +0x163c1878608, +0x41c1820408, +0x61c0820810, +0xe141830c08, +0x3e4410608, +0x7c2c10204, +0x1e68a0800, +0xe1861000, +0x1c0820408, +0x41c0820408, +0x183c0410408, +0x3e0830c10, +0x1c4830408, +0x3c7c10200, +0x6140820400, +0x70e18e0800, +0xe1c1820c10, +0xe1e0830c10, +0x3c0810608, +0x3c2830408, +0x3c0810204, +0x1c3c1c38604, +0x3c0810408, +0x41c2820410, +0x3c0810008, +0x3c0810208, +0x1c0830408, +0xc1c0820410, +0x1c0800408, +0x1c0830408, +0x3c1830608, +0x63e0830c10, +0x1c3820c10, +0x180c0810408, +0x3c0830608, +0x61e0820830, +0x1c0820408, +0xc083820400, +0x23e0830408, +0x1c0820408, +0x3c0810408, +0x21c0820408, +0x1e2830418, +0x1e6830400, +0x41e0830c30, +0xc1c0830c18, +0x3c0810408, +0x3c0820408, +0x61c3830c10, +0x1c0830408, +0x3c6830408, +0x3e0c30c10, +0x63e0c10410, +0x7e0810408, +0x43c383060c, +0x3e0810408, +0x3e0c30830, +0x1e2820c10, +0x1e2810408, +0x3e0c30408, +0x1c2810200, +0x23c1830408, +0xc2820408, +0x3c1c60400, +0x1c2810410, +0x3c1870400, +0x21e0820810, +0x3c0830408, +0x61e0820c10, +0xc0c1830408, +0x41c1820408, +0x41c1820c10, +0xc141830408, +0x80c1820408, +0x63e6c30608, 
+0x11c1830408, +0x103c20000, +0xc3c0830408, +0x60c1820818, +0x1c0820408, +0x4041020408, +0x1e0820c10, +0x41c2810408, +0x8182810204, +0x23e0830408, +0x41c0830408, +0x3e4c10608, +0x83e4830608, +0x21e0820c10, +0x21e1830408, +0x41c0830408, +0x81c0830400, +0x67c0830604, +0x7e0830408, +0x1e1860810, +0x1e2830410, +0xc081038c00, +0x1c2850204, +0x1c0e0830c10, +0x3c0810604, +0x83c0830408, +0x1c0820408, +0x6140870400, +0xc040820800, +0x1e0820810, +0x83c0c10408, +0x181c0810608, +0x1c2810408, +0x41c0810600, +0x1c0820400, +0x21c2830408, +0xc1e0830c10, +0x3e0830408, +0x1c3c0830408, +0x1e0830410, +0x3c0810204, +0x60c0820810, +0x61e2830c10, +0x1c0820408, +0x1c0810408, +0x3c0820408, +0xe1c0830608, +0x3e0430c10, +0x23c0830408, +0x6040870400, +0x7c3c10208, +0x60c1860810, +0x81c0830408, +0x1e6820410, +0x21c1820c10, +0x3e1820c08, +0x6040820410, +0xc3c0820408, +0x81e0830c10, +0x43c081060c, +0x41c2810408, +0x1c1820408, +0x61c2820c00, +0x1ecc10200, +0x1e0820810, +0x21e0820d10, +0x1c2830418, +0xe1e0c71810, +0x1e3820810, +0x1c0820410, +0x1c2830408, +0xc1c0c30c10, +0x23e0c30c10, +0xc3c0830408, +0x1c1830600, +0x1c0810204, +0x1e4830400, +0x1c0810408, +0x181c1870600, +0xc0c1870c00, +0x3c0c10208, +0xc040820400, +0x21c0820c10, +0x1e2830408, +0xc3c0830604, +0x3c0820408, +0x40c1820800, +0x6142830408, +0x181c0810400, +0x1e6cb0410, +0x10e0801800, +0x1c0820408, +0x3c0830408, +0x327c10608, +0x4040820800, +0x3e0c30c10, +0x1e2820400, +0x1c1c20400, +0x1e6830c30, +0x41c1830408, +0x143c0810408, +0x1e7878410, +0x1e0830410, +0x1c0810400, +0x1c6c10208, +0x1c0820410, +0x3e081060c, +0xc1c0830400, +0x3c4c10208, +0xe1e0820c10, +0x61c2830c10, +0x3e0830c10, +0x3c0810204, +0x1c0810200, +0xe1e6830418, +0xe3c0830c10, +0xc1c6850600, +0x123e0810418, +0x61c0820410, +0x3e1820408, +0x43c2c10608, +0x3c0810608, +0x3c0830410, +0x21c0820410, +0x183c2830702, +0x1e6830418, +0x41c0830408, +0xc041c20c00, +0x11c6830408, +0x1c0830408, +0xc1c0830408, +0x3c0810408, +0x21c6830408, +0xc1c0820400, +0x81c3c78c08, +0x61e0810410, 
+0x81c0830400, +0x1e240870608, +0x1e68b060c, +0x61c1870c10, +0x3c0810400, +0x3c1878400, +0x60c1870810, +0xe1c0820408, +0x1c2810204, +0x7e0830408, +0x6020861800, +0xc3c1820604, +0x3c0810204, +0x4141830400, +0x7c30400, +0x1c0870400, +0x3c0810408, +0x23c1830608, +0x4041820c10, +0xe1c1870c10, +0x81e2820c10, +0xc3e2830604, +0x63c0830408, +0x7c0c10204, +0x40e1820c08, +0x63c0c70408, +0x3c0830408, +0x1e0830410, +0x1e0820c10, +0x3c3830608, +0xc2820810, +0x41c2810408, +0x1e2820c10, +0xe2820410, +0x1c0810408, +0x71e0861820, +0x81c0820408, +0x1e2820810, +0x1c0820410, +0x21e6490400, +0xe3e1878c08, +0x3e4c30418, +0x1e6820418, +0x1c0820410, +0x1e2830410, +0x21e0820c10, +0x23c0830408, +0x43c4810608, +0x21c1820c10, +0x3e3c30608, +0x41c3820408, +0x81c0830400, +0x61e1820c10, +0x3c4890204, +0xe0e0c31820, +0x246810204, +0x1c0830410, +0xc140830408, +0x61c1820c10, +0xe3c0830408, +0x41e2830408, +0x1e7c30c00, +0xc1c2870408, +0x1c4810204, +0x3e0820810, +0x1e2830c10, +0xe0c1830400, +0xe1e0830c10, +0x1c3820408, +0xa3c1830418, +0x81c3820400, +0x3c0830408, +0x1e0820810, +0x3e0830408, +0x83e0830408, +0x41c0830408, +0x1c0820410, +0x61c1070400, +0x21c0820400, +0xc0c1820400, +0x1c0c0878408, +0x1e2820c10, +0xe1e0c71800, +0x1c4810208, +0x41e6c30410, +0x103c3c78604, +0x21c0820c10, +0x3a4c10600, +0x3e7c10204, +0x23e2830408, +0x81c1830200, +0x1e2810408, +0x3e0c10600, +0x1e2820c10, +0x63c1820408, +0x43c0810204, +0x1c0830410, +0x3e4438200, +0x41c2830400, +0x83c083060c, +0x3e0830400, +0x1c1820c10, +0x1c0830410, +0x43e0830408, +0x3e7c30608, +0x1c1c1c30600, +0x1c1820408, +0x1e3820408, +0x31e0820c10, +0x1363810600, +0x23e0830c10, +0x81c0820400, +0x103c1c18e00, +0x1c0830418, +0x7c0c10608, +0xc2820408, +0x3c0810204, +0x181c60000, +0xc1c6838302, +0x1c0820408, +0xc1020400, +0x1c0830418, +0x1c0810410, +0x3e0830410, +0xc041820408, +0x3e4830c18, +0xc2820c10, +0x3e0c30c18, +0x3e4830c00, +0x1e0810408, +0x43e0c30c18, +0x61c0820c10, +0x3c0810208, +0xc1c0830408, +0x1c0820408, +0x3c4878200, +0x3c6810604, +0x3e0830c00, 
+0x3e0810204, +0x1e6c90418, +0xf324878c00, +0x1e1820c10, +0x3c0810408, +0x1c7830408, +0x1c0830408, +0x33e0c30c18, +0x181030200, +0x3e0c10618, +0x23e4c30c10, +0xc0c1c60408, +0x6141030408, +0xe3c083060c, +0x1c2830400, +0x21c1830200, +0x3e481020c, +0x1e2830c10, +0x1e0830400, +0x21e1870418, +0x3c1878408, +0x1e0821820, +0x60c2820c10, +0x3c0830408, +0xe1c0830c10, +0x7c0c18204, +0x1e6c30410, +0x3e4c10608, +0x1c2810608, +0x3e0810408, +0x21e0820c30, +0x1e0830c10, +0x1c2830604, +0x3c0838410, +0x3e283060c, +0xc3c0810204, +0x1c2810204, +0xc1c0830418, +0x3e0c30c10, +0x6181070600, +0xc040820c10, +0x1c6810204, +0xe1c0830408, +0x43c1820408, +0x3c0810608, +0xc3830400, +0xe3e0830c10, +0x1c0830410, +0x1e4830408, +0x167c10408, +0x41c0830c18, +0x3c3c38c00, +0x10c2820c10, +0x3c1830408, +0xc0c3820c10, +0x1e68b0418, +0x7e6c10604, +0x1c0810608, +0xc3820400, +0x61f0c30c10, +0xc1c0820408, +0x6041820810, +0x1c6810204, +0x3c0830c18, +0x21c1078c00, +0xe041070800, +0x1c0820408, +0x3c0810608, +0x1c3820400, +0x43c0878200, +0x41e2c30830, +0xc2830408, +0x81e0830c10, +0xe0c1870c10, +0x1e0820c10, +0x41c1020400, +0x3c0810208, +0x6040820800, +0x3e0c30400, +0xe1e1860c10, +0x43c0830408, +0x3e0830408, +0x60c1820c10, +0xf1e1861800, +0x1e2c30c30, +0x7e0c10204, +0x61c1820c10, +0x43c0810204, +0x1e6820410, +0xe3c0830408, +0x1e4830410, +0x303c0c10204, +0x23e0830c10, +0x6041020c00, +0xc7e0c30408, +0x21c0830408, +0x21c0820408, +0x3c0810608, +0xf0e08f0c10, +0x43e0820c10, +0x63e3c30408, +0x43e0c10418, +0x101c0870400, +0x60c0820c10, +0xc0c1020400, +0x21e0830c10, +0x181810204, +0xc1c0830408, +0x43c0810204, +0x1e0810408, +0x1e0820c10, +0xe041820c00, +0x1e2810408, +0x23e4c10408, +0x3e3810208, +0x61c0830c10, +0x41c0820410, +0xe040820e00, +0x43e4c18204, +0x7c0810208, +0xc3e0810608, +0x3e0c60800, +0x3c0830c10, +0xc1820408, +0x21c6830408, +0x3c0810400, +0xc0c38f0600, +0x1c0820c10, +0xc1c0830408, +0x1c1820400, +0x13e4830400, +0xc3850400, +0x21e2820c10, +0xe1c0820810, +0x81e6830408, +0xc3c1870400, +0xc1c2830408, +0x3e0820c10, 
+0x61e1820c10, +0xe1c0c78408, +0x11c2820c10, +0xe0e0830c10, +0x23c0830408, +0x21c0820410, +0x71a6c90408, +0x43e0c30c10, +0x61c2820810, +0x8180810600, +0x183e0c18608, +0x41e0c30c20, +0x3e4830408, +0x1c2810608, +0xc2c0878408, +0x3140820c10, +0xe1c0830408, +0xe0c1820408, +0x23c0830410, +0x3c0810204, +0x3e1c78608, +0x1c0820408, +0x61e0830410, +0x63c4830408, +0xe1c1820c18, +0x3c4c10204, +0x3c0810204, +0x3e7c18204, +0x1c2830408, +0x80c3820400, +0x3c0830408, +0xc1c0878a00, +0x3c0810408, +0x4120820c10, +0xe043c60400, +0x1c2c30208, +0x7c0830604, +0x3e3c30c00, +0x1c0830608, +0x1c6810204, +0x3c0830608, +0x3c1810200, +0x3e4810604, +0x1e2830c10, +0x41e4c30c20, +0x1c2820410, +0xc0c1820408, +0xc3c1830604, +0x3e0c30410, +0x20c1820c08, +0x3c0810604, +0x61e2830c10, +0x6142878c00, +0x143c0810204, +0x61c1820408, +0x3c0830408, +0x20c0820410, +0x20e1861820, +0x3e281060c, +0x3c3830410, +0x11e2830400, +0x3c083060c, +0xe3e7c30608, +0x1e0820410, +0x1c2810408, +0xc3c1870600, +0x3c0810608, +0x3e0820410, +0x3e0830c10, +0x3e0830418, +0xe040870408, +0x3c4810204, +0x181c60400, +0x3e0830604, +0x381c60408, +0x63e3838c10, +0x3c0810604, +0x3c0810608, +0x1c0820408, +0x60e1861800, +0x1e3830400, +0x3c2810302, +0x3c0810204, +0x7e0c10608, +0x1c2820408, +0x60c1820c10, +0xc1e0c30c10, +0xc0c1860c08, +0x3e781060c, +0xe3c1830608, +0xc1c0c10608, +0x1e0830c10, +0x1c0830410, +0x21c2870c08, +0x1c2810204, +0x47e0c1020c, +0xc0c0830400, +0x41e0820410, +0x21c0830400, +0x41e0820c10, +0x3e3c78c18, +0x41c2870408, +0x1c0830408, +0x2000020800, +0x7020820820, +0x43c1c70408, +0x3e4410000, +0x3c1830604, +0xc1c0830c10, +0x1e2820410, +0x1e2820c10, +0x6141820c10, +0x3c0810608, +0x3e1830618, +0x80c0830400, +0xc081870400, +0x83c18f8400, +0x81c2810200, +0xc0c3c30800, +0x81c1820408, +0x1e3830c10, +0x103c0810204, +0x1c0820400, +0x183e0c30c10, +0x41c0820400, +0x81e6c10408, +0x43c1830608, +0x3c0810608, +0x1c0820400, +0x1c0820410, +0x1c0830408, +0x21c2820c00, +0x23e0830c08, +0x3e0830408, +0x41e3830c10, +0xc3c0830408, +0x3c0810208, 
+0x3c0830408, +0x21c0820410, +0x7e7c30600, +0x21e0820c10, +0x1e0820810, +0x3e2c10604, +0x21c2820c10, +0x3c0820408, +0x6140830400, +0x6141020408, +0x3c0830408, +0x41c1830204, +0x3e0830408, +0x20c0820810, +0x1e3820c10, +0x8240830408, +0x3c0810608, +0xa1e0820c10, +0x3e0830c18, +0xc141820400, +0xc1c0820410, +0xe1c1830c00, +0x3c0c10208, +0xe141820c10, +0x61c0820410, +0x1e2820410, +0x1c4810204, +0x51e2820c10, +0x40c1820810, +0x3c0c18204, +0xe1c0830410, +0x41e6828c10, +0x41c0820408, +0x3c0810608, +0x1c0820c10, +0xc1c1820400, +0x3c1830408, +0x3c3830608, +0x380810204, +0x43c0810204, +0xc081020400, +0x61e1820810, +0xe1c1c60c10, +0xe1e0830418, +0xc083830600, +0x1c2810408, +0x21e7890608, +0xe0c1020408, +0x1e0830408, +0x63e0830c10, +0x81e0820820, +0x61c1870c10, +0x61c1820c10, +0x8180810200, +0xc1c1820408, +0x6141820c00, +0x31e0820c10, +0xc0820410, +0x23c0830408, +0x3e0830408, +0x41c0830408, +0x3e7820408, +0x1c0810408, +0xc2800408, +0x1e0830418, +0x103c087060c, +0xc041020410, +0x60c1820400, +0x61c1820810, +0x3c6810604, +0xc0c1830608, +0x3c0810608, +0xe1e3830608, +0x2181030400, +0x43c0830410, +0xc140830408, +0x81c3c70408, +0x21e2810408, +0x23c0810204, +0x3c0830604, +0x7c0c18608, +0x1e2c10410, +0x41e0c30c10, +0x3c0810208, +0x3c0810408, +0xc1c0820c10, +0x23e4c10418, +0x61c1870800, +0x1c3c0838702, +0x3e0c30c10, +0x41c1830608, +0x40c1820c00, +0xc3c0810204, +0x1e0820c10, +0x1e0830410, +0x43e0830608, +0x3e0c10410, +0x21c2810608, +0x81c3860408, +0x1c1830608, +0x71c0820c10, +0x23c0830c10, +0x71e0820c10, +0x1e3820c10, +0x3c0820408, +0x6041860400, +0x7c081060c, +0x1c0830400, +0x4142830400, +0x3c1820408, +0x1c1870408, +0x1c0820800, +0x1c2820408, +0xc140830408, +0xc1c1878400, +0x1c2820410, +0x41c6810604, +0x81c0830400, +0x1c0820800, +0x41c1830410, +0x61c1820c00, +0xe041870408, +0x3e281020c, +0x60c1820c00, +0x1e6820410, +0x81c0830400, +0x41c2820408, +0x21c3820810, +0x3c0810204, +0x1e4830410, +0x1c0830200, +0x1c0810400, +0x3e0c30c30, +0xc3c0810608, +0x81c0820408, +0x3c0810608, +0x7c0810204, 
+0x3e0410410, +0x381c20400, +0x6041820c10, +0x3e0c30c10, +0x41c0830408, +0x3e4810608, +0x3e0830c10, +0x27c083060c, +0x103c3838418, +0x41c0830408, +0x1c0830400, +0x81c0810400, +0x1c2810608, +0x3c0810408, +0x20c3020408, +0x41c1c20408, +0x3e2c10200, +0x41c2830408, +0x103c0810608, +0x3e0820408, +0xe040820c10, +0x1c2810204, +0x71e3830c10, +0x81e2c10408, +0x3c0830408, +0x1e0830418, +0x3c1830204, +0x3c0830608, +0x23c2810608, +0xf3c18f0604, +0x3c0830408, +0x81e0830c10, +0x3c0810408, +0x11e6c30c08, +0x21c0820c10, +0x3e0820810, +0x1c0820410, +0x3c2810408, +0x3c0830408, +0x1c1830408, +0x21c2810408, +0x1c043cf0204, +0x63e0430408, +0x1c3820c10, +0xe1c0830e00, +0x61c1020410, +0x41e0820c10, +0x21c0820410, +0x3e0810408, +0x3e7830408, +0x3c0830408, +0x61e6c10608, +0x1e6830418, +0x3e7c30608, +0x7c0c18304, +0x43e4c10400, +0x3c3810418, +0x1e6c10400, +0x3c0830410, +0xc041820408, +0x43c0830408, +0xc0c1830c10, +0x1c1c1830600, +0x3e0830410, +0x1c0810604, +0xc0c1830408, +0x81c0810408, +0xc140830408, +0x83c0830608, +0xe0c1020c00, +0x1e4410200, +0x1c0830200, +0x1c0c0870608, +0x31a0820820, +0x3c6810204, +0x4142830400, +0x3c183060c, +0x4183c30600, +0xe1c1870c10, +0x3e0820410, +0x1e3820410, +0xe0418f0200, +0x6040830800, +0xc1e0c10c10, +0xc0c1860400, +0x43c0c10608, +0xc1c1c70c10, +0x1e0830c10, +0x1c0830400, +0xc0c0c60408, +0x61e2820810, +0x41c1820400, +0x21c2830408, +0xe3c1878c10, +0x7c0818304, +0x21c0820408, +0x3e7c98200, +0x3c0810204, +0xc040830410, +0x1c2830418, +0x1c2830408, +0x11e68b0400, +0x21e0820c10, +0x23e5c30c10, +0x3c4810204, +0x23c3878c10, +0x81e0830c10, +0x1e0820c10, +0x61c1820c10, +0x3e4410204, +0xc1c0830c10, +0x3c0830608, +0x3e0c30c30, +0x41c0830410, +0x10c0820000, +0x7e0810408, +0xc0820800, +0x1c3020408, +0x33e0c78c00, +0x43e0c30c10, +0x1c1830408, +0x63e0830c10, +0x11e68b0c10, +0x8381830408, +0x121c1870204, +0x3e4810204, +0x41c6838408, +0xc140830400, +0x1c1820408, +0x3c081020c, +0x43c0830408, +0x7c8c18304, +0x1c0820408, +0x1e6830608, +0x41c0810608, +0x3e0830408, +0xe2830410, 
+0x21e4810418, +0x21c2830418, +0x3c3810204, +0xc0820c10, +0x61c0830408, +0x20c3820810, +0x3c0810408, +0xc1c0870400, +0x1e4410410, +0x1e4810408, +0x1e2830410, +0x3e0830c18, +0x1e4820800, +0xc1c0830410, +0x3c4810608, +0xc1c0830608, +0x3efc10200, +0x6341830408, +0x61c1070408, +0x6041820810, +0x1c3c20400, +0x1e1820c10, +0x1c1820408, +0x61c1820c10, +0x1e7c98300, +0x3e0c10400, +0x3e2830410, +0x83c3830604, +0x4141020c00, +0x11e1820810, +0x6043820400, +0x21c0820400, +0x3e4830410, +0x1c1c0830408, +0x83e4810608, +0xc1c0830410, +0x21e0820830, +0x1e0830410, +0x3e4c1060c, +0x4140820400, +0x3e0810608, +0x41e0830c10, +0x3e1820c18, +0x3c081040c, +0x43c0810608, +0x61e2830c10, +0x10e10e0800, +0x3c6810204, +0x60e0820800, +0x3c0810608, +0x21c1820408, +0x67e0810608, +0xc1c1820404, +0x3c0810204, +0x81c3870400, +0x1e0820c10, +0x1c0820408, +0x83c0810608, +0xc181830604, +0xc3c7830608, +0xc1c1830408, +0xe3e1830408, +0xc0c1830600, +0x1c0830408, +0xc3c1870408, +0x43c0830604, +0x3c0c30408, +0x83e0c30c18, +0x61e2890410, +0x61c0820810, +0x3e0c10400, +0xc240830400, +0x21e1820c18, +0x3c1830604, +0x30e1860800, +0x1c0810408, +0xf360820c10, +0x1c1820408, +0x83c0810408, +0x21c0820c10, +0x40e1871c00, +0x63e4830408, +0x1c0820810, +0x73e0c10410, +0x3e0810408, +0x81c0810408, +0x43c0830408, +0x3c1820408, +0xc3c6810204, +0x41c0820408, +0x1c0020408, +0x6041860810, +0x41e4830c10, +0x23e4c10608, +0x3c4c18304, +0x1e2820c10, +0x63e4810204, +0x23c583060c, +0x21e0820c10, +0x1c181078400, +0x41e2820c10, +0x21c0820c10, +0xe3c0830c10, +0x1c0820410, +0x21e2830c10, +0x1c0820410, +0x1c1860800, +0x1c3830400, +0x1c6810608, +0x41c0830408, +0x47c1830604, +0x3c1878400, +0x1c2830408, +0x23c0820408, +0x6140820810, +0x1e2820820, +0x3c0810408, +0x3e0c10408, +0x83e0c30c10, +0x3c2810200, +0x1e6c10410, +0x81c0830408, +0x81c0830c10, +0xe1e1830c10, +0x60c3c60c08, +0xe240810600, +0x41c1820408, +0x31e3820c10, +0x1c0820410, +0x3c0c1060c, +0x41e0830c10, +0x21e0830c30, +0x3c0830408, +0x3e0c10618, +0x60e1861800, +0x1e6c30418, +0x3c0810608, 
+0x1c0820408, +0x1c0820800, +0xe3e0830c10, +0xc3c0810204, +0xe0e3860800, +0x83c0830408, +0x23c0830408, +0x1e0820810, +0x1c0820408, +0x41e0820820, +0x182810208, +0x7e0c30608, +0x11c1820800, +0x31e2830c10, +0xe160820c00, +0x4041020408, +0x1c1c3c70608, +0x43c0810208, +0x1c0830408, +0x1c2820418, +0x3e0c30c10, +0x2041020800, +0x61c2830418, +0x41c0820408, +0x23e0830410, +0x1e2820c10, +0x1c0830410, +0x43c0870408, +0x3c0810608, +0x3e0c30c00, +0x83c0810408, +0x3e0830408, +0x3e2450204, +0x3e2c30c10, +0x4140810408, +0x6140820400, +0xe1c1830c00, +0xe6c10400, +0x7c30400, +0x41c3820408, +0x3c0830408, +0x1c0830418, +0x140810408, +0x21c1820410, +0x41c0830408, +0x1e1830c10, +0x1c1810204, +0x1e6830418, +0x183c20400, +0x63c1820c18, +0x11e7820c10, +0xe0c0820c10, +0x3c0830608, +0x43c0830604, +0x1c3c0c10608, +0x60c3860800, +0xe141830400, +0x3e6c30400, +0x1c2810408, +0x3c0810200, +0xc1820c10, +0xe140820c10, +0x1e0820410, +0x3c0820408, +0x21c2820400, +0x3061861820, +0x3c0830408, +0x23e4810418, +0x1e7c10608, +0x1c2810200, +0x183c1c30c00, +0x380818204, +0xe0c3c20c00, +0xc1c183060c, +0x41c0820408, +0x63c0830408, +0x7e6410204, +0x1e4830410, +0x3c1820408, +0x80c0c30800, +0x61e0830c10, +0x81e0820c10, +0x83c083060c, +0x1e6810408, +0x3c1810204, +0x3e0c30c18, +0x23e6878204, +0x71e0820c18, +0x1e2820810, +0x61c1820c10, +0x43e0830410, +0x40c1820c10, +0x3c0830604, +0x83e0830608, +0x3e0830c18, +0x61c0830408, +0x1e0830410, +0x1c0820810, +0xe041870800, +0x43c4810200, +0x1c1830400, +0x3e0810408, +0x1c0830608, +0x1c2810408, +0x1c2820410, +0xe1c1820c08, +0x80c0810408, +0x61e0820c10, +0x60e1c70800, +0xc1020c00, +0x41c0820408, +0x41c0810400, +0x61c1830408, +0x41c0870408, +0xc041830600, +0x1e6c30c00, +0x3c3810204, +0x3c4810204, +0x3c081020c, +0x63e1830e10, +0x3e6830408, +0x3e0830c10, +0xe122c10400, +0x3c0820408, +0xc3c0818204, +0x3c0810408, +0xc1e0830c10, +0xc1c1830c10, +0x1c3820c10, +0x1c2830410, +0x1c2810200, +0x1e0830408, +0x21c0820410, +0x3e7810200, +0x81e0820c10, +0x7c0c10204, +0x41c0830c10, +0x3e0830410, 
+0x133e1820e00, +0xe040831c00, +0xc140830408, +0x3c0830408, +0x21c0820c10, +0x3e0830c10, +0x3e3c30c10, +0x31e2410c10, +0x1e0830400, +0x1c0820408, +0xc1c1830608, +0xc3c0870408, +0x81c0820400, +0x20c1820400, +0x3c0810204, +0x30a2820800, +0x1c2810400, +0x83c0810608, +0x3c0810204, +0x1c0810600, +0x3c4830608, +0xc041830400, +0x21c2820c10, +0x41020408, +0x1c1820810, +0x81c1830600, +0x1c2830408, +0x1e0830c10, +0x1c6810408, +0x1c0830408, +0x3c0810408, +0x23e0c30410, +0x1e0820820, +0x83c0c30418, +0x1c0c1870200, +0xc0c0820800, +0x7e0810408, +0x103c1830604, +0x1c1c1830608, +0x103c6c10204, +0xe3e4c1860c, +0x80c0820800, +0x61e6c10410, +0x3c0830408, +0x1e0830410, +0xc1e0820810, +0x31e6c30c10, +0x21c0820410, +0x1c2850204, +0x1e2830408, +0x60e1861800, +0x3c4830608, +0x3e0410410, +0x1c2850208, +0x81c0810408, +0x1c0c1878e00, +0x1c2810604, +0x101c0830400, +0x3e4830418, +0x1e3820c10, +0x23c0830408, +0x1c3c0810408, +0x21c0830408, +0x183c0810608, +0x1c2830408, +0x3c0830408, +0x1c4830408, +0x41c1870c10, +0x183c081060c, +0x380810204, +0x3c2830408, +0x41c0820408, +0x3e3c30200, +0x3c0810208, +0x1c0830408, +0x1e7810608, +0x21c0820418, +0x103e6c58204, +0x1c0810408, +0x1c2810204, +0x1c2830408, +0x1e2820c10, +0x1e6830410, +0x81c0830408, +0x23a0820810, +0x1e4830608, +0x1c2830418, +0x41e1820c10, +0xc160c30c10, +0x61c0830408, +0x61c1830408, +0x21e0830c30, +0xe3c1870408, +0x41c1870604, +0x43c0830408, +0x1e4830410, +0x41e0010c10, +0xc1c1830600, +0x61e2830410, +0x1e3830c10, +0xc0c0820408, +0x3e0830c10, +0x1c0810408, +0x61c3820c10, +0x3c0830604, +0x1f1e1830c18, +0x6140820800, +0x1c0810408, +0x23e4830408, +0x21e0820c10, +0x1e4810408, +0x61c5878408, +0x1e6830c10, +0x81c2830408, +0x20e2820c10, +0x3c2810204, +0x1c1820408, +0x3e4830418, +0x23e4810408, +0x1c0820408, +0x41c1820408, +0xe1c4830608, +0x3c0830408, +0x61c1820810, +0x6140820c10, +0x1c0c0810400, +0xc3c0830400, +0xc7e0c3060c, +0xe0861820, +0x7c0810304, +0xc1820810, +0x3c0830408, +0x3c0820408, +0x41c0820410, +0x1c0820c08, +0x41c1820408, +0x7c0810204, 
+0x1c2820408, +0xe041820408, +0xc1c0820408, +0x43c083060c, +0x3e0810408, +0x1c2820408, +0x81c2810608, +0x61e6830c10, +0x11c3820c10, +0x6120820810, +0x43c0810608, +0x3e0830c10, +0x41e6830400, +0x21c1020410, +0x1c0820408, +0x21c0870600, +0x3c6c58204, +0x4141860408, +0x1c0830400, +0x61c1879820, +0x3e4810608, +0x1c1820408, +0xc181030604, +0x1c1c0830408, +0xc3c0830408, +0x4041020400, +0x83c0830608, +0x60c1020400, +0x3e0c10608, +0x41c0820410, +0x1e0820418, +0x20c1820810, +0x3c2810608, +0x3e0830410, +0xe1c1870408, +0x3e0830408, +0x81e0830408, +0x81c0820400, +0x21c1820c10, +0xc0820418, +0x1c2830408, +0xc3c0830608, +0x3e0820c00, +0x6061860800, +0x1e0830c10, +0xc1c1c60c10, +0x43c0828500, +0x3e5830c18, +0xe3c1860c08, +0x63c1870600, +0xc040830c00, +0x3c0830408, +0x40c1020408, +0x3c0810418, +0x3e2c10608, +0x83e3c30608, +0x1c6830608, +0xc1c0830408, +0x3e3c30410, +0x41c2820408, +0xe1c3830608, +0x1c0820c10, +0x63c4830c10, +0x1c0830410, +0x4141860400, +0x1c2810410, +0x3c7810204, +0x1c0810408, +0x1c0810408, +0x63c0810608, +0x61c0830c10, +0x1c0810408, +0x1c0820400, +0x3e3830c00, +0x33e1820c10, +0x1e0820c00, +0x61c1020408, +0x21c1470400, +0xc1c3820408, +0x3c0810208, +0x83c0810608, +0x3e0830418, +0x1e4c30410, +0x3e4830604, +0x3c4830608, +0x21c1030400, +0x1c0820c10, +0x1e2820830, +0x1c040810410, +0x41c2810608, +0x8181830608, +0x1c0820400, +0xe3c0830608, +0x1e3c1870604, +0xe1e0820810, +0x3c0810608, +0xc3e0c30c10, +0x43c081020c, +0x11e6830c10, +0x43c0830408, +0xe2820800, +0x23c0830408, +0xc3c0830408, +0xc3e0830408, +0x41e6458608, +0xc1c4830408, +0x3c0810408, +0x1c0810204, +0x21e0820820, +0x21c1878c10, +0xe041020810, +0x1e7830418, +0x63c1830408, +0x3c4810204, +0x41e2830c10, +0xe0e0820810, +0x61c1860c00, +0x61e0831810, +0x4181070604, +0xc1e0830c10, +0x1e6c20820, +0x4041820800, +0x3c0830408, +0x1e3830c10, +0x4140820408, +0x1c2810204, +0x7c4c18204, +0xe0c0820408, +0x3c1c30400, +0x382838200, +0xc0c1820c00, +0x1c0810608, +0xc3c0830408, +0xc040820400, +0x11e0820c10, +0x23e0830418, +0x61c0830c10, 
+0x8080810400, +0xc3c0830408, +0x41c1820c10, +0xc140830408, +0x61c1820408, +0x1c6810204, +0x41c683060c, +0x61c1820408, +0x3e4830608, +0xe2820820, +0x3c0830408, +0x41c1c38410, +0x23c0810604, +0x61e0830c10, +0x3c0820410, +0x83c0810408, +0x1c7830408, +0x1e0c30c10, +0x63c1830408, +0x23c1830408, +0x7c0810204, +0x11e0820830, +0x3e3830400, +0xe141820820, +0x3c4810206, +0xe041820c10, +0x83e6830418, +0x1c3c3c78400, +0x381810204, +0x61c1870c00, +0x51e1861820, +0x1c040830408, +0x21c7830608, +0x1c0c1830400, +0x7c0810204, +0x80e1820c00, +0x1c0810208, +0x3c1830608, +0x1c0810408, +0x1c2810608, +0x43c0810204, +0x1c4810408, +0x61e0830c10, +0x3e0810410, +0x80c1820408, +0x1c1e0820c10, +0x23c1870600, +0x1c0830204, +0x3c0810604, +0x3e0410608, +0x1e1820c18, +0x1c2830408, +0xe240830408, +0x41c3830408, +0x1c0830408, +0x3e0830c10, +0x1c2870608, +0x1c0810604, +0x1e4c90410, +0x1c283060c, +0x3e0830408, +0x23c3830408, +0x3c0810608, +0x43e0830c10, +0xc3c0810608, +0x1e3e1c70608, +0xe1c1830408, +0x3c0820410, +0xc0c0820408, diff --git a/samples/digitrec/digitrec/data/training_set_8.dat b/samples/digitrec/digitrec/data/training_set_8.dat new file mode 100644 index 000000000..16b3330da --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_8.dat @@ -0,0 +1,1800 @@ +0x7163850e00, +0x6141030e00, +0xe1e1870e00, +0xe1c3858f00, +0x29f3860c10, +0xe1c1870e08, +0xc143858e00, +0x71e7860e18, +0x31c3850c00, +0xf1a3870c00, +0xf3e7c89f00, +0x71e3870e18, +0x60c1870c00, +0x7163860c00, +0x61e3830608, +0xc181c30600, +0xf1c60800, +0x60c1060c00, +0x60c1860c00, +0x71a1c71c00, +0x71e3830e00, +0x60c3050e00, +0x31e3861800, +0x4141820600, +0x71e3861e00, +0x30e30e0e00, +0x6147830c00, +0x31e38f1e00, +0x6143820408, +0x61c1820e00, +0x71e3870c00, +0x61e3870e10, +0x61e1c60c10, +0x60c1830600, +0x21b4c60c08, +0xe3c3c48f00, +0x20a2060c00, +0x6103e60c08, +0xc3e1858f0c, +0x71e3850e00, +0x60e18e1410, +0x61e1860c00, +0x4081830400, +0xc1c3870e00, +0xf367858e00, +0x7183c71c00, +0x71e1870c00, +0xe343878f00, +0x21e3860c10, 
+0x41e3070e08, +0x60c3830e08, +0xe141830e08, +0xf1638f0e00, +0x71e3861c10, +0x4181850e00, +0x61c1830e00, +0x61c1830e00, +0x4083840600, +0x61c307870c, +0x6183058f00, +0xc1c7830604, +0xf1e3870e08, +0x3161870e00, +0x11c3070e00, +0xf1e3cd9e00, +0x61f3870c00, +0xc1c3030200, +0x20c1860c00, +0x6163870c00, +0x71c1860c00, +0x61c1830c00, +0x30e3850e00, +0x61c3870e08, +0x6163870e00, +0x61e1860e00, +0xe3e3489f08, +0x61e3050e00, +0x30e1050c00, +0x61c3060e00, +0x60e1860c00, +0x60e1861c00, +0x7181870c00, +0xf303858e00, +0xc0c1850000, +0xc1c1830e00, +0x60c1850c10, +0x20e1861c00, +0xf323870e00, +0x31e7870e0c, +0xc163850e00, +0x6021860c00, +0x71e1060c00, +0x61c3870e00, +0x61e3870e00, +0x61c3060600, +0x7123861c00, +0x11e3820c00, +0x71e3860c00, +0x7123cd9300, +0x60c1030e00, +0x30e1060c10, +0x3e3c21c00, +0x61e3870e00, +0xc1e3070600, +0x61c1820c00, +0x6083c60c00, +0xa1c3058f00, +0x71238e1c00, +0xc141830600, +0x71e3060e18, +0x61c1870c00, +0x6143858e00, +0xe1c3030600, +0x21e3830c10, +0x60c1820c08, +0x60e3860c00, +0x3f3860c00, +0x6103c49100, +0x2081860c00, +0x6141870e08, +0x61e1820c00, +0xe243c8ce00, +0x6103848800, +0x61e7870e08, +0xc123870600, +0xe143860e00, +0x71e1860c00, +0x71b3c61c00, +0x31c3870c00, +0x61e3870c00, +0x30a1820c00, +0x71a3871a00, +0xf343869e00, +0xc1e2870600, +0x71e1871e10, +0xf103850e00, +0x61c1870c00, +0xc141870600, +0x50e1861c00, +0x70a1861c00, +0x71a38e1c00, +0xe1438c8f00, +0x61e3870e00, +0x6141060c00, +0x31a38f1c00, +0xf1a1870c10, +0x70e3061c00, +0x6081030400, +0x7081830c00, +0x61c3070600, +0x7163070c10, +0x6141850c00, +0xc141830600, +0xe263070f04, +0x7322c70c00, +0x60c1020c00, +0xc3c3820604, +0x61e1870e00, +0x1c1c1870e00, +0xd161830e00, +0xe343030600, +0xe343830600, +0xe1e1870e00, +0xc373c70e00, +0x61c1060e08, +0x61c1830e00, +0xc363878e00, +0x6143820c00, +0x71a3c99c00, +0x70b1861c00, +0x6146878608, +0x70c1851e00, +0xe3e3830600, +0xe1c1050e00, +0x61c3070e08, +0x6141820600, +0x6143878f00, +0xc141838600, +0x61c3060c00, +0x41c1830c00, +0xe1c3850e00, +0x6141030608, 
+0x8e38e1800, +0x68f1861c00, +0x30a38e1800, +0x30a1860c00, +0x61e3860c00, +0x31e3850e00, +0xe1e3830e08, +0x6041040a00, +0x70e3861c10, +0x71e1870c10, +0x6143060e00, +0xe3c3830e00, +0x60c3860c08, +0xe141870c00, +0x6123860c00, +0x4103830600, +0x61e1850e08, +0xe143858e08, +0xe1c3858f00, +0x6141870e00, +0xf3e3c70e00, +0x6141070c00, +0x6141030600, +0x6081820600, +0x50c1060c08, +0x11238e1c10, +0x60c3850c00, +0x30a1861c10, +0x60c1020400, +0x61e3830e08, +0x6143850c00, +0xe3e3838e00, +0xe0c1870e00, +0x71e3061e08, +0x61a18d1e00, +0x60c1050c00, +0xf323830e18, +0x71e3860c00, +0x70e1871c00, +0x61e1860c00, +0x6041020c00, +0x30e1870c00, +0x60e3860c00, +0xc183c28600, +0xe161870e08, +0xe121830c00, +0x61c3848e00, +0x7143820c00, +0x61c3860e00, +0x61e3c70c00, +0x61e1060c08, +0x61e30e0e00, +0xe1c3850e00, +0x60c1060c00, +0xc1c3830e00, +0x6081820c00, +0x61c1870e00, +0x61c1861c10, +0x60c1850e00, +0x612286060c, +0x61c1870e00, +0x4173851c00, +0x23e7860e00, +0xc1e3860e08, +0xf343078f00, +0x31e3861c10, +0x60c1850e00, +0x61c1070e00, +0xc3e3870e08, +0xc383c48f00, +0x1c2e7838f08, +0xe083070e00, +0x61e1871e10, +0xe1238d1a08, +0x61e3860c00, +0x61c1870e08, +0xb0a1870e00, +0x30a38a1c00, +0x71a2860c18, +0xf1e3870e08, +0x60e3870c00, +0x70c1870c00, +0xe143020c00, +0xf3c3078700, +0x60c3850e00, +0x7163860c00, +0x4143830600, +0x31c3870c00, +0x61c1020608, +0x7122820c00, +0xe1c1840a08, +0x71e3060c10, +0x30e1861c10, +0x6163820c10, +0x6021860c00, +0x61c3870e08, +0x71a1870c00, +0xc3c383060c, +0x6163860c08, +0x60e3860c00, +0x6163870e00, +0x6183850e00, +0x61e1820c00, +0xe123878e00, +0x1a3cf0800, +0x6181860c00, +0x70c1860c00, +0x91c1850c00, +0x20e3891c00, +0x71e1871e18, +0x50e3070e08, +0x60e38e0c00, +0x7121860c10, +0x31c1870c00, +0x61a3830c00, +0xf1e38f1e18, +0x30c1061c00, +0x61c3870600, +0x71e3860e00, +0x61c1870608, +0x73e3870e08, +0x61e1870c00, +0xe1861000, +0x30c3850e00, +0x71e1870c00, +0xc3e3878700, +0x4143020600, +0xe141870e04, +0x7161860c00, +0xc1a1870c00, +0x71e3860c10, +0x61e3860c08, +0xe183848f00, 
+0xe163850e00, +0x61c38c8e00, +0xc0c1830600, +0x71c1860c00, +0xc1e1870600, +0xe363870f08, +0x70e3870e00, +0x71e3870e00, +0xe3c3848f00, +0x71c1871c00, +0xb143030600, +0x1e321830e08, +0x61c3870e00, +0x6121850e00, +0x61c3850e00, +0x6062820e08, +0x6143850e08, +0x71f1861c00, +0x31e3871c00, +0x60e1860c00, +0x71e3850e08, +0xe161870e00, +0x71a3c49e10, +0x61a1870808, +0x2183c28e00, +0x70a3861800, +0x6123870a00, +0x60e1c60c00, +0x6163830600, +0x6141860c00, +0x60c1850e00, +0x61e1830e08, +0x6143850e00, +0x7101070600, +0x71e3870e00, +0xe1c3848e00, +0x7193c70e00, +0xf163858e08, +0x61e3c70c10, +0xc1c3850e00, +0xf323870e00, +0x71c3861c00, +0x6143448e00, +0x71e38f0e00, +0x7163830c10, +0x41c3830400, +0x71e3e71c10, +0x71e1870e08, +0x31e3860c00, +0xe1c3070e08, +0x6081c60c00, +0x61c1830e00, +0x61c1870c00, +0x6143850e00, +0xf121860c00, +0xe1e3870e08, +0x30e1871e00, +0x60c1870e00, +0x60c1871400, +0x7363870c00, +0x30e38e1c00, +0x71a1870c00, +0x60c1870c00, +0x30c1820c00, +0xe1c3848e00, +0xc14183060c, +0x3081ce1800, +0xe1c3830600, +0xf121850e00, +0xc1c3c48700, +0x71a3860c10, +0x60c1850e00, +0xe1c1070e00, +0xe3e3870e0c, +0x61c3070e00, +0xe141830e00, +0x6143070e08, +0xe3c3858e08, +0xe0c1870600, +0x6061870c00, +0x20c1870c00, +0x20e1870c18, +0x7143060c00, +0xe1e3870e00, +0x30e38e1800, +0x61e3ce0c00, +0x7143871e10, +0x4161860c00, +0x70e1861c10, +0x30e1870c00, +0x6143850e00, +0x60e3060e00, +0xe103c70e00, +0x40c1860400, +0xe141030608, +0x61c3070e00, +0x60e3060c00, +0x71e3860c00, +0x61c1871e00, +0x61c3820600, +0x1e363858f04, +0x60c1020c00, +0xe173870e08, +0x71e1860c00, +0x61c1870e00, +0x3041060c00, +0xe121870e00, +0x71e3870e08, +0x61c3060c00, +0x31e1070c18, +0x6181850e00, +0x1e38e0c00, +0x30c1060c10, +0xe143848e00, +0x71e3861c00, +0x6163850e00, +0xe247878300, +0xe247c48f00, +0xc3c1830e08, +0x60e1871c10, +0x41e1851c00, +0x70e1861c00, +0x30c1060c00, +0x61c3060c00, +0xe141870e00, +0x61c3070c00, +0x71e3ce1c00, +0x51e3070e08, +0x60e1820c08, +0xc143830600, +0x6163870e08, +0x60c1060c00, +0x70c1850e00, 
+0x1e1e1850e08, +0xe1c3850e08, +0x61e3870e08, +0x60c1860c00, +0x73e7860c10, +0xe1c3848e00, +0xc387c68600, +0xd1a3cf0c00, +0xc143838600, +0x40c1820400, +0xe141030600, +0x6141820c00, +0xe181050e00, +0x61c3870e00, +0x7163850e08, +0x60c1020400, +0x61c1050e00, +0xe123850e00, +0x61c3070e00, +0xe1c1870e08, +0x60c1830c00, +0x61c1860c00, +0x71c3870c10, +0x6141870e00, +0xe1c1850e00, +0x1f3870c00, +0x6123c60c00, +0x1f1c1878e00, +0x6181c70e00, +0x61c1820c00, +0x71e3860e08, +0xe143830e08, +0x61c1070c00, +0x61c3820400, +0x60c3860c18, +0x60c3020408, +0x41e1860c00, +0x60c1820c00, +0x1c321838f08, +0x71e1870c00, +0xe143830600, +0xc141870e00, +0x6101820600, +0x11e3871e00, +0x71e3c61c18, +0x60c1860c00, +0xe323c60c00, +0x1e3e7c78900, +0x61c1870e08, +0x6143060c00, +0x61c1030600, +0x30c1860c00, +0x71c1070e00, +0x31e3860e00, +0x61c3830c00, +0x61e3c70e18, +0x61e1860c00, +0xc1d3c70e00, +0x7163860c00, +0x61e3850e08, +0x71e3851e10, +0x6132c61c00, +0xe143c88f00, +0x71e1870c00, +0x1f3860c00, +0xc081010e00, +0x61c1870e08, +0x6101850e00, +0x41c3830a08, +0x10c1060c00, +0x6123c61c10, +0x71e3861c10, +0xf3e7cf8e00, +0x4143020600, +0x61c3870e00, +0x60e1861c10, +0xe1e1c51c00, +0x71a3860c00, +0xe3e3c70e08, +0xc141030600, +0x61c1830c00, +0x61c3030604, +0x61c3850e00, +0xe1c1870e08, +0x71e3860c00, +0x60c1870c00, +0x6143070e00, +0xc183030300, +0xe141870e00, +0x60c3850e00, +0x31e3858e00, +0x79a1861e10, +0x61c1850e00, +0x61e3860c00, +0x71e1871c00, +0x31e3870c00, +0xc141030a08, +0xc1c3030600, +0xf163870e08, +0x61a3860e00, +0xe143850e08, +0xf1338e1c00, +0xc0c1858e00, +0x60c1060c00, +0x71a3060e00, +0xc141830600, +0x61c1870c00, +0x61c1830e08, +0x63c3830e08, +0xf122870e08, +0x61e1830e00, +0x71e3860c10, +0xe263858700, +0xf3e7870f08, +0xd3e787870c, +0x6143820c00, +0xc1c1830600, +0x30c1870c00, +0xe3c7838704, +0x1f3c60c00, +0x7123850e00, +0x61e3870e00, +0x6141070a08, +0x6143850c00, +0x6101830e00, +0xd367870e00, +0xe3c3838700, +0x61c3070e00, +0x6923860c00, +0x70e1851e00, +0x4041820600, +0x71a3850c00, +0xc223878600, 
+0x60c1870e00, +0x30c1870c00, +0x71e1870e00, +0x6181030600, +0x6143851610, +0x7121860c10, +0x61c1050a08, +0x60c1060c00, +0x60c1030e00, +0x71a1861c10, +0xf1c1030c00, +0x71a14e1c00, +0xe163870e00, +0x71e1870c18, +0xe1e1850e00, +0x30e1860c00, +0x30c3020c00, +0xe102c30600, +0xe141030e08, +0x60c1020c00, +0x61c3870e00, +0x61e2c61800, +0x61c3870e00, +0x61c3830e08, +0xf1f3851e00, +0x60e1840c00, +0x61c3860c00, +0xe1c1850c00, +0x71e38f1e10, +0x40e1020c00, +0x31e3850e00, +0x31c3030a08, +0x61c3030600, +0x4141820600, +0xc1c3070600, +0x71e1871c00, +0x7163861c10, +0x60e1860c00, +0x6143070600, +0x61e3860e08, +0x71e3c61c30, +0xe141870e08, +0x71e2860c10, +0x70a1cd1e00, +0x71a3860e10, +0x71e3c71e00, +0xe043830e00, +0x1c2c3878700, +0x4163030c00, +0x6123861c10, +0x10c3861c00, +0x60c1830c00, +0x60c1820c00, +0x6161870c00, +0x7081860c00, +0x31c3030408, +0xf1e3850e08, +0x60a3ce1c00, +0xc3e7838704, +0x6161830e00, +0xf3c3070702, +0x71e3871c00, +0x30e1861800, +0xc141060e00, +0xc3c3030700, +0x60a3ca1c00, +0x60a1861c10, +0xe141030600, +0x6143850c00, +0xe141830600, +0x5163861c10, +0xe1e1870e08, +0x2081020c00, +0x71238f1e00, +0x61e3860c08, +0x61f38f1c00, +0x61c1870e00, +0xe1c1c70608, +0xe3438c8f00, +0x6161870e00, +0x60e1870e00, +0x61c3c68e08, +0x61e3c30430, +0x71e3850e00, +0xc143860e08, +0x61c1850e00, +0x61c1860e00, +0x61c1030e00, +0xc0c1830c00, +0xe143850e00, +0xe3e3850f0c, +0x63e7870608, +0x71818f1c00, +0xe163860e00, +0x60a1820c00, +0x61c3870600, +0x6373860e00, +0x30a1860c00, +0x61c1030600, +0xc183850700, +0x51c1820c00, +0xc363070e00, +0x2123850e00, +0x6163820c08, +0x61c3851e00, +0x60e1860c00, +0xf3c1830600, +0x71e1861c10, +0x41e3870e08, +0x6141850c00, +0x73c3830e00, +0xe1c3050e00, +0x7303c71c00, +0x6103030600, +0x71c3060c00, +0x70e38f1c10, +0x71e3871c10, +0x71e38d1c10, +0xc343830600, +0x61c3830600, +0x61c1870c00, +0xe123870e00, +0x61c1030600, +0xe161830e00, +0xc1c3870e00, +0xe1c3830600, +0x6143860e08, +0xc1e3c60c10, +0x60c3050e00, +0x60c1820c00, +0x61c3820c00, +0xe3e3870e08, +0x71b3c61c10, 
+0x71c1070c00, +0x61a3860c00, +0xe227c89f00, +0x6141070e00, +0x60e1860c00, +0xf1e3870e00, +0x41c1870c00, +0x3e7848f00, +0x6143830600, +0xe161870e00, +0x61e3850e00, +0xe1c1850e08, +0xe3e3c70e00, +0x71a1860c10, +0x30e18e1800, +0x6143050e00, +0x10e1061800, +0x70e1871c10, +0x1e3c61c00, +0x61c3870e00, +0x20c3830c00, +0xc1c1850e00, +0xe347878f00, +0x61c3030600, +0x6367838700, +0x6143820c00, +0xe343850e00, +0x6103830600, +0x30a3860c00, +0x23e7c78e00, +0x6163870e00, +0x33e1861410, +0x30e1861c10, +0x61c3810e00, +0x61c3c48f00, +0x60e1870c00, +0xe141830600, +0x61c3870e00, +0xe143870e00, +0xe161860c00, +0x31e3870e08, +0xe1c1850c10, +0x71e3cf9e00, +0xe363870f00, +0x61c1870e08, +0xf163870e08, +0x71e38f1c00, +0xe181850600, +0xc3c7c68700, +0x61c1860c00, +0x6161820e08, +0x183c3878700, +0x70a3860c00, +0x6141830e00, +0x4141830400, +0x40c1070600, +0xf123cc9e00, +0xe1038d0e00, +0x3e3c20c00, +0xe123830600, +0xc1c3830700, +0x6143870c00, +0x71e3860c00, +0x60c1070e00, +0x6163c70c00, +0xe1861800, +0x8180830600, +0x60c3850a08, +0x1e303830704, +0x61c3070e00, +0x6163820608, +0x61e3870e00, +0x4161860c00, +0x71b38e1c00, +0x71e3861c00, +0xe7ca1800, +0x71e3870e00, +0xe0e1870e00, +0x61e1870c10, +0x70c1060c00, +0x61e18f1c00, +0x63c3830608, +0x6141820c08, +0x60a3ca1800, +0xe1c1830600, +0x71a3860c10, +0xe163c60c00, +0x60c1870c00, +0x70c1860c10, +0x61c3070604, +0x20a3860c00, +0x8e3860800, +0xe0c3850d04, +0x4113850c00, +0x61c3850e00, +0xe161070a08, +0x21c1870e08, +0x60e1860c10, +0x2081850c00, +0x6343030608, +0xe141070e00, +0x6141870e08, +0x30a3860c10, +0xe163030e08, +0xe143828e00, +0x61c3860400, +0x7103820e00, +0x1c363858700, +0x4041070600, +0xe123830e08, +0x7141060c00, +0xe1c1870e08, +0x61c1870e00, +0x71c1870e00, +0xe223830e08, +0xe141870e00, +0x31e3860c00, +0xe143848e00, +0xc181070400, +0x21e3cf0800, +0x61e7860c00, +0x71e3861c10, +0x71e3871e10, +0x31e1871c00, +0x1e3e7cf9e00, +0x61c1870c00, +0xe1c3848e00, +0x71e3860e00, +0x71e1850e00, +0xe023860c00, +0x61e1820e00, +0x30a2820c00, +0xe247c89b08, 
+0x51e3060c00, +0xc141830e08, +0x71c1870c00, +0x71e1830e00, +0x7123861c00, +0x6081850c00, +0xe1c7cc8e00, +0x60c1860c00, +0x61c1030600, +0x6163860e00, +0x61c1830c00, +0x6143830e00, +0x6143820400, +0x71a1870e00, +0x61c3830e08, +0x7081850e00, +0x6143870e00, +0xd163850e00, +0x60c1860c00, +0xe143858e00, +0x1071c3879e00, +0xc3f3c51e00, +0x61c1830c00, +0x6143848f08, +0x6141830e00, +0x71a1861c10, +0xe103c60c00, +0x71e1060c00, +0x71e3870e00, +0x71a3851e10, +0x20c3020600, +0xc3c3830704, +0x6123830c00, +0xe1c1830e00, +0x50b3ce1800, +0xc161870e00, +0xc143c60608, +0x11c3070600, +0xe1e3070e0c, +0x43c3830208, +0x6163860e00, +0x30c1861c00, +0x49e3860e00, +0x61c1870e00, +0x70a1861c00, +0x61c3858e00, +0xc143850600, +0xe343808f00, +0x31e3861c00, +0x61c38d9e00, +0xe1c1870e00, +0xe103830600, +0xe363870e08, +0xe1c3c48e00, +0x61c3070e08, +0x61e3c70c00, +0x71e3850e10, +0xc0c1870c00, +0x71a18e1c00, +0x20e3840820, +0xe343870f08, +0x6141070e00, +0xc0c1050c00, +0xf1e1870e18, +0x61e3860a00, +0x20c1060c00, +0x7123cf0e00, +0xc143830600, +0x6143868f00, +0xe3c3870e00, +0x83b3c30608, +0x61c3070e00, +0x6143850e00, +0x71a1860c00, +0xe163870e00, +0x20c1060c00, +0x19e3060c00, +0x70e38d0e00, +0xe1c3870e08, +0x4143c30600, +0x30e1861c10, +0xf3e3c70e08, +0x71e3871e00, +0x61e3878e08, +0x6143848e00, +0xe143868f00, +0x61e1870c00, +0x30e1861410, +0x61c3870c00, +0x7123c61c10, +0x61c1850e00, +0xe143858700, +0x61c3c49e10, +0x71a3c58e00, +0x61c3850e00, +0x60c1870e00, +0xe1c3c48f00, +0xe081848e00, +0x41c1830e08, +0x61c1870e00, +0xf1e1860c00, +0x6141060e08, +0xe3e3870e08, +0x61e3070e00, +0x60c3870c00, +0x49e30f0c00, +0x6141850e08, +0x61c1830c00, +0xc183830600, +0xf367870f08, +0x60c1050e00, +0x61c1070e00, +0x7123860c00, +0x4141810600, +0x61c3830a08, +0x60c1060c00, +0xf327c58f08, +0x70a1870c00, +0x41c3830600, +0x61c3050600, +0x21a2860c00, +0x2143860c00, +0xe141850e00, +0x7081871c00, +0x6163c68e00, +0x61c3870c00, +0xe121060400, +0x70e1860c00, +0x1c3c7c78700, +0xc1c3820608, +0x7143870e00, +0x6143820e00, +0x71e3860c00, 
+0x61e1830c00, +0x71e3050c00, +0x60e1860c00, +0x61c383060c, +0x31e3861c00, +0x6181050c00, +0xe3870c00, +0xe383878704, +0xe143c48f08, +0x71e3871e08, +0x3081860c00, +0xe143c48e00, +0x60c1870c00, +0x61c6820e00, +0x60c1860c00, +0x30e1861c00, +0x6123860c00, +0x41c1860600, +0x41e3830608, +0xe3e3c78e00, +0xe3c3870e00, +0xe183070f00, +0x6081860c00, +0x51e3860e08, +0x6101060c00, +0x7143070e00, +0x41e7870e00, +0x60e1860c00, +0x6161830c00, +0x6163c78f00, +0x61c3070600, +0xe263870f04, +0x61e18e0c00, +0x20e3841010, +0x41e3870e08, +0x61c1870e00, +0xc163830600, +0xe247c48f00, +0x60c3870e00, +0x61c1830c00, +0xe3c1878600, +0x61e3870e00, +0x11e3860810, +0x43c3830e08, +0x61e3830c00, +0xf123cd1e00, +0xe143848e00, +0xf3e3871e08, +0xe3c3830600, +0xe1e1850e08, +0x71c1850e00, +0x60e3850c00, +0xe181030600, +0x31e3860c00, +0xe367c71900, +0x60a3861c00, +0x7081870c00, +0x61c3870e00, +0x71e3870e08, +0xc1e3070e00, +0x61c1870c00, +0x6143030600, +0x41e3860c00, +0xe1e3860e08, +0x40c1870400, +0x11e3871c10, +0x7143858e00, +0x60a1851c00, +0x71e1871c00, +0x70a1871c00, +0x71e2861c00, +0x70b1871c00, +0x60c1820c00, +0x4121860c10, +0xc3a1c70e08, +0xf163070e00, +0xc183030700, +0x61a3860c10, +0xe141870c00, +0xf123859e10, +0x60c1830e00, +0x30a38e1c00, +0x61e7cf0c00, +0x60a1c60c00, +0x60c1870c00, +0x30e38e1c00, +0x60e1870e00, +0x60e1861800, +0x6081820a00, +0x6143030700, +0x20c3850a00, +0x61e3860e08, +0xe1c1848f04, +0xf3e687060c, +0x70e3871600, +0xe3c3878e00, +0x71e1861c10, +0xe181830e00, +0x61c3070e00, +0x21c3030600, +0x60e3860c00, +0x21a5430c00, +0x6343c58e00, +0x71c3820c00, +0x60c3060e00, +0xe141050e00, +0x6141870c00, +0x6161870e08, +0x60e3860c10, +0xe1c1850600, +0x61c1870c00, +0x21c2830600, +0x60c1820400, +0x60c1070e00, +0x6141870e00, +0x49e1871c00, +0xe143870600, +0x71e3870c00, +0xe1c3830600, +0x61e3830e00, +0x60c1860c00, +0x31e3860c00, +0xc141830600, +0x7163871e00, +0xe143848e00, +0x6121860e08, +0x70e1870e00, +0x6143830e00, +0xe103c70e00, +0x71f3861c00, +0xe123840e00, +0xe1e3470c18, +0x61c2870600, 
+0x51a2ce0800, +0x4187830604, +0xe1e3830608, +0x63c7830e00, +0x6143870e00, +0x70e1860c10, +0x61e3cf1e00, +0x61c1850c00, +0x31a1870a10, +0x8141830600, +0x6143870e08, +0x31e3860c00, +0x60c1850e00, +0x61c1870e00, +0x6103860800, +0x71a3cd0e00, +0xe343cc9f00, +0x60e1860c18, +0x6161020c18, +0x60c1850e00, +0x61c3849200, +0x60c1020c00, +0x60c1020400, +0x71a1871e10, +0x61e1871c00, +0x6141820c00, +0xe1c1830608, +0x71e1871c00, +0xe181010600, +0xe123870e08, +0xe141c60c00, +0x6163870e00, +0x1e7c60800, +0x61c3850e00, +0x60c1850c00, +0xe143848e00, +0xf0e3870c00, +0x71a3871c00, +0xe141030600, +0x2243878e00, +0xe1c3c78f00, +0xe163870e08, +0x4143830600, +0xc143830600, +0xc043848f00, +0x4183830600, +0x61c3060e08, +0x6163820e08, +0x61e3859e10, +0x61c1870e00, +0x70e1060c00, +0x31a38f1e00, +0x61c3870600, +0xc1e3870e00, +0xe103c30f04, +0xe143428e08, +0x71a3871a00, +0x61c1830e00, +0x61e3870c00, +0x41c1850e00, +0x7143861c00, +0x31a3860c08, +0x6363850e00, +0x6141870e08, +0x41c1830400, +0x30a1861c10, +0x61c1830600, +0x6043860c00, +0xe323870f00, +0x71a3871c10, +0xe143870f00, +0x6141030e00, +0x21c1030c00, +0x60c1870c00, +0xc161870e08, +0xe143c50c00, +0xe022c60c00, +0x71a3860e08, +0xc143850e00, +0x61e3850c00, +0xc1c1830600, +0x141030c00, +0x1e3c61800, +0x70a1861c00, +0xc143848e00, +0x30e3871c00, +0x31e3860c00, +0x70e1861400, +0x21c3050e00, +0x71e3ce1c00, +0x61a1870e00, +0xe141830c08, +0x6141030600, +0x60e1871c10, +0xe147870a08, +0x70a38f0e00, +0xe1c3850f04, +0x6141030c00, +0xe143830600, +0xf1861800, +0x6143030a00, +0x71c1060e00, +0x60c3830400, +0x1c3c3878700, +0x6141070608, +0xe163c70e00, +0xe243858f04, +0xc343030600, +0x71c3870c00, +0xe3e7c78e00, +0x61c3878e00, +0x71c1860c00, +0x6163060e08, +0x60c3c89e00, +0x60e1070e00, +0xc347830600, +0xc1c183060c, +0x6081010400, +0x60e1860c00, +0x60c1870c00, +0x31e38e1800, +0x30e18e1800, +0x6183050e00, +0x41c1830600, +0x21e1c71e18, +0x21c3860e00, +0x70a3861c00, +0x20c1860c00, +0xe1c3858e00, +0xc1c1030600, +0x70a38e1c10, +0x7121860c00, +0xe133870e00, 
+0x6041020c00, +0x1c3c3c78704, +0x60c1850e00, +0x61c1820c00, +0x6141030c00, +0x71e1871c00, +0x6143868d00, +0xe0c1050e00, +0x40c1030600, +0x20c1020c00, +0x51c3060c00, +0xe163870600, +0x70a38e1c00, +0x2183830e00, +0x60c1060c00, +0x61c1830e00, +0x7061860c00, +0x61e1870e00, +0x61e3850e08, +0x71e3861c00, +0x60c1050e00, +0x61c3870c00, +0xf163c48f00, +0xe1c1850e00, +0x6143870e00, +0x61c3830e00, +0x31e3861c00, +0x7143060e00, +0x71c3860c00, +0x61c1830600, +0xf1e3851e00, +0x30c3060e00, +0x60c1050e00, +0xc143870600, +0x6161050c00, +0xe1860c10, +0xc1e3870e00, +0xc141830600, +0xe1c1858e08, +0x61e3870e08, +0xc1c3070600, +0x20c1070e00, +0x61c38f0e00, +0x60c1860c00, +0x60c1830e08, +0xe3e3830e18, +0x61c3050e00, +0x2083030600, +0x60e1061c10, +0x6143c60c10, +0xe1e1870e00, +0x60c1851e00, +0xe3e7c60e00, +0xe1a1c20c00, +0xc341828600, +0x61c1820e00, +0xe141830600, +0x4141010e00, +0x6163860c00, +0xc1c1870600, +0x71a3cc1800, +0xe141870c00, +0x30c1860c00, +0x61c3870e08, +0x7121871e10, +0xc163860c00, +0x71c1871c00, +0x7163861c10, +0xf1e3c70e18, +0x7141870e00, +0x7081850c00, +0x60c1860c00, +0xe367830e00, +0x71c3870e00, +0xe14383070c, +0x30e3861c00, +0x70c1060c00, +0x7141861e00, +0x60c1030c00, +0xc181850600, +0x71e1060c10, +0x30e1871c00, +0x6141820c00, +0x4143830408, +0x71c3051e00, +0xe1c1851e08, +0x63e7858f08, +0x61c1020c00, +0x31a1861400, +0x60c1870400, +0x71e3060c08, +0xe143870e00, +0x6132c61c00, +0x6103c78e00, +0xe1c3870e00, +0x6142020e00, +0xe163870e08, +0x6141830e08, +0x61c3860e00, +0xe101830e00, +0x6081020600, +0x6163860c00, +0x21c3070e00, +0x61c3070e00, +0x61e3c61c10, +0x6143870e00, +0x4143820e08, +0x60a38e0e08, +0x1f3c60c00, +0xa3c3830600, +0x70e1860c00, +0x30a1861800, +0x7123860c00, +0xf1e3c78e00, +0x6103860c00, +0xe3e1870e08, +0x4143830400, +0x41f1870e00, +0x71e1871c00, +0xe3e7cf8f08, +0xe123870e00, +0x60a3c61c10, +0x6143050a08, +0x6143860c00, +0xe143830608, +0x70a1870e00, +0x61c3870e08, +0xe1e38f1e00, +0xe3c3878f04, +0x60e3860c00, +0x61c1070e08, +0x30c1060c00, +0x6147870e00, 
+0xe163870a08, +0x71c3830e00, +0xe123870e18, +0x71e3870600, +0x71e38f1e10, +0xe143848e00, +0x20f1860c00, +0x61e3860c00, +0x61e3871e00, +0x30e38e1c00, +0x6143848f00, +0x31e3861c10, +0x6143850e00, +0xe103050600, +0xf1e3870e18, +0x61c3850e00, +0xe1e3878e00, +0xe123848f08, +0xe163860c00, +0x60c1060c00, +0x1e7870c00, +0x61c3830e00, +0x71e3860c00, +0xe1e3830e00, +0x61e3851e00, +0x6181870e00, +0xe363850e00, +0x70c1870c00, +0x6143870e00, +0x31c3060e00, +0x60c1850e00, +0x61c1830e00, +0x7163830e00, +0x60e1c20c10, +0x60e1860c00, +0x61c3870e00, +0x61c1870e08, +0x20e1860c18, +0xd3c383070c, +0x71e1851c00, +0xf1f3070c00, +0xe163870e08, +0x6143060c00, +0x1e3861800, +0xe1c1850e00, +0xe143870e00, +0x1e243c48f00, +0xe1e3070e08, +0x61c1050e00, +0x60e1860c00, +0x1e1c50c00, +0x61c3830e10, +0x71e1860c00, +0x61c1870e00, +0x60c1860c00, +0x71e1851e00, +0x61c1850c00, +0x71e1861c10, +0xa3c3830700, +0x40c3850c00, +0xe303c70c00, +0x61e3c68f08, +0xc3e383060c, +0xe123830e08, +0x4141070e00, +0x6323830608, +0x1e3c3830704, +0x60c3850e00, +0xe163870600, +0x60c1c70c00, +0x30e3861400, +0x61e1870c00, +0x63e7850e08, +0xe163858e08, +0x6143838e00, +0x71e3870e18, +0xe0c3850c00, +0x61c1020600, +0x71e38f0e00, +0xe3c1830608, +0xe0a38d1e10, +0x61c3870e00, +0x20a3861c00, +0x6163070e00, +0x61e1860c00, +0x73e3871e08, +0xf341830e08, +0x71f3c71e18, +0x60e1060c00, +0x39a3070c00, +0x6163860c00, +0x6163830e00, +0x71e38d0c00, +0x6183030600, +0x61c3820c00, +0x6141870e08, +0x60a3861800, +0x71e3870e00, +0x71c3861c00, +0x71c1870c00, +0x70a1c61c10, +0xc123050f00, +0x71e7c89e00, +0x70e38f1c00, +0x7123860e00, +0xc143030600, +0x61c3060c00, +0xe347848f08, +0x31e3871c10, +0x70c1870c00, +0x41e3820c08, +0xe143830e08, +0x6163870e00, +0x61a3870c00, +0xe1c3870e00, +0x7163871c00, +0x6143830e08, +0x41e3060c00, +0x2041020a00, +0x60c3851e10, +0xc143830700, +0xf363870e08, +0x6163870a00, +0x71e3050e00, +0x4143870400, +0x31a3861c10, +0xc181070700, +0x43e3870e08, +0xe141050a00, +0x20c3060c00, +0x60c1860c00, +0x71a18f1c00, +0xe1c1030600, 
+0xe143870e08, +0xc347c68f04, +0x60c1050e00, +0x60c1820c00, +0xe143858e00, +0x41e3870e00, +0x4143830600, +0xe143830e10, +0xe1c3850e00, +0xc1c1870e00, +0xe1c1850e00, +0x4163870c00, +0xe143848f00, +0x61e1870e00, +0xe3a7c48e08, +0xc1c1870600, +0x7123871e10, +0x61e1870e00, +0x30c1020c00, +0x60c1820c00, +0x70c1070e00, +0x21c3820608, +0x1c101878f00, +0x7163860e00, +0xc161850a00, +0x70a1c61c00, +0x61c1050e00, +0x70e1060c00, +0x61c3848e00, +0xc141030600, +0x70e1820c00, +0x11c3860c00, +0xe141870c00, +0x51e3860c00, +0x21c3860c08, +0xe1e3870e00, +0x61e3c70e00, +0xf3c3060e00, +0x6181850c00, +0xc3e3c58e00, +0x61e3870e00, +0xc1c1850a0c, +0x4143870e00, +0xe143830e00, +0x6383050700, +0xf1e3871e00, +0xe1c3c48f00, +0x71c3020c00, +0x1c5c60000, +0x1367838704, +0xe163cc8e00, +0x6081030600, +0x10e2861820, +0xe143820c00, +0x30e1861c00, +0x6161870e00, +0x7363851e10, +0xc343838700, +0xe141830e08, +0x6141020600, +0x60c1870c00, +0x6141030c00, +0x7141850e00, +0x20c1060c00, +0x61c3830e10, +0x71c3070e00, +0x70c1061c00, +0x70a3c61800, +0xe1e1860c00, +0xe103820600, +0x61e1860c00, +0x61e3870e00, +0x61c1870e08, +0x71a3870c00, +0x6163820600, +0x6081810400, +0x6081870c00, +0xc1e1870e08, +0x70e1060c00, +0x7163860c10, +0xf363830600, +0x6143830608, +0x60c1860c00, +0x60e1870c00, +0x61e38f1e00, +0xe30c0000, +0x60e1870c10, +0xf1e1871e10, +0x1e7870c00, +0xe283828600, +0x7143060e08, +0x21c3830e00, +0xe143830600, +0xe1e3c60c00, +0x61e3820c00, +0x71a1870c00, +0x71c3860c00, +0xc1e3870e08, +0x20e3061800, +0x6143820e08, +0xe163870e08, +0xf3e3870e00, +0x20e3c60c10, +0x7143c28e00, +0x60a1870c00, +0x60e1c61c00, +0xe1061800, +0x71c3870e08, +0xe163870e08, +0x71e1870e08, +0x30e1861c00, +0x31e3860c18, +0x6141830400, +0xe1c3050600, +0x60c1050e08, +0x31a3860c10, +0x1c363878700, +0x6141830400, +0x30e1860810, +0x7103860c10, +0xe243c88f00, +0xe0c1870e00, +0x71e3860e00, +0x71e3060c00, +0x4143870600, +0xe141870e00, +0x20c1860c00, +0x31c1070c00, +0x31a3860c00, +0x71e3870c00, +0x71a1870e08, +0x61e1870e08, +0x6143050a08, 
+0x60c1851e00, +0x61c3870e00, +0x31c3820c00, +0x6163c48e00, +0xe141870e00, +0xe343849318, +0xe1e18f1e00, +0xe1e1870e00, +0xc3c3c48f00, +0xe3e3870f00, +0x60c3891e00, +0x60c1030c00, +0x6143830e00, +0xe143870e00, +0x61e3c59c00, +0x30e18e1c10, +0x71a3861c00, +0x61e3860e08, +0xe1e1870e08, +0x41c1050a08, +0x6143850a04, +0x61c3850c00, +0x61e3c61c10, +0x61c1850e08, +0x51e1861c00, +0xe163c89e00, +0x60c1060c00, +0x8141860c00, +0xe363078d0c, +0x71e3861e10, +0xe3c3030604, +0x61e1871c10, +0x61e3060c00, +0x71a3870e00, +0x41e7838608, +0x60c3850e00, +0x61e3860c00, +0x61c3850e00, +0x4143830600, +0x61c3860e08, +0x5121870c00, +0x60e1060c08, +0x61e3c71e10, +0xe163870e00, +0x6161830e08, +0xc1c3830600, +0x71c1830c00, +0x60a3860c00, +0x61c1870e00, +0x40e1830c00, +0xe267838600, +0x71a3860e00, +0x31e1861800, +0x60c1020c18, +0x70c1871c00, +0x21e2860c00, +0x6161850a08, +0xc1e1830e08, +0x6163860c00, +0x7123820a14, +0x60c1070e00, +0x70e1860c00, +0x6141050e00, +0x61c3830e08, +0xe1c1830600, +0xc143050600, +0xe3e3878e00, +0x1e1ce1800, +0x71c1850e00, +0xe141030600, +0xf143870e00, +0x61e3ce0c00, +0x1f7870e00, +0x61c3870600, +0x6121860c00, +0x6143820600, +0xe343870e08, +0x61e3cf1e18, +0x41c1070600, +0xe1c1830e00, +0x30618e1c00, +0xf1c1850e08, +0xf1e3870e00, +0xe1861800, +0x7163870e00, +0xe3860800, +0x7163060c00, +0x71e3870c10, +0x21c3830400, +0x4143820600, +0x61e3850e08, +0x61e38d0e00, +0x6163860e08, +0x70a3ce1c00, +0x60a1861c00, +0xd1c1050e00, +0x61e3c70e00, +0x7081060c00, +0xe141870e00, +0x60c1060c00, +0x60c1060c00, +0x1a3860800, +0xe3e3830a0c, +0x40c1850e00, +0x61c1870e00, +0xe3e3870e08, +0x6141830c00, +0x31e2861c10, +0x61c3850e00, +0xe163830e08, +0xc1c0448e00, +0x61c1870e00, +0xe343830e00, +0x6141020608, +0x61e2860c10, +0x61f2cf1c18, +0xe1c3c78e00, +0x6143870e00, +0x31e3860c30, +0x6081820c00, +0xe163830600, +0x70c3871c10, +0x61e3870e00, +0xe3e3c70604, +0xe3c3878f08, +0x6163078e08, +0xe1e3cc8f0c, +0x6143830e08, +0x6143830608, +0x61c3830e00, +0x50c1860c00, +0xe1c1848f00, +0x71e3871e10, 
+0x40c1850e00, +0x11e3860c00, +0x1c343838700, +0xe3e3860e0c, +0x63e786060c, +0x71e1860c00, +0x6081c70c00, +0x6143870e00, +0x60c1070c10, +0x61c3030600, +0x61c1850e00, +0x6143820a08, +0xe163870e00, +0x61c1030600, +0x60c1060c00, +0xe141870e08, +0x61e3c60c08, +0x71e3861e10, +0x70c1050e10, +0x61c3870e00, +0xc103870600, +0x31a3860c00, diff --git a/samples/digitrec/digitrec/data/training_set_9.dat b/samples/digitrec/digitrec/data/training_set_9.dat new file mode 100644 index 000000000..344dc48d9 --- /dev/null +++ b/samples/digitrec/digitrec/data/training_set_9.dat @@ -0,0 +1,1800 @@ +0x1c7870408, +0x31e7860408, +0xe367c08e00, +0x40c3870600, +0x1e5cf0c10, +0x43c4870204, +0x63e4878204, +0x41c7870608, +0x41c3830408, +0x1e3820410, +0x61e7870408, +0x21c3830c10, +0x61e1820c10, +0x1c2870204, +0x21c7c20800, +0xc142850e00, +0x61c1810408, +0xc3820408, +0x146810200, +0x61e7830c10, +0x40c1830410, +0x41c3830408, +0x4143820408, +0x61c3820408, +0x1c2810408, +0x21e3830c10, +0x43c4830100, +0xe3820830, +0xe1c1848e00, +0x1e3830408, +0x61c6870204, +0x61c3830608, +0x21e3870408, +0x41c7c50608, +0xc3820410, +0x1c5c68080, +0x41c1820408, +0x4144850200, +0x21e7cd0408, +0x41c4870404, +0x41c3c30608, +0x21c3870408, +0x41c3870200, +0x1c3810604, +0x61e3870400, +0x20c3820810, +0x41c3830408, +0x41c2870204, +0x1c3850204, +0x1e7860820, +0x41c3820408, +0x83820408, +0x4143810408, +0x21c3820408, +0x2183810400, +0x1c2870600, +0x6163820408, +0x61c2870608, +0x61c1820400, +0x1c3030408, +0x1c1820810, +0x6125c70c10, +0x4102870200, +0x61e7870604, +0x21c7870608, +0x71e7c70418, +0x6143820408, +0x31e5870408, +0x41c3850200, +0xc146870200, +0x2183820c10, +0x4143020400, +0x4143810200, +0x4143820408, +0x6143830604, +0x41e5870208, +0x21c3830410, +0x6147870408, +0x20c3830400, +0xc3820400, +0x6147870408, +0x20e3830410, +0x41c7850200, +0x41c3830408, +0x41c3810204, +0x1c3820408, +0x1e3820408, +0xc3830204, +0x2143820810, +0xc3a7c18304, +0x4144850200, +0x83c3810204, +0x1c2870408, +0x43c7c70604, +0x1c3810408, +0x183810204, 
+0xe14287020c, +0xc3e6c78300, +0x6166870408, +0x21e7870410, +0x4145850204, +0xc1c6870208, +0x6143820400, +0x1c2860400, +0x1e6c70204, +0x20c2860800, +0xc3820c10, +0x4143810604, +0x6183830400, +0x43c6870204, +0x61e7c70600, +0x61c3870400, +0x1e3830410, +0x1c6868102, +0x41c3850604, +0x21c3820408, +0xc244c70200, +0x30a3820800, +0x41c3830608, +0x41c3820408, +0x21a3820800, +0x20c1820c10, +0x1c5870204, +0x1c7850608, +0x60c3820400, +0x41c3830208, +0x20c3820408, +0x4147858608, +0x61c3870408, +0x4143850408, +0x1c7850204, +0xc142870200, +0xc141810e00, +0x3c4ce8102, +0x61e7cf8600, +0x21e4cf0408, +0x61c3820408, +0x61c3820400, +0x43c7870604, +0xc143850200, +0x61c3830600, +0x41c7c70400, +0x4142870204, +0x143810200, +0x61c3830608, +0x1c3830408, +0x1c3810204, +0x40c1820408, +0x1c244c70300, +0x43644f8100, +0x1c2870204, +0x1e3830400, +0x1c3830000, +0x61e1831c00, +0x61c3810208, +0x1c3810400, +0x20c3820400, +0x61e78f0600, +0x41c7850204, +0x21a3820410, +0x6183870200, +0xc1c3830204, +0x1c58f0204, +0x61c3820c00, +0x4143060200, +0x61c3830400, +0x41c3830400, +0xc3810200, +0x40c1820400, +0x21c3830408, +0x4143810600, +0xc2044fc080, +0x41c3830410, +0x21c7870408, +0x63e58f0408, +0x41e7830608, +0x20c3810400, +0x1c3810204, +0x61e7c50204, +0xc3820408, +0x41c3810608, +0x1c3830204, +0x4142860400, +0x41c7870408, +0xc245850200, +0x61c3830408, +0x4102810204, +0x61c3810608, +0x4143830600, +0x4143810600, +0x21c3850408, +0x41e3830600, +0x21c2870200, +0x21e2870410, +0x1c7c70204, +0x61e3870408, +0x41c3870208, +0x20c1820820, +0x41c3830604, +0x183830604, +0x41e3830c10, +0x1c3850204, +0xc343810204, +0x6166870600, +0x21c3c10410, +0x61c3870408, +0xc3860408, +0x1c3810408, +0x21e3830c10, +0x4142810600, +0x20c3860810, +0x1c3830408, +0x71e3c70c10, +0x187810204, +0x61c3810608, +0x83c7830204, +0x43e7c10608, +0x4143870400, +0x1c7850204, +0x183810204, +0x21e7c70204, +0x4143810e00, +0xc344870200, +0x1c7870608, +0x61c7870408, +0x43c7c38300, +0x41c3870204, +0x41c7810204, +0x41c3810204, +0x41c3830408, +0x1e3830408, 
+0x61c3820400, +0x61e3830410, +0x6106c70400, +0xc2860400, +0x6144870200, +0x143830408, +0x6167870400, +0x61c2870400, +0x61c3810608, +0x2184c70410, +0x3c7810204, +0x61c38f0200, +0xc207c08100, +0x6142870600, +0x21c3820410, +0x31e7860c10, +0x1c3810200, +0x1c3830400, +0xc102830400, +0x1c3820410, +0x41e3820400, +0x4142830400, +0x6143820408, +0x41c7870304, +0x4143870200, +0x21c3830410, +0x7182c70600, +0x1c3810204, +0x1c3830408, +0xc1c2830204, +0xe1e3c39e00, +0x1c3830408, +0x4043060200, +0x6081820830, +0x21c3820c10, +0x60c1820c00, +0xc3820c10, +0x1c58f0204, +0x63c7870604, +0x21a3830c10, +0xc143810600, +0x43c5ce8102, +0xc1c3830204, +0x41c3810204, +0x61e3830c10, +0x4143820410, +0x1c3810204, +0x61e7830c00, +0x60c3860400, +0xe3e7c38e10, +0xc343870204, +0x21c3830608, +0x61c3810c00, +0x4183830408, +0x4142870200, +0xc1c3830608, +0x1c3830408, +0x4187870600, +0x4142870204, +0x61c3830c10, +0x1c3830604, +0x21e3820c00, +0x4102870200, +0x6162870400, +0x41c2830600, +0x4142870200, +0x1c3830408, +0x6143830c00, +0x3c7810204, +0x61c7828410, +0x41c3820408, +0x2183870408, +0x6143c20408, +0x1c3820400, +0x1e0c30c20, +0x61c3830c00, +0x41c3810604, +0x20c3860408, +0x61e68f0608, +0xc101810400, +0x61c3830408, +0x6163810408, +0x20c2870204, +0x21e2870408, +0x61a6c70408, +0x41c1830c10, +0x41c3830400, +0x61e2cf0c00, +0xc1830408, +0x4180810200, +0x21c58d0408, +0x41c3810608, +0xe344870302, +0x1c3810408, +0x1c3850204, +0x41c3870600, +0x20c1820400, +0x61c3830400, +0x41c3810608, +0x21e3830410, +0x1c3830408, +0x41c3820c10, +0x1c3870608, +0x21e3870400, +0x4142830400, +0x4147850204, +0x40c3020400, +0x60e2870400, +0x41c3830408, +0x10e3860800, +0x4143820400, +0x1c3830604, +0x61c38f0e00, +0xc1c7870204, +0x1e7c30608, +0x4103810200, +0x31e3870408, +0x41c7850204, +0x61e7830400, +0x61c3830400, +0x2083810408, +0x41c3810608, +0x41c5850408, +0x20c3860400, +0x1e3c30c10, +0x61c3830600, +0x21c3870204, +0x1c3830204, +0x41c3830408, +0x1e7870604, +0x21e2870400, +0x41c3830200, +0x61c3c10608, +0x61c3820c00, +0x4163820c10, 
+0x4102870408, +0x83e4c70204, +0x6183850200, +0x4143850200, +0x61a3830410, +0x21a7830408, +0x3a7c78100, +0x4143810400, +0x1c3820410, +0x83c3830604, +0x6147830408, +0x6143870408, +0x83c7c30608, +0x41c3830218, +0x1e3820c10, +0x6163830408, +0x4142870204, +0x1c2820408, +0x4143820410, +0x41c3830408, +0x41c3820408, +0x1c3810200, +0x41c3810204, +0x4142870200, +0x183870300, +0x20c1820810, +0x41c3820400, +0x4147810204, +0x71c2870600, +0x21c3810400, +0xc1e4cf8200, +0x41c3820408, +0x4141820400, +0x41c3830600, +0xc1c7870200, +0x1c3810204, +0x41c2860c10, +0xc3820400, +0x41c3830608, +0x21e2870208, +0xc3e3c30c10, +0x61c1830400, +0x4182870200, +0x61c3830400, +0x1e5870204, +0x61c1810c00, +0x1c7810408, +0x4142870200, +0x4143830200, +0x41c3830408, +0x1c3820410, +0x61c1820810, +0x41c3810408, +0x384c78100, +0x20c3820408, +0x6143860408, +0x61c3870408, +0x1c6870408, +0xc3c7878100, +0x41c3830608, +0x21e3820810, +0x61e3820c10, +0x3183830c10, +0x1e3830408, +0x41e7810604, +0x61c3820408, +0x2143820400, +0x1c3830600, +0x20c3850410, +0x61c3870408, +0x1c6870408, +0xe264870200, +0x41c3820408, +0x1e7830408, +0x21a7830c10, +0xc143810204, +0x41c3820408, +0x1c3870408, +0x40c0820400, +0x4144cc8100, +0x4142820400, +0x61c3830608, +0x61e7c70608, +0x41c3850208, +0x61c3830c10, +0x1c3820408, +0x6143830208, +0x145850204, +0x4143870408, +0x43c781020c, +0xc246810300, +0x41c2870200, +0x2183820400, +0x61c3820400, +0x41c7830608, +0x6142870200, +0x61e7cf0600, +0x61c3830608, +0x61c3830600, +0x61c3020812, +0x41c3830408, +0x41c3830600, +0xc3830408, +0x61c2870600, +0xc3c6c78102, +0x61c3020a04, +0x41c3830608, +0x21c7850204, +0x2142830200, +0x6143830410, +0x41c3830408, +0x183810204, +0xc1820408, +0x4183810400, +0x41c3830408, +0x1c3820400, +0x7141020810, +0x21e3820400, +0x20c3820400, +0x20c3820400, +0x21c3820410, +0x41c3830408, +0x3c7818302, +0x6103868100, +0x41c7830204, +0x41c3820408, +0x41c3810600, +0xc0c5850204, +0x41c3830608, +0x4142870204, +0x61048f0204, +0x61c3870408, +0x1c3820408, +0x21c3830408, +0x4183810608, 
+0x6143830410, +0x21c2870200, +0xc3830408, +0x21c3870600, +0x10f38e0820, +0x6143820608, +0x21c3820c10, +0x4143870600, +0x6183830408, +0x10c3850410, +0x4143830608, +0x31a3c21820, +0x1c3820410, +0x4143810204, +0x21c3860410, +0x1c3870600, +0x1e3830408, +0x20c3870408, +0x4143810600, +0x41c3810204, +0x147850200, +0x61c4850608, +0x21e3850608, +0x7125860c00, +0x6163820c10, +0x41c3810200, +0x2183830408, +0x43c7810204, +0x20e3820c10, +0x61a3830408, +0x4143830600, +0x4146870608, +0x20c3870400, +0x1c3830408, +0x1c2830204, +0x1c3850204, +0x41c3870204, +0xc3820410, +0x4143810400, +0x4142870204, +0x6140870400, +0x41c3870204, +0x41c383060c, +0x4143830408, +0xc1c1810c00, +0x20c3830410, +0x1c3830408, +0x41c3c30e08, +0xc3860408, +0x4142830200, +0x1c1810408, +0x41c7870604, +0x41c3810600, +0x21e2870410, +0x20c2860400, +0x41c6870204, +0x30c2870400, +0x6143820510, +0x61c3870600, +0x1e3820c10, +0x41e3c30400, +0xc347cd8302, +0x41c2870204, +0x20c3820820, +0xc1c3810604, +0x61c3830608, +0x21c2830408, +0x41c3810408, +0xc3820410, +0x21e6870204, +0x61e3820410, +0x41c6870204, +0x4181820400, +0x1e4870408, +0x41c3810408, +0x60c3830408, +0x41c3020910, +0x1c7cd8204, +0xe143810e08, +0x61c1820c10, +0x21e7c30410, +0x1c3860408, +0x41c7870204, +0xc3c7858300, +0x1c3820408, +0x41c6810604, +0x61e7850608, +0x41c2870204, +0x1c7810204, +0x20c1830408, +0x41c3830408, +0x21c3830608, +0x41c1830408, +0x21c3830400, +0xc3850408, +0x20c3870204, +0x41e687040c, +0x43c3830408, +0x1c2820408, +0x4143810204, +0x61c7870204, +0x4143830408, +0x183810204, +0x6142870200, +0x61c382850a, +0xe2870010, +0x41c3830400, +0x61e6c70408, +0x41c1830410, +0x6143830400, +0x41c3870408, +0x1c3810204, +0x4142810200, +0x1c7810204, +0x61c7850204, +0x43c7830200, +0x1c3870400, +0x41c7810204, +0xc1c3810204, +0x21c3820400, +0x21a3860c10, +0x6143810e00, +0x1c2830204, +0x4142870400, +0x4183810608, +0x41c3870204, +0xc1c2870204, +0x41c44f8000, +0x1c3838408, +0x21e3c30408, +0x41c3810204, +0x61c2870608, +0x1e2870400, +0x7127860810, +0x4142830200, 
+0x61e28f0c00, +0x61c3820d12, +0x41c5850200, +0x31c38f0400, +0x1c3820408, +0xc083870100, +0x61e7cf0608, +0x21c3870408, +0x1c3820408, +0xc143830204, +0xc3e7c30608, +0x61e2870200, +0x41c3830408, +0x4143830408, +0x1e3830410, +0x1c3830604, +0x142820400, +0x6183860408, +0x61c3830608, +0x41c2870208, +0x1c3820c10, +0x1c3830408, +0x1e7c70408, +0x1c3860400, +0xc183810204, +0x1c2810000, +0x61c3820c10, +0x4142870204, +0x1c3850008, +0x21c3850400, +0x1c1820408, +0xc1c3830204, +0x41c3830204, +0x41c3810204, +0x31e3820c10, +0x2183850408, +0x61278f0408, +0x71e7870608, +0x4103830400, +0x4143810200, +0x1c2870408, +0x21e3820800, +0x61e3820c10, +0xc3830408, +0xc343810680, +0x21e2870400, +0x4142870204, +0x61e1820810, +0x41c3830600, +0x21e7860c10, +0xc3870408, +0x4143820400, +0xc3830204, +0x6143830408, +0x61c7870408, +0x41c2830204, +0x4143830600, +0x41c3810204, +0x41c2870600, +0x41c3810204, +0x4165c78302, +0x41c3810408, +0x63c3810204, +0x61a3c20c10, +0x6167870204, +0x1e3820c10, +0x1c3810204, +0x31a6cf0c10, +0x387818100, +0x41c1830408, +0x41c2870408, +0x61e2870400, +0x1c3830408, +0x20c1820410, +0x61c3830408, +0x1c3870204, +0x61448f0204, +0x1c3820408, +0x61c1820c10, +0x21a3820c10, +0x61c3830c10, +0x4142870204, +0x2182870400, +0xc1c3870600, +0x4143820400, +0x1c3820408, +0x71e7cf8200, +0x1224cf0200, +0xc147810204, +0x4143810200, +0x41c7850204, +0x4143830604, +0x61a3810c10, +0x43c7c58302, +0x1c48c0408, +0x2183820408, +0x3c7810604, +0x1c3830204, +0x61e3820c10, +0xc143830600, +0x41c2830410, +0xc342c78300, +0x6103820c10, +0x41c3830608, +0xc1c3810608, +0x4143870600, +0x41e3820c30, +0x1c3820408, +0x41c3820400, +0x41c3810600, +0x41c2870604, +0x60c3870400, +0x41c3810204, +0x61c3820400, +0xc34383060c, +0x4100820400, +0x41e7810408, +0x1c3830608, +0x41c7870608, +0x61c3830408, +0x4183870600, +0x61e7c70c10, +0x20c3870408, +0x61c3810600, +0x4143830604, +0x71a7c70c00, +0x2183850408, +0xc3e7c98204, +0x41c3830600, +0x21e3860400, +0x20c3830c00, +0x61c3830400, +0x21c3860400, +0x21e3860400, +0x61c3820c08, 
+0x41c3820408, +0x6143820c10, +0x247010200, +0x2063020820, +0x21c3830408, +0x4143810200, +0x41a7810204, +0x61c3830408, +0x61e7870204, +0x31c3860c10, +0x1c3820408, +0x21e3c30c00, +0x61c3820c08, +0x4143810204, +0x61c3820c10, +0x1c3870408, +0x43c4cf8f00, +0x61c3830408, +0x61e7c30408, +0x41c3830408, +0x6143820c00, +0x61e5cf0400, +0x1c3830600, +0x21c3820c10, +0x61c6810608, +0x61a3820800, +0x41c3830604, +0x6143830400, +0x1e2870204, +0x21e7c70c10, +0x21e5c60c10, +0x41c7870204, +0x21e3870408, +0x60c1020c10, +0x1c3830408, +0x20c1820810, +0x6143820400, +0x3c3878302, +0x4182830204, +0x41c1810e00, +0x8181030204, +0x41c3810204, +0x40c3820400, +0x61e3830c00, +0x40c3820400, +0x61e3820800, +0x41c2c70204, +0x61e7c70408, +0x21c3860400, +0x41c3830608, +0x21c3860408, +0xc2c7848104, +0x21e7c10608, +0x21c7870410, +0x21c3870e00, +0x21e7c70408, +0x61c7c78c10, +0x41c3810204, +0x41c3830600, +0x1c3870204, +0x1c3870604, +0x41c7810608, +0x6147870410, +0x41c3830408, +0x41c3820400, +0x20c3830400, +0x4143020408, +0xc1c3870102, +0x21c3830410, +0x20c1820c10, +0x41c3830600, +0x41c3830400, +0x61c3830408, +0x31c78d0400, +0x43c7cf8302, +0x41e5dd9200, +0x41c2870604, +0x1c3810608, +0x21e7820c10, +0x21c3820c00, +0x21a48f0408, +0x41c3870208, +0x20c1820410, +0x41c3830c10, +0x21c2870400, +0xc3c7810704, +0x4143820408, +0x20c3820800, +0x41e6c78204, +0x61e3830410, +0x21e3830408, +0x4143810204, +0x163830400, +0x61c3830408, +0x2187870410, +0x4147810600, +0x20c2870408, +0x20c1020400, +0x41c78d0204, +0x1c3820400, +0x41c3810200, +0x61e7850608, +0x61a3820c10, +0x4142870400, +0x1c3830204, +0x6144870600, +0x1c3810408, +0x1c3810600, +0xc3810008, +0x1c3850200, +0x61c3870400, +0x6127c70400, +0x6145850204, +0x3e4870200, +0xe3c7cf8304, +0x1c3830204, +0x61a2870410, +0x6143810408, +0x6143810408, +0x61c2870408, +0xc3870408, +0x41c0820d10, +0x4147810204, +0x61e3820c10, +0xc3830604, +0x1c7830410, +0x1c3810400, +0x61c3830608, +0x4142870400, +0x41c1820408, +0x4082810200, +0x71c28f0600, +0x1e3820410, +0x43c7838602, +0x1c3870408, 
+0x1e3820810, +0x41c2870608, +0x3c7850206, +0x43e6c78102, +0x61c3870408, +0x61c3820408, +0x41c3830408, +0x41e3820c10, +0x6162870200, +0x4143830408, +0x41c3830600, +0xc344870204, +0x4141850204, +0x6143860400, +0x61a3820810, +0x30c3870400, +0x1c1820400, +0x20c3870608, +0x1c3810408, +0x1c3850204, +0x61e3c30e00, +0x61e6cf0208, +0x21e4c70200, +0x21e6870408, +0x4163830408, +0x21c3c30408, +0x21c3830c10, +0x61e3c38e00, +0x4142030200, +0x1c3810200, +0x1c7850608, +0x61c7850204, +0x61e3830c10, +0x61e7c70408, +0xc3c7c50204, +0x21c3810600, +0x61c3830410, +0xc3c7c10600, +0x43c7c08208, +0x1c7850204, +0x61c4c70408, +0x1c3810408, +0x1c3810408, +0x41e7810408, +0xc344850204, +0x21e3820810, +0x20e3820800, +0x61c7830600, +0x61e4c70608, +0x61e3830410, +0x2083820408, +0xc3c7870608, +0x61c3820c10, +0x41c3850200, +0x6143c70400, +0x6183020408, +0x83c381020c, +0x61c6870204, +0x41c3870204, +0x21e7c70410, +0x20e3820c10, +0x6143830408, +0x6142870208, +0x31e3c70c10, +0x44870000, +0x43c48f8302, +0x61648f0204, +0x61c3870608, +0xc1448f0200, +0x61e3830408, +0x6143830400, +0x1c3820c10, +0x6143820400, +0x41c3830600, +0x61e3c70418, +0x6101830400, +0x43c7c78304, +0x143810200, +0x4143850200, +0x4081820400, +0x6143810c00, +0x41c3830608, +0x1c3830410, +0x1c3810204, +0x1c7810204, +0x4142830400, +0x10e3820810, +0x43c781820c, +0x41c3830600, +0x4142830200, +0xc1c2870204, +0x6143810e08, +0x21c3820c10, +0x1c3830600, +0x6142830200, +0x1e3820810, +0x60c1820c10, +0x41c3830204, +0x4143870204, +0x41c3850200, +0x1c2870008, +0xc3c4ce8102, +0x4147810200, +0x4143850204, +0x41e7810608, +0x2003820000, +0x6146870204, +0xc142830200, +0x20c3820410, +0x21e3c20810, +0x41c3820c00, +0x4143830408, +0x20c3820400, +0x21e6c60c10, +0xe163c18e00, +0x6144870608, +0x6103870200, +0x21e4870208, +0x6145850408, +0x41c7c30204, +0x40c1820408, +0x4142830204, +0x4143830408, +0x1e4870204, +0x31c3820410, +0x21c1820400, +0x21c3810608, +0x1c6870408, +0x61c7810c00, +0x20c3820810, +0xc183830200, +0x31e6c30c10, +0x6125870400, +0x41c2870204, 
+0x31a3870400, +0x21c3830408, +0x1c3820408, +0x1e5cf0400, +0x21e3c30418, +0x41c3c70608, +0x63c7870204, +0x6167870400, +0x61e7c70c10, +0x43c7810302, +0x4142830600, +0xe3c4870200, +0x1c3830600, +0x63e4cf8100, +0x63e7c78302, +0x61c3830408, +0x61c48f0600, +0x1e3820418, +0x387c08100, +0x1e3830408, +0x2183850208, +0x31e3870c10, +0x1e786040c, +0x21e3830c00, +0xc146870204, +0x1c3830408, +0xc3850400, +0x4142870204, +0x1c3810204, +0x1c3820408, +0x31e6cf0810, +0xe3c7830600, +0x4141820400, +0x40c1020408, +0x3083820410, +0x31e7860c10, +0x41c3870408, +0x61e5870400, +0x61c3820408, +0x61c3870408, +0xc183810302, +0x21e6c70408, +0x41c3870200, +0x4143870400, +0x41e3830400, +0x1c3850200, +0x43efcf8300, +0x41c3870604, +0x21c3820c10, +0x6143820c10, +0x6142870408, +0x21c3810608, +0x21c7810204, +0xe2860410, +0xc2c7830202, +0x20c1820c10, +0x20c1820400, +0x61c4870204, +0x41c4870204, +0x41c3830408, +0x1c1850200, +0x20c3820c10, +0x1c3820408, +0xc3c5850204, +0x20c1820400, +0x1c3810204, +0x20c2830400, +0x61c3830408, +0x4183830204, +0x61e2870408, +0x41c3830204, +0x21e7870204, +0x63c4870204, +0x41c3870604, +0x61c2870200, +0x1e3830410, +0x1c7850202, +0x41c3830408, +0x71a58f0410, +0x43c4cf8100, +0x41c3810200, +0x43c4cf8204, +0x6163810408, +0x1c3830410, +0xc143810204, +0x1c2870204, +0x1c3810200, +0xe1c3830418, +0x4101820400, +0x61c3830404, +0x41c3870204, +0x81c3830604, +0x1c3830408, +0x6142870400, +0x20c3820c10, +0x21a3860810, +0x61c7870408, +0x61c7870604, +0xc347830300, +0x21c3830410, +0x41c3830408, +0x21e5870408, +0x4143810600, +0xc247c38102, +0x61c2870400, +0x6143870600, +0x61c78f0204, +0x4187810204, +0xc347c58300, +0x4143830400, +0x41c3830400, +0x61c7870204, +0x1c3870408, +0x41c3830408, +0x20a2860400, +0xe367c58e00, +0xc142830300, +0x4142870200, +0x21c3870410, +0xc1c7870204, +0x41c381060c, +0xc3c3810600, +0x20c1820c10, +0x21c3830400, +0x2042820400, +0x21a4860410, +0x61c2820400, +0x4181810200, +0x41c1820408, +0x6143870408, +0x1c3830408, +0x1c7810204, +0x4147810204, +0x41c5870408, +0x83c6c78102, 
+0x1c3870204, +0x31e3830c10, +0x11e7810208, +0x61c7870408, +0x6144870204, +0x61c7830200, +0x11e7c30c00, +0x41c2870408, +0x41c3820400, +0x1c7810200, +0x1c3830410, +0x61c3820408, +0xe3e4c70608, +0x20e1820410, +0x41c3810408, +0x41c3810600, +0x61c3820408, +0x1e5cf0410, +0xc346870304, +0x20c3820408, +0x61e3830c10, +0x1c3830408, +0x4181820408, +0x1c2870204, +0x41c3830400, +0x63e7cf0606, +0x61e3830c08, +0x21c3830604, +0x41c3820408, +0x43c7c18300, +0x6104870400, +0x41c3830604, +0xe1e3c78300, +0x4143810000, +0x81c7870204, +0x31e3860830, +0xc3c7810302, +0x20c3850400, +0x6143870400, +0x1c3820410, +0xc347810204, +0x1c3810408, +0x21e7870600, +0x61a3c30410, +0x81c3810204, +0x1c2870408, +0xe3c60830, +0x20e2870400, +0x61c3810608, +0xc143810e00, +0x8284850302, +0x41c3830408, +0x1c3810408, +0x41c3830204, +0x41c2c70200, +0x4143810204, +0x41c6870200, +0xe1c3870204, +0x21e3418610, +0x4143830204, +0x1e7c70204, +0x61c2860408, +0x21c3820c10, +0x4146870200, +0x2100870200, +0x20e3820810, +0x1e7c70608, +0x41c3830600, +0x61c3830c10, +0x4143830400, +0x41c3870608, +0x1e3820408, +0x41c2870604, +0x41c3830600, +0x41c7870204, +0x143860408, +0x41c3830608, +0x1c3830408, +0x20c3820810, +0x1c2860400, +0x61e4c70204, +0x41c3860408, +0x31e3870c10, +0xc347830600, +0x21e3830408, +0x20c3820400, +0x1c4c70204, +0x61e3860c18, +0x1c7870408, +0x61a2870408, +0xc1c3830204, +0x41c3830408, +0x43c7810204, +0xc363810e08, +0x41c3830608, +0x1c3820408, +0x6043020410, +0x1c3830204, +0x4143870600, +0x20c3820400, +0x4144870204, +0xe321c10e00, +0x1c3810204, +0x41c2870204, +0xc3c20c10, +0x6143830600, +0x61c3830408, +0x30e3820800, +0x4143810200, +0x4143810408, +0x20c1820410, +0x1c385060c, +0x61c3820410, +0x43c7810302, +0x21e3820810, +0xe344c70204, +0x2142870400, +0xc244870200, +0x41c3830608, +0x20c3860c10, +0x6141830c10, +0x6367c50204, +0x41e4870204, +0x61c3830408, +0x6142830608, +0x6142870200, +0x61c3820400, +0x20c3830408, +0x61c3870608, +0x3c4870204, +0x41c3810200, +0x6143820410, +0x31e78f0410, +0x61c3830410, +0x21c3870600, 
+0x41c3830408, +0xe3820400, +0x1c5850204, +0x41c3810200, +0x61c7860408, +0x4143830600, +0x6143830408, +0x41c3870604, +0x4347838300, +0x6143870408, +0x6142870200, +0x3e7c48f00, +0x4143820400, +0x61c1020408, +0x1c3810204, +0xc143810608, +0x1c3830408, +0x8387c78300, +0x21c48f0200, +0x61e3830c10, +0x61e3c30c18, +0x20c3820410, +0xe344878302, +0x41e3830408, +0x1c7850204, +0x6187c10400, +0x61e2870408, +0xc103810608, +0xc3e5870204, +0x41e3c30608, +0x6143820410, +0x41c3810408, +0x40c3860400, +0x61e7870c10, +0x61c3850408, +0x6142830200, +0x6143810608, +0x71a7870608, +0x41c5850200, +0x2083820400, +0x21c3830c10, +0x41c1830408, +0xc2860400, +0x4143810408, +0x4100870200, +0x1c3810204, +0x41c3830408, +0x61c3830410, +0x61e7830408, +0xc142870204, +0x31e3860c10, +0x21e5850410, +0x6142830200, +0x3c383060c, +0x8383870300, +0x41c7810408, +0x41c7870204, +0x61c3830408, +0x41a2870400, +0xc1c3830604, +0x183810200, +0x41c1020408, +0x41c7870200, +0x41c3830604, +0x20c3820410, +0x61e58f0600, +0x41c3830408, +0x61c7870600, +0x1c3830408, +0x21c7c70408, +0x1c3830418, +0x41c3830600, +0x4143870204, +0x21c3810408, +0x41e7830410, +0x3c6830102, +0x61c3820408, +0x1c3820c00, +0x61c3820400, +0x71e3c30820, +0x30e2860800, +0x1e7870608, +0xc1020400, +0x61c3830410, +0x21c3870400, +0x41c3870200, +0x41c3860408, +0x6143870400, +0x1c3820400, +0x61e7c30408, +0x61c6870600, +0x143820408, +0xe367c91e10, +0x41c2810610, +0x6145870202, +0x1e3830408, +0x1c3870008, +0x21e7cf0604, +0x31e4870408, +0x1e4c70608, +0x21e3870408, +0x1c3830400, +0x4143c20408, +0x61c3830600, +0x61c3830608, +0x21e7c70418, +0x4143830204, +0x41c3830608, +0x1c3820408, +0x43c7878302, +0x6183c10408, +0x41c3870608, +0xc183810204, +0x41c7c7060c, +0x61c3870204, +0xc3e7cf8100, +0x61a7c60c10, +0x6183820810, +0x4143830608, +0x4143810600, +0x4145850100, +0x21e3830408, +0x41c3820408, +0x41c6870200, +0xc182870200, +0x41c3870604, +0x40c1820408, +0x21c3830410, +0x21c7810608, +0xc143810200, +0x41c7870202, +0xa146c70200, +0x20c3820408, +0x20c2860400, +0x1c3820408, 
+0xc1c3810204, +0x61c3870408, +0x3c7c10410, +0x4183810100, +0x21c3820408, +0xe3820810, +0x4143810600, +0xc1c3810204, +0x41c3830400, +0xe3860408, +0x41c3870408, +0x60c1810c00, +0x61e2870400, +0x8345cf8100, +0x21e3830c10, +0x4144870204, +0x63e7c70608, +0x6122870200, +0x20c3820408, +0x61c3830408, +0x61e3cd1e10, +0x6103870200, +0x41c3870400, +0x1c3830408, +0xc3c3810608, +0x41c383060c, +0x4143860400, +0x1c3810200, +0x1c3820400, +0x20c3820800, +0x21c6870408, +0x43c7810204, +0xc346c78304, +0x40c3820400, +0x4101820408, +0x61c3810200, +0x183810200, +0xc3820810, +0x83c7818304, +0x4143820400, +0x41c6c70604, +0x41c3820408, +0x61c1810c10, +0x61e3870400, +0x20e3860408, +0x43c3c30608, +0x41c3830608, +0x41c3870204, +0x4143810408, +0x1c3830408, +0xc3c7870302, +0x20c3860410, +0xc244cf8102, +0x1c3820408, +0x2083870408, +0x1c3810600, +0x43c7878100, +0x1c7810204, +0x41c3810608, +0x1c3810204, +0x4143830600, +0x6367850608, +0x61e3830c00, +0x1c3850208, +0x1e3860400, +0x4101830600, +0x61a2870608, +0x41e3820408, +0x4142870204, +0x1c3830604, +0x61e3820400, +0x41c48f0204, +0x4143830408, +0x41c3830408, +0x1c6cd0000, +0x61c7870600, +0x6163820400, +0x4143870204, +0x21e3830c10, +0x41c7810608, +0xc143818e00, +0x1c7870204, +0x21c3820408, +0x1c7850208, +0x4143850200, +0x21e7cf0608, +0x4146870204, +0x61c3020810, +0x61c2870604, +0x61c3c70408, +0x1c7850204, +0x8143810200, +0x1c3830408, +0x1c3870608, +0x63e4cf0608, +0x41c3870408, +0x1c3810408, +0x41c3c10410, +0x6142860400, +0x1c3830604, +0x41c5cf0204, +0x21e7c70608, +0x61c3810e08, +0x61a1811c10, +0x20c1820c00, +0x41e3830400, +0xc3870408, +0x4147850204, +0x41a3c70608, +0x2187c70408, +0x2043820c00, +0x41c3830408, +0x6103830408, +0x6143820810, +0x61e7870408, +0x41c7870408, +0x21c3820400, +0x60e1820800, +0x40c2870400, +0x3e4ce8102, +0x4142870204, +0x1c5870200, +0x21e7860c10, +0x21e3820c10, +0x1e7870408, +0x41c3820408, +0x1c7830408, +0x21e3830c10, +0x20c3820400, +0x61e7810608, +0x1c3830608, +0x4143820400, +0x6324cf0b00, +0x41e3870608, +0x1e3860410, 
+0x3c7c08102, +0x61c3830408, +0x1c3830604, +0x61e7830408, +0x21c1820408, +0x1c3820408, +0x1c3820408, +0x1c3820400, +0x41c3830400, +0x61c3870608, +0x21e2830408, +0x21c3820400, +0x61e3820800, +0x21c3870408, +0x61a2c70400, +0x1c3820410, +0x61e3820c10, +0x4142870408, +0x41c2870408, +0x6323c78608, +0x43c7810608, +0x20c3820408, +0x1c3870604, +0xe2648f0200, +0x6143870400, +0x4103810200, +0x61c3830408, +0x20e3820c10, +0x83c7850306, +0x41e3830408, +0x6183820400, +0x61e2c70400, +0x60c3820c00, +0x73e6cf8300, +0x41c3870300, +0x61e3830410, +0xc146870300, +0xc3c3810204, +0x6143820408, +0x183810204, +0x41c3820408, +0x61c3820c10, +0x4147810204, +0x1c3870204, +0x41c3830410, +0x4143810204, +0xc204c78100, +0x1c3810408, +0x1c4870408, +0x21c3820400, +0x41c3c30410, +0x21c3830408, +0x61e3830c10, +0x4142870200, +0x83c7830204, +0x21c3820c10, +0x4142850000, +0x61c3830608, +0x61c2870200, +0xc1c3810204, +0x21e3830410, +0x20c3820410, +0xc3820810, +0x41c4c78500, +0xc142830200, +0x71c3830c10, +0x4143830600, +0x41c3820408, +0xc3810600, +0x41e3830408, +0x31a3870410, +0x21e7c70c10, +0x21c3820400, +0x21c3830410, +0x61e7c70608, +0x6143820c00, +0x21c3820400, +0x2083820410, +0x61c7870400, +0x61e3830410, +0x61c3820408, +0x71a78f0410, +0xc3c5870204, +0x61e38d0408, +0x6143820c00, +0xc3c6878300, +0x1c3820400, +0x41c3820408, +0x61e7870408, +0x1c3810204, +0x6143830400, +0x41e6cf0000, +0x4143820400, +0x41c3830600, +0x6123830410, +0x20c3000408, +0x20a3820800, +0x4146870604, +0x41c3830204, +0x6146870400, +0x61c3830408, +0x21e3830400, +0xc3830408, +0x41c3830604, +0xc143810200, +0x23e7c70400, +0x1c3870204, +0x4183810600, +0x41c4870204, +0x1c3820408, +0x61c3020408, +0x1c3810204, +0x61c6870200, +0x61e6870604, +0x60c1820400, +0x61c3820810, +0xc307c10200, +0x21e78f0408, +0x63c7c78302, +0x61c3830600, +0x61c7870608, +0x1c78d0204, +0x61c1820810, +0x6143830408, +0x20c3860400, +0x61c3870600, +0x41c3810600, +0x1c3830408, +0x1c3830408, +0x21c3820408, +0x41c6870204, +0x1c3830204, +0x41c3830408, +0x41c3810204, +0x21c3820c10, 
+0x21e7870408, +0x21c3830410, +0x61c6870204, +0x1c3860408, +0x61c3830c10, +0x41c78f0204, +0x21e3830c00, +0x1c3830c10, +0x61a3830410, +0x61c3820c10, +0x61c3828700, +0x41c3810604, +0x21c3830400, +0x41c3830608, +0x4143820400, +0x4143870600, +0xc1c3870204, +0x1c3870408, +0x6143850200, +0x1c3850204, +0x6143820c10, +0x21c3830408, +0x61c1020810, +0x21c78f0204, +0x1c3810204, +0x21c7810608, +0x4142870200, +0x1c3850204, +0x41c7830200, +0x21e5cf0400, +0x142830200, +0x41c7850200, +0x11e78e0810, +0x61c3870408, +0x41c3830608, +0x41448f0204, +0x61c2870204, +0x61c7870608, +0x21c3830400, +0x1c3830408, +0x61c3830400, +0x183810200, +0xc3820400, +0xc143830e00, +0x10e3820820, +0x1c4870204, +0xe3c7c18e08, +0x71a6cb1e00, +0x41c7870204, +0x41c3810600, +0x61c3870608, +0x81c3870204, +0x41c3830408, +0x4141c20c10, +0xc143830204, +0x6183830408, +0x61c3860c10, +0xe3820c10, +0x1c3810200, +0x61a3820820, +0x21e3810408, +0x21e3870408, +0x20c3820400, +0x1c3820400, +0x6144cf0302, +0x41c3830408, +0x4143870200, +0x20c3820800, +0x41c3830408, +0xc3820408, +0x21e7850408, +0x4144870200, +0x61e3c70410, +0xc306478100, +0x21c3830408, +0x61e7870408, +0x21c2870204, +0x1c3830408, +0x1c2870102, +0xe344470204, +0x61c3810408, +0x1c3830408, +0x6166c70408, +0x6103810200, +0x1e1820820, diff --git a/samples/digitrec/digitrec/hcl_code_dig.py b/samples/digitrec/digitrec/hcl_code_dig.py new file mode 100644 index 000000000..97f579a79 --- /dev/null +++ b/samples/digitrec/digitrec/hcl_code_dig.py @@ -0,0 +1,150 @@ +import heterocl as hcl +import time +import numpy as np +import math +from digitrec_data import read_digitrec_data + +# Declare some constants and data types. For images, we need unsigned 49-bit +# integers, while for knn matrices, we need unsigned 6-bit integers. +N = 7 * 7 +max_bit = int(math.ceil(math.log(N, 2))) +data_size = (10, 1800) + +# HeteroCL provides users with a set of bit-accurate data types, which include +# unsigned/signed arbitrary-bit integers and unsigned/signed fixed-points. 
+# Here we use `UInt(N)` for an N-bit unsigned integer. +dtype_image = hcl.UInt(N) +dtype_knnmat = hcl.UInt(max_bit) + +# We can initialize a HeteroCL environment with default data type by using +# `hcl.init(dtype)`. Here we set the default data type of each variable to +# the unsigned integer with the maximum bitwidth. +hcl.init(dtype_image) + + +def top(target=None): + + # Algorithm definition (§1) + def knn(test_image, train_images): + + # Imperative programming and bit operations (§2) + def popcount(num): + out = hcl.local(0, "out") + with hcl.for_(0, train_images.type.bits) as i: + # Bit selection operation + out[0] += num[i] + return out[0] + + # This function update the candidates, i.e., `knn_mat`. Here we mutate + # through the shape of tensor `dist`. For each `dist` value, if it is + # smaller than the maximum candidate, we replace it. + def update_knn(dist, knn_mat, i, j): + max_id = hcl.local(0, "max_id") + with hcl.for_(0, 3) as k: + with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id[0]]): + max_id[0] = k + with hcl.if_(dist[i][j] < knn_mat[i][max_id[0]]): + knn_mat[i][max_id[0]] = dist[i][j] + + # Main algorithm (§3) + # Fist step: XOR (§3.1) + diff = hcl.compute(train_images.shape, + lambda x, y: train_images[x][y] ^ test_image, + "diff") + + # Second step: popcount (§3.2) + dist = hcl.compute(diff.shape, + lambda x, y: popcount(diff[x][y]), + "dist") + + + # Third step: initialize the candidates (§3.3) + knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") + + + # Fourth step: update the candidates (§3.4) + hcl.mutate(dist.shape, + lambda x, y: update_knn(dist, knn_mat, x, y), + "knn_update") + + # Final step: return the candidates (§3.5) + return knn_mat + + # Inputs/Outputs definition (§4) + # Scalars (§4.1) + test_image = hcl.placeholder((), "test_image") + # Tensors (§4.2) + train_images = hcl.placeholder(data_size, "train_images") + + # Data type customization (§5.1) + scheme = hcl.create_scheme([test_image, train_images], knn) + 
scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat) + + # Compute customization (§5.2) + s = hcl.create_schedule_from_scheme(scheme) + + diff = knn.diff + dist = knn.dist + knn_update = knn.knn_update + + # Merge loop nests + s[diff].compute_at(s[dist], dist.axis[1]) + s[dist].compute_at(s[knn_update], knn_update.axis[1]) + + # Reorder loop to expose more parallelism + s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) + + # Parallel outer loop and pipeline inner loop + s[knn_update].parallel(knn_update.axis[1]) + s[knn_update].pipeline(knn_update.axis[0]) + + # At the end, we build the whole offloaded function. + return hcl.build(s, target=target) + +offload = top('sdaccel') +with open('sdaccel_code.cl', 'w') as f: + f.write(offload) + +def knn_vote(knn_mat): + knn_mat.sort(axis = 1) + knn_score = np.zeros(10) + + for i in range(0, 3): + min_id = np.argmin(knn_mat, axis = 0)[i] + knn_score[min_id] += 1 + + return np.argmax(knn_score) + +# Data preparation +train_images, _, test_images, test_labels = read_digitrec_data() + +# Classification and testing +correct = 0.0 + +# We have 180 test images +total_time = 0 +for i in range(0, 180): + + # Prepare input data to offload function + # To load the tensors into the offloaded function, we must first cast it to + # the correct data type. 
+ hcl_train_images = hcl.asarray(train_images, dtype_image) + hcl_knn_mat = hcl.asarray(np.zeros((10, 3)), dtype_knnmat) + + # Execute the offload function and collect the candidates + start = time.time() + offload(test_images[i], hcl_train_images, hcl_knn_mat) + total_time = total_time + (time.time() - start) + + # Convert back to a numpy array + knn_mat = hcl_knn_mat.asnumpy() + + # Feed the candidates to the voting algorithm and compare the labels + if knn_vote(knn_mat) == test_labels[i]: + correct += 1 + +print("Average kernel time (s): {:.2f}".format(total_time/180)) +print("Accuracy (%): {:.2f}".format(100*correct/180)) + +# for testing +assert (correct >= 150.0) diff --git a/samples/gemm/gemm_main.py b/samples/gemm/gemm_main.py index fb05a094d..53305b98c 100644 --- a/samples/gemm/gemm_main.py +++ b/samples/gemm/gemm_main.py @@ -33,6 +33,8 @@ def kernel(matrix_1, matrix_2): def time_gemm(dtype, m=1024, n=1024, k=1024, target=None): hcl.init(dtype) f = gemm(m, n, k, dtype, target) + + print (f) np_1 = np.random.randint(10, size=(m, k)) np_2 = np.random.randint(10, size=(k, n)) np_3 = np.matmul(np_1, np_2) @@ -52,5 +54,6 @@ def time_gemm(dtype, m=1024, n=1024, k=1024, target=None): ############################################################################### # Test the algorithm with different data types dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] -for dtype in dtypes: - time_gemm(dtype) + +# for dtype in dtypes: +# time_gemm(hcl.Float(), 10, 10, 10, 'sdaccel') diff --git a/samples/gemm/gemm_sdaccel.py b/samples/gemm/gemm_sdaccel.py new file mode 100644 index 000000000..d9ae115ef --- /dev/null +++ b/samples/gemm/gemm_sdaccel.py @@ -0,0 +1,7 @@ +import heterocl as hcl +import numpy as np +from gemm_main import * + +#dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] +#for dtype in dtypes: +time_gemm(hcl.Int(32), 10, 10, 10, 'sdaccel_sw_emu') diff --git a/samples/gemm/gemm_vhls.py b/samples/gemm/gemm_vhls.py index e27fa155e..8edd84bdd 100644 --- 
a/samples/gemm/gemm_vhls.py +++ b/samples/gemm/gemm_vhls.py @@ -2,6 +2,6 @@ import numpy as np from gemm_main import * -dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] -for dtype in dtypes: - time_gemm(dtype, 10, 10, 10, 'vhls_csim') +#dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] +#for dtype in dtypes: +time_gemm(hcl.Int(32), 10, 10, 10, 'vhls_csim') diff --git a/samples/smith_waterman/smith_waterman_main.py b/samples/smith_waterman/smith_waterman_main.py index 6515863dd..22926e096 100644 --- a/samples/smith_waterman/smith_waterman_main.py +++ b/samples/smith_waterman/smith_waterman_main.py @@ -145,14 +145,14 @@ def batch_sw(seqAs, seqBs, outAs, outBs): -# f = top() -code = top('sdaccel'); -with open('sdaccel_code.cl', 'w') as f: - f.write(code) - -code2 = top('merlinc') -with open('merlinc_code.cl', 'w') as f: - f.write(code2) +f = top() +# code = top('sdaccel'); +# with open('sdaccel_code.cl', 'w') as f: +# f.write(code) + +# code2 = top('merlinc') +# with open('merlinc_code.cl', 'w') as f: +# f.write(code2) diff --git a/tvm/src/codegen/hlsc/build_hlsc.cc b/tvm/src/codegen/hlsc/build_hlsc.cc index 42fb68089..ce7903553 100644 --- a/tvm/src/codegen/hlsc/build_hlsc.cc +++ b/tvm/src/codegen/hlsc/build_hlsc.cc @@ -24,7 +24,7 @@ runtime::Module BuildVivadoHLSCSim(Array funcs) { cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); - + std::cout << code; return runtime::CreateVivadoHLSModule(funcs[0], code); } diff --git a/tvm/src/codegen/hlsc/vhls_module.cc b/tvm/src/codegen/hlsc/vhls_module.cc index c5f004a93..fd28234db 100644 --- a/tvm/src/codegen/hlsc/vhls_module.cc +++ b/tvm/src/codegen/hlsc/vhls_module.cc @@ -345,9 +345,9 @@ class VivadoHLSModuleNode final : public ModuleNode { GenHostCode(args, shmids, arg_types, func_, test_file_); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated HLS C code ..."; - system("g++ main.cpp -o out"); + // system("g++ main.cpp -o out"); LOG(CLEAN) << "Running C 
simulation ..."; - system("./out"); + // system("./out"); LOG(CLEAN) << "Finished C simulation"; // system("rm out main.cpp"); FreeSharedMem(args, shmids, arg_sizes); diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.cc b/tvm/src/codegen/opencl/aocl/aocl_module.cc deleted file mode 100755 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/aocl/aocl_module.h b/tvm/src/codegen/opencl/aocl/aocl_module.h deleted file mode 100755 index e69de29bb..000000000 diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index d882bb1e3..917a52de8 100755 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -3,20 +3,10 @@ yb269@cornell.edu */ -# include -# include -# include -# include -# include -# include "../../runtime/meta_data.h" -# include -# include "./codegen_sdaccel.h" -# include "./codegen_aocl.h" -# include "./codeanalys_openclc.h" -# include "../build_common.h" -// # include "./sdaccel/sdaccel_module.h" -// # include "./aocl/aocl_module.h" - +#include "./codegen_aocl.h" +#include "./codegen_sdaccel.h" +#include "../build_common.h" +#include "./sdaccel_module.h" @@ -37,11 +27,11 @@ runtime::Module BuildSDAccelSim(Array funcs) { cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); - + std::cout << code; return runtime::CreateSDAccelModule(funcs[0], code); } -TVM_REGISTER_API("codegen.sdaccel_sw_emu") +TVM_REGISTER_API("codegen.build_sdaccel_sw_emu") .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = BuildSDAccelSim(args[0]); }); @@ -49,9 +39,6 @@ TVM_REGISTER_API("codegen.sdaccel_sw_emu") - - - template std::string BuildOpenCL(Array funcs){ using TVM::runtime::Registry; @@ -66,10 +53,6 @@ std::string BuildOpenCL(Array funcs){ } std::string code = cg.Finish(); - if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { - code = (*f)(code).operator std::string(); - } - LOG(WARNING) << "OpenCL doesn't have runtime, return kernel code"; return code; } @@ 
-86,5 +69,5 @@ TVM_REGISTER_API("codegen.build_aocl") .set_body([]( TVMArgs args, TVMRetValue * rv ) { * rv = BuildOpenCL(args[0]); }); -} -} +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/opencl/common/common.mk b/tvm/src/codegen/opencl/common/common.mk new file mode 100755 index 000000000..baf07f79b --- /dev/null +++ b/tvm/src/codegen/opencl/common/common.mk @@ -0,0 +1,150 @@ +#******************************************************************************* +#Vendor: Xilinx +#Associated Filename: common.mk +#Purpose: Common Makefile for SDAccel Compilation +# +#******************************************************************************* +#Copyright (C) 2015-2016 XILINX, Inc. +# +#This file contains confidential and proprietary information of Xilinx, Inc. and +#is protected under U.S. and international copyright and other intellectual +#property laws. +# +#DISCLAIMER +#This disclaimer is not a license and does not grant any rights to the materials +#distributed herewith. 
Except as otherwise provided in a valid license issued to +#you by Xilinx, and to the maximum extent permitted by applicable law: +#(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX +#HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, +#INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR +#FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether +#in contract or tort, including negligence, or under any other theory of +#liability) for any loss or damage of any kind or nature related to, arising under +#or in connection with these materials, including for any direct, or any indirect, +#special, incidental, or consequential loss or damage (including loss of data, +#profits, goodwill, or any type of loss or damage suffered as a result of any +#action brought by a third party) even if such damage or loss was reasonably +#foreseeable or Xilinx had been advised of the possibility of the same. +# +#CRITICAL APPLICATIONS +#Xilinx products are not designed or intended to be fail-safe, or for use in any +#application requiring fail-safe performance, such as life-support or safety +#devices or systems, Class III medical devices, nuclear facilities, applications +#related to the deployment of airbags, or any other applications that could lead +#to death, personal injury, or severe property or environmental damage +#(individually and collectively, "Critical Applications"). Customer assumes the +#sole risk and liability of any use of Xilinx products in Critical Applications, +#subject only to applicable laws and regulations governing limitations on product +#liability. +# +#THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT +#ALL TIMES. 
+# +#******************************************************************************* +SHELL = /bin/bash +VPATH = ./ + +#supported flow: cpu_emu, hw_emu, hw +CC = xcpp +CLCC = xocc + +ifeq ($(XDEVICE_REPO_PATH),) +#no device repo path set. do nothing + DEVICE_REPO_OPT = +else + DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} +endif + +#HOST_LFLAGS += ${XILINX_SDACCEL}/lib/lnx64.o/libstdc++.so.6 +HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 +HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread +CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} + +ifeq (${KEEP_TEMP},1) + CLCC_OPT += -s +endif + +ifeq (${KERNEL_DEBUG},1) + CLCC_OPT += -g +endif + +CLCC_OPT += --kernel ${KERNEL_NAME} +OBJECTS := $(HOST_SRCS:.cpp=.o) + +.PHONY: all + +all: run + +host: ${HOST_EXE_DIR}/${HOST_EXE} + +xbin_cpu_em: + make SDA_FLOW=cpu_emu xbin -f sdaccel.mk + +xbin_hw_em: + make SDA_FLOW=hw_emu xbin -f sdaccel.mk + +xbin_hw : + make SDA_FLOW=hw xbin -f sdaccel.mk + +xbin: ${XCLBIN} + +run_cpu_em: + make SDA_FLOW=cpu_emu run_em -f sdaccel.mk + +run_hw_em: + make SDA_FLOW=hw_emu run_em -f sdaccel.mk + +run_hw : + make SDA_FLOW=hw run_hw_int -f sdaccel.mk + +run_em: xconfig host xbin + XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} + +run_hw_int : host xbin_hw + source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} + +estimate : + ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS} + +xconfig : emconfig.json + +emconfig.json : + emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . 
+ +${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} + ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ + +${XCLBIN}: + ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} + +%.o: %.cpp + ${CC} ${HOST_CFLAGS} -c $< -o $@ + +clean: + ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil + +cleanall: clean + ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou + + +help: + @echo "Compile and run CPU emulation using default xilinx:adm-pcie-7v3:1ddr:3.0 DSA" + @echo "make -f sdaccel.mk run_cpu_em" + @echo "" + @echo "Compile and run hardware emulation using default xilinx:adm-pcie-7v3:1ddr:3.0 DSA" + @echo "make -f sdaccel.mk run_hw_em" + @echo "" + @echo "Compile host executable only" + @echo "make -f sdaccel.mk host" + @echo "" + @echo "Compile XCLBIN file for system run only" + @echo "make -f sdaccel.mk xbin_hw" + @echo "" + @echo "Compile and run CPU emulation using xilinx:tul-pcie3-ku115:2ddr:3.0 DSA" + @echo "make -f sdaccel.mk XDEVICE=xilinx:tul-pcie3-ku115:2ddr:3.0 run_cpu_em" + @echo "" + @echo "Clean working diretory" + @echo "make -f sdaccel.mk clean" + @echo "" + @echo "Super clean working directory" + @echo "make -f sdaccel.mk cleanall" diff --git a/tvm/src/codegen/opencl/sdaccel.mk b/tvm/src/codegen/opencl/sdaccel.mk new file mode 100755 index 000000000..7c361cf6b --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel.mk @@ -0,0 +1,84 @@ +#******************************************************************************* +#Vendor: Xilinx +#Associated Filename: sdaccel.mk +#Purpose: Makefile exmaple for SDAccel Compilation +# +#******************************************************************************* +#Copyright (C) 2015-2016 XILINX, Inc. +# +#This file contains confidential and proprietary information of Xilinx, Inc. and +#is protected under U.S. and international copyright and other intellectual +#property laws. +# +#DISCLAIMER +#This disclaimer is not a license and does not grant any rights to the materials +#distributed herewith. 
Except as otherwise provided in a valid license issued to +#you by Xilinx, and to the maximum extent permitted by applicable law: +#(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX +#HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, +#INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR +#FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether +#in contract or tort, including negligence, or under any other theory of +#liability) for any loss or damage of any kind or nature related to, arising under +#or in connection with these materials, including for any direct, or any indirect, +#special, incidental, or consequential loss or damage (including loss of data, +#profits, goodwill, or any type of loss or damage suffered as a result of any +#action brought by a third party) even if such damage or loss was reasonably +#foreseeable or Xilinx had been advised of the possibility of the same. +# +#CRITICAL APPLICATIONS +#Xilinx products are not designed or intended to be fail-safe, or for use in any +#application requiring fail-safe performance, such as life-support or safety +#devices or systems, Class III medical devices, nuclear facilities, applications +#related to the deployment of airbags, or any other applications that could lead +#to death, personal injury, or severe property or environmental damage +#(individually and collectively, "Critical Applications"). Customer assumes the +#sole risk and liability of any use of Xilinx products in Critical Applications, +#subject only to applicable laws and regulations governing limitations on product +#liability. +# +#THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT +#ALL TIMES. 
+# +#****************************************************************************** +ifndef XILINX_SDX +$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) +endif + +SDA_FLOW = cpu_emu +HOST_SRCS = vadd.cpp +HOST_EXE_DIR=. +HOST_EXE = vadd +HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL +HOST_LFLAGS = + +KERNEL_SRCS = default_function.cl +KERNEL_NAME = default_function +KERNEL_DEFS = +KERNEL_INCS = +#set target device for XCLBIN +XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 +XDEVICE_REPO_PATH= +KEEP_TEMP=1 +KERNEL_DEBUG= +XCLBIN_NAME=bin_vadd +HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" +#BOARD_SETUP_FILE needs to point to setup.sh generated by xbinst command +BOARD_SETUP_FILE=setup.sh + +ifeq (${SDA_FLOW},cpu_emu) + CLCC_OPT += -t sw_emu + XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin +else ifeq (${SDA_FLOW},hw_emu) + CLCC_OPT += -t hw_emu + XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin +else ifeq (${SDA_FLOW},hw) + XCLBIN = ${XCLBIN_NAME}_hw.xclbin + CLCC_OPT += -t hw +endif + +HOST_ARGS = ${XCLBIN} + +COMMON_DIR = ./common +include ${COMMON_DIR}/common.mk + diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc deleted file mode 100755 index 066e1602e..000000000 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.cc +++ /dev/null @@ -1,328 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include "./sdaccel_module.h" -# include -# include -// # include -// # include -# include - -namespace TVM { -namespace runtime { - -namespace { - -void PrintIndent(std::ofstream& stream, int indent) { - for (int i = 0;i < indent; i++ ) { - stream << ' '; - } -} - -inline size_t GetTypeSize(TVMType t) { - size_t byte = (t.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - return byte; -} - -inline size_t GetDataSize(TVMArray* arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= arr->shape[i]; - } - size_t 
byte = (arr->dtype.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - size *= (byte * 8 * arr->dtype.lanes + 7) / 8; - return size; -} - -inline TVMType Type2TVMType(Type t) { - TVMType tt; - if (t.is_int()) tt.code = kDLInt; - else if (t.is_uint()) tt.code = kDLUInt; - else if (t.is_float()) tt.code = kDLFloat; - else LOG(FATAL) << "Unacceptable type: " << t; - tt.bits = static_cast(t.bits()); - tt.fracs = static_cast(t.fracs()); - return tt; -} - -inline std::string Type2Str(TVMType t) { - -} - -inline std::string Tpye2ExtStr(TVMType t) { - -} - - - - - -inline std::string Type2Byte(TVMType t) { - std::string str = ""; - if (t.code == kDLFloat) { - str += "float"; - } else if (t.code == kDLInt || t.code == kDLUInt) { - if (t.code == kDLUInt) str += "u"; - str += "int"; - if (t.bits <= 8) str += "8"; - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - str += "_t"; - } - return str; -} - -void CollectArgInfo(TVMArgs& args, - LoweredFunc func, - std::vector& arg_sizes, - std::vector& arg_types) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - arg_sizes.push_back(GetDataSize(arr)); - arg_types.push_back(arr->dtype); - } else { - const Variable* var = func->api_args[i].as(); - TVMType t = Type2TVMType(var->type); - arg_sizes.push_back(GetTypeSize(t)); - arg_types.push_back(t); - } - } -} - -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - // generate shared memory key and id - // TODO: maybe get the current path?? 
- key_t key = ftok("/", i+1); - int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); - shmids.push_back(shmid); - // copy mem from TVM args to the shared memory - void* mem = shmat(shmid, nullptr, 0); - memcpy(mem, arr->data, arg_sizes[i]); - } else { - shmids.push_back(0); - } - } -} - -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes) { - for (size_t i = 0; i < shmids.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - int shmid = shmids[i]; - void* mem = shmat(shmid, nullptr, 0); - memcpy(arr->data, mem, arg_sizes[i]); - shmdt(mem); - shmctl(shmid, IPC_RMID, nullptr); - } - } -} - -// copy values from the shared mem to local mem -void PrintCopy() - - - - -// copy values from local mem back to shared mem -void PrintCopyBack() - - - -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc func, - std::string test_file) { - int indent = 0; - std::ofstream stream; - stream.open("host.cpp"); - - // write the header files and macro commmands. 
- stream << "# define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; - stream << "# define CL_HPP_TARGET_OPENCL_VERSION 120\n"; - stream << "# define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; - stream << "# define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# include \n"; - stream << "# pragram once\n"; - stream << "# define LENGTH (1024)\n"; - stream << "# define NUM_WORKGROUPS (1)\n"; - stream << "# define WORKGROUP_SIZE (16)\n"; - stream << test_file; - stream << "int main(void) { \n"; - indent += 2; - - - // get the platform and devices - stream << "#if define(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; - PrintIndent(stream, indent); - stream << "# define STR_VALUE(arg) #arg\n"; - PrintIndent(stream, indent); - stream << "# define GET_STRING(name) STR_VALUE(name)\n"; - PrintIndent(stream, indent); - stream << "# define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n" - stream << "#endif"; - - - // get the xclbin filename . 
- stream << "char * xclbinFilename = argv[1]\n"; - stream << "size_t \n"; - - // source memories - - - // create the test data and goldn data locally - - - - - // OpenCL HOST CODE AREA START - // get First Platform - stream << "std::vector platforms;\n"; - stream << "cl::Platform::get(&platforms)\n;"; - stream << "cl::Platform platform = platform[0];\n"; - stream << "std::cout << "" " - - // get accelerator devices and select 1st such device - - // create context and command queue for selected device - - - // load xcl binary into the buffer - - - // creat program from binary file - - // create kernel - - // create buffers inside device - - // copy input data to device buffer from host memory - - // run the kernel - - // copy device result data to host memory - // OpenCL HOST CODE AREA END - - - - // compare the results of the kernel to the simulation - - - - - for ( int i = 0;i < args.size(); i++ ) { - if (args[i].type_code() == kArrayHandle) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << "* "; - stream << Type2Byte(arg_types)[i] << "*"; - PrintIndent(stream, indent); - - - } - } - - // call the function - PrintIndent(stream, indent); - stream << func->name << "("; - for (int i = 0;i < args.size();i++) { - if (i != args.size()-1) { - stream << ", "; - } - } - stream << ");\n"; - - // copy to shared mem - for (int i = 0;i < args.size();i++ ) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - PrintCopyBack(arr, stream, indent, i); - PrintIndent(stream, indent); - } - } - stream << "}\n"; - stream.close(); -} -} // namespace - -class SDAccelModuleNode final : public ModuleNode { - public: - SDAccelModuleNode(LoweredFunc func, std::string test_file) - : func_(func), test_file_(test_file) {} - - const char* type_key() const { - return "sdaccel_sw_emu"; - } - - PackedFunc GetFunction( - const std::string& name, - const std::shared_ptr& sptr_to_self) final { - return 
PackedFunc([this](TVMArgs args, TVMRetValue* rv){ - if (args.size() != (int)func_->args.size()) - LOG(FATAL) << "The function should take in " << func_->args.size() - << " inputs but get " << args.size(); - std::vector arg_sizes; - std::vector arg_types; - std::vector shmids; - CollectArgInfo(args, func_, arg_sizes, arg_types); - GenSharedMem(args, shmids, arg_sizes); - GenHostCode(args, shmids, arg_types, func_, test_file_); - // TODO: find a better way to do the following - LOG(CLEAN) << "Compiling the generated SDAccel OpenCL code ..."; - LOG(CLEAN) << "Running SDAccel OpenCL simulation ..."; - system("make -f sdaccel.mk run_cpu_em"); - LOG(CLEAN) << "Finished SDAccel OpenCL simulation"; - system("make -f sdaccel.mk cleanall"); - FreeSharedMem(args, shmids, arg_sizes); - }); - } - - private: - LoweredFunc func_; - std::string test_file_; -}; - -Module CreateSDAccelModule( - LoweredFunc func, - std::string code) { - - std::shared_ptr n = - std::make_shared(func, code); - - return Module(n); -} - - -} // namespace runtime -} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h deleted file mode 100755 index cfdf8cadd..000000000 --- a/tvm/src/codegen/opencl/sdaccel/sdaccel_module.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - -#ifndef SDACCEL_MODULE_H -#define SDACCEL_MODULE_H - -# include -# include -# include "../../build_common.h" - -namespace TVM { -namespace runtime { - -Module CreateSDAccelModule( - LoweredFunc func, - std::string code); - -} // namespace runtime -} // namespace TVM - -#endif \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc new file mode 100644 index 000000000..dff45809d --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -0,0 +1,408 @@ +/*! 
+ * Copyright (c) 2018 by Contributors + * \file build_vhls.cc + * \brief Build HLS C modules from source. + */ +#include "./sdaccel_module.h" +#include +#include +#include +#include +#include + +namespace TVM { +namespace runtime { + +namespace { + +void PrintIndent(std::ofstream& stream, int indent) { + for (int i = 0; i < indent; i++) + stream << ' '; +} + +inline size_t GetTypeSize(TVMType t) { + size_t byte = (t.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + return byte; +} + +inline size_t GetDataSize(TVMArray* arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= arr->shape[i]; + } + size_t byte = (arr->dtype.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + size *= (byte * 8 * arr->dtype.lanes + 7) / 8; + return size; +} + +inline TVMType Type2TVMType(Type t) { + TVMType tt; + if (t.is_int()) tt.code = kDLInt; + else if (t.is_uint()) tt.code = kDLUInt; + else if (t.is_float()) tt.code = kDLFloat; + else LOG(FATAL) << "Unacceptable type: " << t; + tt.bits = static_cast(t.bits()); + tt.fracs = static_cast(t.fracs()); + return tt; +} + +inline std::string Type2Str(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if (t.fracs > 0) str += "ap_fixed<"; + else str += "ap_int<"; + str += std::to_string(static_cast(t.bits)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + else str += ">"; + } else if (t.code == kDLUInt) { + if (t.fracs > 0) str += "ap_ufixed<"; + else str += "ap_uint<"; + str += std::to_string(static_cast(t.bits)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2ExtStr(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if 
(t.fracs > 0) str += "ap_fixed<"; + else str += "ap_int<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLUInt) { + if (t.fracs > 0) str += "ap_ufixed<"; + else str += "ap_uint<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2Byte(TVMType t) { + std::string str = ""; + if (t.code == kDLFloat) { + str += "float"; + } else if (t.code == kDLInt || t.code == kDLUInt) { + if (t.code == kDLUInt) str += "u"; + str += "int"; + if (t.bits <= 8) str += "8"; + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + str += "_t"; + } + return str; +} + +void CollectArgInfo(TVMArgs& args, + LoweredFunc func, + std::vector& arg_sizes, + std::vector& arg_types) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + arg_sizes.push_back(GetDataSize(arr)); + arg_types.push_back(arr->dtype); + } else { + const Variable* var = func->api_args[i].as(); + TVMType t = Type2TVMType(var->type); + arg_sizes.push_back(GetTypeSize(t)); + arg_types.push_back(t); + } + } +} + +void GenSharedMem(TVMArgs& args, + std::vector& shmids, + std::vector& arg_sizes) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + // generate shared memory key and id + // TODO: maybe get the current path?? 
+ key_t key = ftok("/", i+1); + int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); + shmids.push_back(shmid); + // copy mem from TVM args to the shared memory + void* mem = shmat(shmid, nullptr, 0); + memcpy(mem, arr->data, arg_sizes[i]); + } else { + shmids.push_back(0); + } + } +} + +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes) { + for (size_t i = 0; i < shmids.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + int shmid = shmids[i]; + void* mem = shmat(shmid, nullptr, 0); + memcpy(arr->data, mem, arg_sizes[i]); + shmdt(mem); + shmctl(shmid, IPC_RMID, nullptr); + } + } +} + +// copy values from the shared mem to local mem +void PrintCopy(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr) { + for (int i = 0; i < arr->ndim; i++) { + PrintIndent(stream, indent); + stream << "for (size_t i" << i << " = 0; "; + stream << "i" << i << " < " << arr->shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + if (i == arr->ndim-1) { + PrintIndent(stream, indent); + stream << "arg_top_" << nth_arr; + for (int j = 0; j < arr->ndim; j++) { + stream << "[i" << j << "]"; + } + stream << " = ("; + stream << Type2ExtStr(arr->dtype); + stream << ")(arg_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul; + } + stream << "])"; + if (arr->dtype.fracs > 0) + stream << " >> " << static_cast(arr->dtype.fracs); + stream << ";\n"; + } + } + for (int i = 0; i < arr->ndim; i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } +} + +// copy values from local mem back to shared mem +void PrintCopyBack(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr) { + for (int i = 0; i < arr->ndim; i++) { + PrintIndent(stream, indent); + stream << "for (size_t i" << i << " = 0; "; + stream << "i" << i << " < " << arr->shape[i] << "; "; 
+ stream << "i" << i << "++) {\n"; + indent += 2; + if (i == arr->ndim-1) { + PrintIndent(stream, indent); + stream << "arg_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul; + } + stream << "] = ("; + stream << Type2ExtStr(arr->dtype); + stream << ")(arg_top_" << nth_arr; + for (int j = 0; j < arr->ndim; j++) { + stream << "[i" << j << "]"; + } + stream << ")"; + if (arr->dtype.fracs > 0) + stream << " << " << static_cast(arr->dtype.fracs); + stream << ";\n"; + } + } + for (int i = 0; i < arr->ndim; i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } +} + +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func, + std::string test_file) { + int indent = 0; + std::ofstream stream; + stream.open("main.cpp"); + + stream << "#define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; + stream << "#define CL_HPP_TARGET_OPENCL_VERSION 120\n"; + stream << "#define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; + stream << "#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1\n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#pragma once\n"; + + + + + + + stream << test_file; + + + + stream << "int main(void) { \n"; + indent += 2; + + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared memory + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << "* "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << "*)"; + stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + PrintIndent(stream, indent); + 
stream << Type2Str(arg_types[i]) << " "; + stream << "arg_top_" << i; + TVMArray* arr = args[i]; + for (int j = 0; j < arr->ndim; j++) + stream << "[" << arr->shape[j] << "]"; + stream << ";\n"; + // copy from shared mem + PrintCopy(arr, stream, indent, i); + } else { + // directly assign the value to the variable + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << ")"; + if (args[i].type_code() == kDLInt || + args[i].type_code() == kDLUInt) { + stream << int64_t(args[i]); + } + stream << ";\n"; + PrintIndent(stream, indent); + stream << Type2Str(arg_types[i]) << " "; + stream << "arg_top_" << i; + stream << " = ("; + stream << Type2ExtStr(arg_types[i]); + stream << ")(arg_" << i << ")"; + if (arg_types[i].fracs > 0) + stream << " >> " << static_cast(arg_types[i].fracs); + stream << ";\n"; + } + } + + + // call the function + PrintIndent(stream, indent); + stream << func->name << "("; + for (int i = 0; i < args.size(); i++) { + stream << "arg_top_" << i; + if (i != args.size()-1) + stream << ", "; + } + stream << ");\n"; + + // Runing Kernel + + + + + // copy to shared mem + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + PrintCopyBack(arr, stream, indent, i); + PrintIndent(stream, indent); + stream << "shmdt("; + stream << "arg_" << i << ");\n"; + } + } + stream << "}\n"; + stream.close(); +} +} // namespace + +class SDAccelModuleNode final : public ModuleNode { + public: + SDAccelModuleNode(LoweredFunc func, std::string test_file) + : func_(func), test_file_(test_file) {} + + const char* type_key() const { + return "sdaccel_sw_emu"; + + } + + PackedFunc GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) final { + return PackedFunc([this](TVMArgs args, TVMRetValue* rv){ + if (args.size() != (int)func_->args.size()) + LOG(FATAL) << "The function should take in " << 
func_->args.size() + << " inputs but get " << args.size(); + std::vector arg_sizes; + std::vector arg_types; + std::vector shmids; + CollectArgInfo(args, func_, arg_sizes, arg_types); + GenSharedMem(args, shmids, arg_sizes); + GenHostCode(args, shmids, arg_types, func_, test_file_); + // TODO: find a better way to do the following + LOG(CLEAN) << "Compiling the generated SDAccel OpenCL Code ..."; + system("make -f sdaccel.mk run_cpu_em"); + LOG(CLEAN) << "Running SDAccel OpenCL Software Simulation ..."; + // system("./out"); + LOG(CLEAN) << "Finished SDAccel OpenCL Software Simulation ..."; + system("make -f sdaccel.mk cleanall"); + FreeSharedMem(args, shmids, arg_sizes); + }); + } + + private: + LoweredFunc func_; + std::string test_file_; +}; + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code) { + + std::shared_ptr n = + std::make_shared(func, code); + + return Module(n); +} + +} // namespace runtime +} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel_module.h new file mode 100644 index 000000000..6a2a89cd3 --- /dev/null +++ b/tvm/src/codegen/opencl/sdaccel_module.h @@ -0,0 +1,23 @@ +/*! + * Copyright (c) 2018 by Contributors + * \file build_vhls.cc + * \brief Build HLS C modules from source. 
+ */ +#ifndef SDACCEL_MODULE_H +#define SDACCEL_MODULE_H + +#include +#include +#include "../build_common.h" + +namespace TVM { +namespace runtime { + +Module CreateSDAccelModule( + LoweredFunc func, + std::string code); + +} // namespace runtime +} // namespace TVM + +#endif From fa30a01e16fa798c4c7f68dd19c4799899d295ab Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Tue, 3 Sep 2019 17:55:53 -0400 Subject: [PATCH 059/103] create the sdaccel host --- tvm/src/codegen/opencl/sdaccel_module.cc | 199 ++++++++++++++++------- 1 file changed, 144 insertions(+), 55 deletions(-) diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index dff45809d..e72133157 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -271,63 +271,75 @@ void GenHostCode(TVMArgs& args, stream << "#include \n"; stream << "#include \n"; stream << "#pragma once\n"; - + + // stream << test_file; + stream << "int main(void) { \n"; + stream << "#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; + indent += 2; + stream << "#define STR_VALUE(arg) #arg\n"; + stream << "#define GET_STRING(name) STR_VALUE(name)\n"; + stream << "#define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n"; + stream << "#endif\n"; + stream << "char* xclbinFilename = argv[1];\n"; + // Source Memories - - stream << test_file; - stream << "int main(void) { \n"; - indent += 2; + // Getting First Platform + stream << "std::vector platforms;\n"; + stream << "cl::Platform::get(&platforms);\n"; + stream << "cl::Platform platform = platforms[0];\n"; - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << "* "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << "*)"; - stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - PrintIndent(stream, indent); - stream << Type2Str(arg_types[i]) << " 
"; - stream << "arg_top_" << i; - TVMArray* arr = args[i]; - for (int j = 0; j < arr->ndim; j++) - stream << "[" << arr->shape[j] << "]"; - stream << ";\n"; - // copy from shared mem - PrintCopy(arr, stream, indent, i); - } else { - // directly assign the value to the variable - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << ")"; - if (args[i].type_code() == kDLInt || - args[i].type_code() == kDLUInt) { - stream << int64_t(args[i]); - } - stream << ";\n"; - PrintIndent(stream, indent); - stream << Type2Str(arg_types[i]) << " "; - stream << "arg_top_" << i; - stream << " = ("; - stream << Type2ExtStr(arg_types[i]); - stream << ")(arg_" << i << ")"; - if (arg_types[i].fracs > 0) - stream << " >> " << static_cast(arg_types[i].fracs); - stream << ";\n"; - } - } + + // Getting ACCELERATOR Devices and selecting 1st such device + stream << "std::vector devices;\n"; + stream << "platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);\n"; + stream << "cl::Device device = devices[0];\n"; + + // Creating Context and Command Queue for selected Device + stream << "cl::Context context(device);\n"; + stream << "cl::CommandQueue q(context, device);\n"; + + // Loading XCL Bin into char buffer + stream << "std::ifstream bin_file(xclbinFilename, std::ifstream::binary);\n"; + stream << "bin_file.seekg (0, bin_file.end);\n"; + stream << "unsigned nb = bin_file.tellg();\n"; + stream << "bin_file.seekg (0, bin_file.beg);\n"; + stream << "char *buf = new char [nb];\n"; + stream << "bin_file.read(buf, nb);\n"; + + + // Creating Program from Binary File + stream << "cl::Program::Binaries bins;\n"; + stream << "bins.push_back({buf,nb});\n"; + stream << "devices.resize(1);\n"; + stream << "cl::Program program(context, devices, bins);\n"; - // call the function + // Creating Kernel and Functor of Kernel + stream << "int err1;\n"; + stream << "cl::Kernel kernel(program, 
\"default_function\", &err1);\n"; + stream << "auto default_function = cl::KernelFunctor(kernel);\n"; + + + + // Creating Buffers inside Device + + + + // Copying input data to Device buffer from host memory + + + + + // Running Kernel PrintIndent(stream, indent); stream << func->name << "("; + stream << "cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),"; for (int i = 0; i < args.size(); i++) { stream << "arg_top_" << i; if (i != args.size()-1) @@ -335,21 +347,99 @@ void GenHostCode(TVMArgs& args, } stream << ");\n"; - // Runing Kernel + stream << "q.finish()\n"; + // Copying Device result data to Host memory - // copy to shared mem - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - PrintCopyBack(arr, stream, indent, i); - PrintIndent(stream, indent); - stream << "shmdt("; - stream << "arg_" << i << ");\n"; + + + + + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + stream << Type2Str(arg_types[i]) << " "; + stream << "arg_" << i; + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + stream << "[" << arr->shape[j] << "]"; } + stream << ";\n"; } + + + + + + + + // for (int i = 0; i < args.size(); i++) { + // if (args[i].type_code() == kArrayHandle) { + // // read from the shared memory + // PrintIndent(stream, indent); + // stream << Type2Byte(arg_types[i]) << "* "; + // stream << "arg_" << i << " = "; + // stream << "(" << Type2Byte(arg_types[i]) << "*)"; + // stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + // PrintIndent(stream, indent); + // stream << Type2Str(arg_types[i]) << " "; + // stream << "arg_top_" << i; + // TVMArray* arr = args[i]; + // for (int j = 0; j < arr->ndim; j++) + // stream << "[" << arr->shape[j] << "]"; + // stream << ";\n"; + // // copy from shared mem + // PrintCopy(arr, stream, indent, i); + // } else { + // // directly assign the value to the variable + // PrintIndent(stream, indent); + // stream << 
Type2Byte(arg_types[i]) << " "; + // stream << "arg_" << i << " = "; + // stream << "(" << Type2Byte(arg_types[i]) << ")"; + // if (args[i].type_code() == kDLInt || + // args[i].type_code() == kDLUInt) { + // stream << int64_t(args[i]); + // } + // stream << ";\n"; + // PrintIndent(stream, indent); + // stream << Type2Str(arg_types[i]) << " "; + // stream << "arg_top_" << i; + // stream << " = ("; + // stream << Type2ExtStr(arg_types[i]); + // stream << ")(arg_" << i << ")"; + // if (arg_types[i].fracs > 0) + // stream << " >> " << static_cast(arg_types[i].fracs); + // stream << ";\n"; + // } + // } + + + // // call the function + // PrintIndent(stream, indent); + // stream << func->name << "("; + // for (int i = 0; i < args.size(); i++) { + // stream << "arg_top_" << i; + // if (i != args.size()-1) + // stream << ", "; + // } + // stream << ");\n"; + + // // Runing Kernel + + + + + // // copy to shared mem + // for (int i = 0; i < args.size(); i++) { + // if (args[i].type_code() == kArrayHandle) { + // TVMArray* arr = args[i]; + // PrintCopyBack(arr, stream, indent, i); + // PrintIndent(stream, indent); + // stream << "shmdt("; + // stream << "arg_" << i << ");\n"; + // } + // } stream << "}\n"; stream.close(); } @@ -382,7 +472,6 @@ class SDAccelModuleNode final : public ModuleNode { LOG(CLEAN) << "Compiling the generated SDAccel OpenCL Code ..."; system("make -f sdaccel.mk run_cpu_em"); LOG(CLEAN) << "Running SDAccel OpenCL Software Simulation ..."; - // system("./out"); LOG(CLEAN) << "Finished SDAccel OpenCL Software Simulation ..."; system("make -f sdaccel.mk cleanall"); FreeSharedMem(args, shmids, arg_sizes); From 51584e4c94beddd95251126393860a3ca09fceee Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Tue, 3 Sep 2019 19:30:05 -0400 Subject: [PATCH 060/103] fixed the indent problem partly --- samples/gemm/gemm_sdaccel.py | 1 + tvm/src/codegen/opencl/sdaccel_module.cc | 91 ++++++++++++++++++------ 2 files changed, 72 insertions(+), 20 deletions(-) diff --git 
a/samples/gemm/gemm_sdaccel.py b/samples/gemm/gemm_sdaccel.py index d9ae115ef..37da40980 100644 --- a/samples/gemm/gemm_sdaccel.py +++ b/samples/gemm/gemm_sdaccel.py @@ -4,4 +4,5 @@ #dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] #for dtype in dtypes: +#time_gemm(hcl.Int(32), 10, 10, 10, 'sdaccel_sw_emu') time_gemm(hcl.Int(32), 10, 10, 10, 'sdaccel_sw_emu') diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index e72133157..f08932480 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -15,11 +15,14 @@ namespace runtime { namespace { + void PrintIndent(std::ofstream& stream, int indent) { for (int i = 0; i < indent; i++) stream << ' '; } + + inline size_t GetTypeSize(TVMType t) { size_t byte = (t.bits + 7) / 8; if (byte > 2){ @@ -30,6 +33,8 @@ inline size_t GetTypeSize(TVMType t) { return byte; } + + inline size_t GetDataSize(TVMArray* arr) { size_t size = 1; for (tvm_index_t i = 0; i < arr->ndim; ++i) { @@ -45,6 +50,8 @@ inline size_t GetDataSize(TVMArray* arr) { return size; } + + inline TVMType Type2TVMType(Type t) { TVMType tt; if (t.is_int()) tt.code = kDLInt; @@ -56,28 +63,45 @@ inline TVMType Type2TVMType(Type t) { return tt; } +// inline std::string Type2Str(TVMType t) { +// std::string str = ""; +// if (t.code == kDLInt) { +// if (t.fracs > 0) str += "ap_fixed<"; +// else str += "ap_int<"; +// str += std::to_string(static_cast(t.bits)); +// if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; +// else str += ">"; +// } else if (t.code == kDLUInt) { +// if (t.fracs > 0) str += "ap_ufixed<"; +// else str += "ap_uint<"; +// str += std::to_string(static_cast(t.bits)); +// if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; +// else str += ">"; +// } else if (t.code == kDLFloat) { +// str += "float"; +// } else { +// LOG(FATAL) << "Unknown type"; +// } +// return str; +// } + inline std::string 
Type2Str(TVMType t) { - std::string str = ""; + std::string = ""; if (t.code == kDLInt) { - if (t.fracs > 0) str += "ap_fixed<"; - else str += "ap_int<"; - str += std::to_string(static_cast(t.bits)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) str += "ap_ufixed<"; - else str += "ap_uint<"; - str += std::to_string(static_cast(t.bits)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - else str += ">"; + str += "int"; + } else if (t.code == kDLInt) { + str += "unsigned int"; } else if (t.code == kDLFloat) { str += "float"; - } else { + } + else { LOG(FATAL) << "Unknown type"; } return str; } + + inline std::string Type2ExtStr(TVMType t) { std::string str = ""; if (t.code == kDLInt) { @@ -254,6 +278,7 @@ void GenHostCode(TVMArgs& args, std::ofstream stream; stream.open("main.cpp"); + stream << "#define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; stream << "#define CL_HPP_TARGET_OPENCL_VERSION 120\n"; stream << "#define CL_HPP_MINIMUM_OPENCL_VERSION 120\n"; @@ -271,37 +296,54 @@ void GenHostCode(TVMArgs& args, stream << "#include \n"; stream << "#include \n"; stream << "#pragma once\n"; + stream << "\n\n"; // stream << test_file; stream << "int main(void) { \n"; stream << "#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; indent += 2; - stream << "#define STR_VALUE(arg) #arg\n"; - stream << "#define GET_STRING(name) STR_VALUE(name)\n"; - stream << "#define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n"; + stream << " #define STR_VALUE(arg) #arg\n"; + stream << " #define GET_STRING(name) STR_VALUE(name)\n"; + stream << " #define TARGET_DEVICE GET_STRING(SDX_PLATFORM)\n"; stream << "#endif\n"; + // get the krnl code + PrintIndent(stream, indent); stream << "char* xclbinFilename = argv[1];\n"; + // Source Memories + // std::vector source_a(LENGTH); + + + + // Getting First Platform + PrintIndent(stream, indent); stream << 
"std::vector platforms;\n"; + PrintIndent(stream, indent); stream << "cl::Platform::get(&platforms);\n"; + PrintIndent(stream, indent); stream << "cl::Platform platform = platforms[0];\n"; // Getting ACCELERATOR Devices and selecting 1st such device + PrintIndent(stream, indent); stream << "std::vector devices;\n"; + PrintIndent(stream, indent); stream << "platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);\n"; + PrintIndent(stream, indent); stream << "cl::Device device = devices[0];\n"; // Creating Context and Command Queue for selected Device + PrintIndent(stream, indent); stream << "cl::Context context(device);\n"; + PrintIndent(stream, indent); stream << "cl::CommandQueue q(context, device);\n"; // Loading XCL Bin into char buffer @@ -328,10 +370,16 @@ void GenHostCode(TVMArgs& args, // Creating Buffers inside Device + // cl::Buffer buffer_a(context, CL_MEM_READ_ONLY, vector_size_bytes); + // cl::Buffer buffer_b(context, CL_MEM_WRITE_ONLY, vector_size_bytes); + + // Copying input data to Device buffer from host memory + // q.enqueueWriteBuffer(buffer_a, CL_TRUE, 0, vector_size_bytes, source_a.data()); + @@ -347,10 +395,12 @@ void GenHostCode(TVMArgs& args, } stream << ");\n"; + PrintIndent(stream, indent); stream << "q.finish()\n"; // Copying Device result data to Host memory + // q.enqueueReadBuffer(buffer_c, CL_TRUE, 0, vector_size_bytes, result_krnl.data()); @@ -359,7 +409,8 @@ void GenHostCode(TVMArgs& args, for (int i = 0;i < args.size();i++) { PrintIndent(stream, indent); - stream << Type2Str(arg_types[i]) << " "; + // stream << Type2Str(arg_types[i]) << " "; + stream << arg_types[i] << " "; stream << "arg_" << i; TVMArray* arr = args[i]; for (int j = 0;j < arr->ndim;j++) { @@ -425,9 +476,6 @@ void GenHostCode(TVMArgs& args, // } // stream << ");\n"; - // // Runing Kernel - - // // copy to shared mem @@ -440,11 +488,14 @@ void GenHostCode(TVMArgs& args, // stream << "arg_" << i << ");\n"; // } // } + stream << "}\n"; stream.close(); } } // 
namespace + + class SDAccelModuleNode final : public ModuleNode { public: SDAccelModuleNode(LoweredFunc func, std::string test_file) From c5239bf3574d6a872e6e95a5e2b99e037b81ce0c Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 4 Sep 2019 12:27:35 -0400 Subject: [PATCH 061/103] test the zhang-05 server --- tvm/src/codegen/opencl/sdaccel_module.cc | 28 ++++++++++++++++-------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index f08932480..89876fcf7 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -63,6 +63,7 @@ inline TVMType Type2TVMType(Type t) { return tt; } + // inline std::string Type2Str(TVMType t) { // std::string str = ""; // if (t.code == kDLInt) { @@ -86,15 +87,24 @@ inline TVMType Type2TVMType(Type t) { // } inline std::string Type2Str(TVMType t) { - std::string = ""; + std::string str = ""; if (t.code == kDLInt) { str += "int"; - } else if (t.code == kDLInt) { - str += "unsigned int"; + // if (t.fracs > 0) str += "ap_fixed<"; + // else str += "ap_int<"; + // str += std::to_string(static_cast(t.bits)); + // if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + // else str += ">"; + } else if (t.code == kDLUInt) { + str += 'unsigned'; + // if (t.fracs > 0) str += "ap_ufixed<"; + // else str += "ap_uint<"; + // str += std::to_string(static_cast(t.bits)); + // if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + // else str += ">"; } else if (t.code == kDLFloat) { str += "float"; - } - else { + } else { LOG(FATAL) << "Unknown type"; } return str; @@ -517,15 +527,15 @@ class SDAccelModuleNode final : public ModuleNode { std::vector arg_types; std::vector shmids; CollectArgInfo(args, func_, arg_sizes, arg_types); - GenSharedMem(args, shmids, arg_sizes); + // GenSharedMem(args, shmids, arg_sizes); GenHostCode(args, shmids, arg_types, func_, 
test_file_); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated SDAccel OpenCL Code ..."; - system("make -f sdaccel.mk run_cpu_em"); + // system("make -f sdaccel.mk run_cpu_em"); LOG(CLEAN) << "Running SDAccel OpenCL Software Simulation ..."; LOG(CLEAN) << "Finished SDAccel OpenCL Software Simulation ..."; - system("make -f sdaccel.mk cleanall"); - FreeSharedMem(args, shmids, arg_sizes); + // system("make -f sdaccel.mk cleanall"); + // FreeSharedMem(args, shmids, arg_sizes); }); } From 7991a81385ba0d5b3e71b9d2f3e211c15fe663d0 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 4 Sep 2019 12:35:13 -0400 Subject: [PATCH 062/103] add indent to the host.cpp --- tvm/src/codegen/opencl/sdaccel_module.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 89876fcf7..40926eab1 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -287,7 +287,7 @@ void GenHostCode(TVMArgs& args, int indent = 0; std::ofstream stream; stream.open("main.cpp"); - + indent += 2; stream << "#define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; stream << "#define CL_HPP_TARGET_OPENCL_VERSION 120\n"; From 104e5e6248052165084626b5a264dd729c96603c Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 4 Sep 2019 21:32:45 -0400 Subject: [PATCH 063/103] automatically generate makefile --- samples/gemm/common.mk | 4 + samples/gemm/kernel.py | 71 ++++++ samples/gemm/main.cpp | 80 +++++++ tvm/src/codegen/opencl/build_opencl.cc | 2 - tvm/src/codegen/opencl/sdaccel_module.cc | 283 ++++++++++++++++++----- 5 files changed, 377 insertions(+), 63 deletions(-) create mode 100644 samples/gemm/common.mk create mode 100644 samples/gemm/kernel.py create mode 100644 samples/gemm/main.cpp diff --git a/samples/gemm/common.mk b/samples/gemm/common.mk new file mode 100644 index 000000000..7d3530a5b --- /dev/null +++ b/samples/gemm/common.mk @@ -0,0 
+1,4 @@ +SHELL = /bin/bash +VPATH = ./ +CC = xcpp +CLCC = xocc diff --git a/samples/gemm/kernel.py b/samples/gemm/kernel.py new file mode 100644 index 000000000..1ce0b59a0 --- /dev/null +++ b/samples/gemm/kernel.py @@ -0,0 +1,71 @@ +# Yang.Bai +# yb269@cornell.edu + +import heterocl as hcl +import numpy as np + +hcl.init() + +matrix_size = (16, 16) +# def add_compute(A, B): +# C = hcl.compute(A.shape, lambda x, y: A[x, y] + B[x, y], "C") +# return C + +# def add_compute_2(A, B): +# C = hcl.compute(A.shape, lambda x: A[x] + B[x], "C") +# return C + +# A = hcl.placeholder(matrix_size, "A") +# B = hcl.placeholder(matrix_size, "B") + +# s = hcl.create_schedule([A, B], add_compute) +# # f2 = hcl.build(s, target='sdaccel') +# f2 = hcl.build(s, target='aocl') +# print (f2) + +# hcl_A = hcl.asarray(np.random.random_sample(matrix_size), dtype = hcl.Float()) +# hcl_B = hcl.asarray(np.random.random_sample(matrix_size), dtype = hcl.Float()) +# hcl_C = hcl.asarray(np.zeros(matrix_size), dtype = hcl.Float()) +# hcl_C2 = hcl.asarray(np.zeros(matrix_size), dtype = hcl.Float()) +# f3 = hcl.build(s) + +# A = hcl.placeholder((10, ), "A") +# B = hcl.placeholder((10, ), "B") +# s = hcl.create_schedule([A, B], add_compute_2) +# f4 = hcl.build(s, target='sdaccel') +# print (f4) +# print (hcl_A, hcl_B, hcl_C) + +def gemm_compute(matrix_1, matrix_2): + m = n = k = 3 + r = hcl.reduce_axis(0, k, 'k') + temp = hcl.compute((m, n), + lambda x, y: hcl.sum(matrix_1[x, r] * matrix_2[r, y], + axis = r), name='matrix_3') + return temp + +matrix_1 = hcl.placeholder((3, 3)) +matrix_2 = hcl.placeholder((3, 3)) + +s = hcl.create_schedule([matrix_1, matrix_2], gemm_compute) +f = hcl.build(s, target='sdaccel_sw_emu') + +matrix_1_np = np.array([[1,2,3],[4,5,6],[7,8,9]]) +matrix_2_np = np.array(([4,5,6],[1,2,2],[7,8,9])) +matrix_3_np = np.array([[0,0,0],[0,0,9],[0,0,0]]) + +hcl_matrix_1 = hcl.asarray(matrix_1_np) +hcl_matrix_2 = hcl.asarray(matrix_2_np) +hcl_matrix_3 = hcl.asarray(matrix_3_np) + 
+f(hcl_matrix_1, hcl_matrix_2, hcl_matrix_3) + + + + +# with open('sdaccel.cl', 'w') as f: +# f.write(code) + + + + diff --git a/samples/gemm/main.cpp b/samples/gemm/main.cpp new file mode 100644 index 000000000..1274a26dc --- /dev/null +++ b/samples/gemm/main.cpp @@ -0,0 +1,80 @@ +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#pragma once + + + + +int main(void) { +#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE) + #define STR_VALUE(arg) #arg + #define GET_STRING(name) STR_VALUE(name) + #define TARGET_DEVICE GET_STRING(SDX_PLATFORM) +#endif + char* xclbinFilename = argv[1]; + + std::vector source_0(3 * 3); + std::vector source_1(3 * 3); + std::vector source_2(3 * 3); + + size_t vector_size_bytes_0 = sizeof(int) * 3 * 3; + size_t vector_size_bytes_1 = sizeof(int) * 3 * 3; + size_t vector_size_bytes_2 = sizeof(int) * 3 * 3; + + std::vector platforms; + cl::Platform::get(&platforms); + cl::Platform platform = platforms[0]; + + std::vector devices; + platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices); + cl::Device device = devices[0]; + + cl::Context context(device); + cl::CommandQueue q(context, device); + + std::ifstream bin_file(xclbinFilename, std::ifstream::binary); + bin_file.seekg (0, bin_file.end); + unsigned nb = bin_file.tellg(); + bin_file.seekg (0, bin_file.beg); + char *buf = new char [nb]; + bin_file.read(buf, nb); + + cl::Program::Binaries bins; + bins.push_back({buf,nb}); + devices.resize(1); + cl::Program program(context, devices, bins); + + int err1; + cl::Kernel kernel(program, "default_function", &err1); + auto default_function = cl::KernelFunctor(kernel) + + cl::Buffer buffer_0(context, CL_MEM_READWRITE, vector_size_bytes_0) + cl::Buffer buffer_1(context, CL_MEM_READWRITE, 
vector_size_bytes_1) + cl::Buffer buffer_2(context, CL_MEM_READWRITE, vector_size_bytes_2) + + q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()) + q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()) + q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()) + + default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2); + q.finish() + + q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()) + q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()) + q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()) + +} diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 917a52de8..06dbd7c2a 100755 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -27,7 +27,6 @@ runtime::Module BuildSDAccelSim(Array funcs) { cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); - std::cout << code; return runtime::CreateSDAccelModule(funcs[0], code); } @@ -38,7 +37,6 @@ TVM_REGISTER_API("codegen.build_sdaccel_sw_emu") #endif - template std::string BuildOpenCL(Array funcs){ using TVM::runtime::Registry; diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 40926eab1..0b6a5be6e 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace TVM { namespace runtime { @@ -96,7 +98,7 @@ inline std::string Type2Str(TVMType t) { // if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; // else str += ">"; } else if (t.code == kDLUInt) { - str += 'unsigned'; + str += "unsigned int"; // if (t.fracs > 0) str += "ap_ufixed<"; // else str += "ap_uint<"; // str += std::to_string(static_cast(t.bits)); @@ -139,13 +141,13 
@@ inline std::string Type2Byte(TVMType t) { if (t.code == kDLFloat) { str += "float"; } else if (t.code == kDLInt || t.code == kDLUInt) { - if (t.code == kDLUInt) str += "u"; + if (t.code == kDLUInt) str += "unsigned"; str += "int"; if (t.bits <= 8) str += "8"; else if (t.bits <= 16) str += "16"; else if (t.bits <= 32) str += "32"; else str += "64"; - str += "_t"; + // str += "_t"; } return str; } @@ -168,40 +170,40 @@ void CollectArgInfo(TVMArgs& args, } } -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - // generate shared memory key and id - // TODO: maybe get the current path?? - key_t key = ftok("/", i+1); - int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); - shmids.push_back(shmid); - // copy mem from TVM args to the shared memory - void* mem = shmat(shmid, nullptr, 0); - memcpy(mem, arr->data, arg_sizes[i]); - } else { - shmids.push_back(0); - } - } -} +// void GenSharedMem(TVMArgs& args, +// std::vector& shmids, +// std::vector& arg_sizes) { +// for (int i = 0; i < args.size(); i++) { +// if (args[i].type_code() == kArrayHandle) { +// TVMArray* arr = args[i]; +// // generate shared memory key and id +// // TODO: maybe get the current path?? 
+// key_t key = ftok("/", i+1); +// int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); +// shmids.push_back(shmid); +// // copy mem from TVM args to the shared memory +// void* mem = shmat(shmid, nullptr, 0); +// memcpy(mem, arr->data, arg_sizes[i]); +// } else { +// shmids.push_back(0); +// } +// } +// } -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes) { - for (size_t i = 0; i < shmids.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - int shmid = shmids[i]; - void* mem = shmat(shmid, nullptr, 0); - memcpy(arr->data, mem, arg_sizes[i]); - shmdt(mem); - shmctl(shmid, IPC_RMID, nullptr); - } - } -} +// void FreeSharedMem(TVMArgs& args, +// const std::vector& shmids, +// std::vector& arg_sizes) { +// for (size_t i = 0; i < shmids.size(); i++) { +// if (args[i].type_code() == kArrayHandle) { +// TVMArray* arr = args[i]; +// int shmid = shmids[i]; +// void* mem = shmat(shmid, nullptr, 0); +// memcpy(arr->data, mem, arg_sizes[i]); +// shmdt(mem); +// shmctl(shmid, IPC_RMID, nullptr); +// } +// } +// } // copy values from the shared mem to local mem void PrintCopy(TVMArray* arr, @@ -279,6 +281,75 @@ void PrintCopyBack(TVMArray* arr, } } + +void GenMakFile() { + int indent = 0; + std::ofstream stream; + stream.open("sdaccel.mk"); + indent += 4; + + stream << "ifndef XILINX_SDX\n"; + stream << "$(error Environment variable XILINX_SDX is required and should point to SDAccel install area)\n"; + stream << "endif\n"; + + stream << "SDA_FLOW = cpu_emu\n"; + stream << "HOST_SRCS = host.cpp\n"; + stream << "HOST_EXE_DIR=.\n"; + stream << "HOST_EXE = host\n"; + stream << "HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL\n"; + stream << "HOST_LFLAGS = \n"; + stream << "KERNEL_SRCS = default_function.cl\n"; + stream << "KERNEL_NAME = default_function\n"; + stream << "KERNEL_DEFS =\n"; + stream << "KERNEL_INCS =\n"; + stream << "XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0\n"; + stream << 
"XDEVICE_REPO_PATH=\n"; + stream << "KEEP_TEMP=1\n"; + stream << "KERNEL_DEBUG=\n"; + stream << "XCLBIN_NAME=bin_krnl\n"; + stream << "HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\"\n"; + stream << "BOARD_SETUP_FILE=setup.sh\n"; + stream << "ifeq (${SDA_FLOW},cpu_emu)\n"; + PrintIndent(stream, indent); + stream << "CLCC_OPT += -t sw_emu\n"; + PrintIndent(stream, indent); + stream << "XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin\n"; + stream << "else ifeq (${SDA_FLOW},hw_emu)\n"; + PrintIndent(stream, indent); + stream << "CLCC_OPT += -t hw_emu\n"; + PrintIndent(stream, indent); + stream << "XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin\n"; + stream << "else ifeq (${SDA_FLOW},hw)\n"; + PrintIndent(stream, indent); + stream << "XCLBIN = ${XCLBIN_NAME}_hw.xclbin\n"; + stream << "CLCC_OPT += -t hw\n"; + stream << "endif"; + + stream << "HOST_ARGS = ${XCLBIN}\n"; + stream << "COMMON_DIR = ./common\n"; + stream << "include ${COMMON_DIR}/common.mk\n"; + + stream.close(); + +} + +void GenCommonFile() { + int indent = 0; + std::ofstream stream; + stream.open("./common/common.mk"); + indent += 4; + stream << "SHELL = /bin/bash\n"; + stream << "VPATH = ./\n"; + stream << "CC = xcpp\n"; + stream << "CLCC = xocc\n"; + + stream.close(); + + +} + + + void GenHostCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, @@ -309,6 +380,7 @@ void GenHostCode(TVMArgs& args, stream << "\n\n"; // stream << test_file; + stream << "\n\n"; stream << "int main(void) { \n"; stream << "#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; @@ -321,15 +393,51 @@ void GenHostCode(TVMArgs& args, // get the krnl code PrintIndent(stream, indent); stream << "char* xclbinFilename = argv[1];\n"; + stream << "\n"; // Source Memories // std::vector source_a(LENGTH); + // for (int i = 0;i < args.size();i++) { + // PrintIndent(stream, indent); + // stream << Type2Str(arg_types[i]) << " "; + // stream << arg_types[i] << " "; + // stream << "arg_" << i; + // TVMArray* arr = args[i]; + // for (int j = 
0;j < arr->ndim;j++) { + // stream << "[" << arr->shape[j] << "]"; + // } + // stream << ";\n"; + // } + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + stream << "std::vector<" << Type2Str(arg_types[i]); + stream << "> "; + stream << "source_" << i << "("; + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + if (j == 0) { + stream << arr->shape[j]; + } else { + stream << " * " << arr->shape[j] << ")"; + } + } + stream << ";\n"; + } + stream << "\n"; - - - - + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + stream << "size_t vector_size_bytes_" << i; + stream << " = sizeof(" << Type2Str(arg_types[i]); + stream << ")"; + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + stream << " * " << arr->shape[j]; + } + stream << ";\n"; + } + stream << "\n"; @@ -340,6 +448,7 @@ void GenHostCode(TVMArgs& args, stream << "cl::Platform::get(&platforms);\n"; PrintIndent(stream, indent); stream << "cl::Platform platform = platforms[0];\n"; + stream << "\n"; // Getting ACCELERATOR Devices and selecting 1st such device @@ -349,47 +458,82 @@ void GenHostCode(TVMArgs& args, stream << "platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);\n"; PrintIndent(stream, indent); stream << "cl::Device device = devices[0];\n"; + stream << "\n"; // Creating Context and Command Queue for selected Device PrintIndent(stream, indent); stream << "cl::Context context(device);\n"; PrintIndent(stream, indent); stream << "cl::CommandQueue q(context, device);\n"; + stream << "\n"; + // Loading XCL Bin into char buffer + PrintIndent(stream, indent); stream << "std::ifstream bin_file(xclbinFilename, std::ifstream::binary);\n"; + PrintIndent(stream, indent); stream << "bin_file.seekg (0, bin_file.end);\n"; + PrintIndent(stream, indent); stream << "unsigned nb = bin_file.tellg();\n"; + PrintIndent(stream, indent); stream << "bin_file.seekg (0, bin_file.beg);\n"; + PrintIndent(stream, indent); stream << "char *buf = new char 
[nb];\n"; + PrintIndent(stream, indent); stream << "bin_file.read(buf, nb);\n"; + stream << "\n"; // Creating Program from Binary File + PrintIndent(stream, indent); stream << "cl::Program::Binaries bins;\n"; + PrintIndent(stream, indent); stream << "bins.push_back({buf,nb});\n"; + PrintIndent(stream, indent); stream << "devices.resize(1);\n"; + PrintIndent(stream, indent); stream << "cl::Program program(context, devices, bins);\n"; + stream << "\n"; // Creating Kernel and Functor of Kernel + PrintIndent(stream, indent); stream << "int err1;\n"; + PrintIndent(stream, indent); stream << "cl::Kernel kernel(program, \"default_function\", &err1);\n"; - stream << "auto default_function = cl::KernelFunctor(kernel);\n"; + PrintIndent(stream, indent); + stream << "auto default_function = cl::KernelFunctor<"; + for (int i = 0;i < args.size();i++) { + if (i == args.size() - 1) { + stream << "cl::Buffer&>(kernel)\n"; + } else { + stream << "cl::Buffer&, "; + } + } + // stream << "auto default_function = cl::KernelFunctor(kernel);\n"; + stream << "\n"; // Creating Buffers inside Device // cl::Buffer buffer_a(context, CL_MEM_READ_ONLY, vector_size_bytes); // cl::Buffer buffer_b(context, CL_MEM_WRITE_ONLY, vector_size_bytes); - - - - + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + stream << "cl::Buffer buffer_" << i; + stream << "(context, CL_MEM_READWRITE, vector_size_bytes_" << i << ")\n"; + } + stream << "\n"; // Copying input data to Device buffer from host memory // q.enqueueWriteBuffer(buffer_a, CL_TRUE, 0, vector_size_bytes, source_a.data()); - + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + stream << "q.enqueueWriteBuffer(buffer_" << i; + stream << ", CL_TRUE, 0, vector_size_bytes_" << i; + stream << ", source_" << i << ".data())\n"; + } + stream << "\n"; @@ -399,7 +543,7 @@ void GenHostCode(TVMArgs& args, stream << func->name << "("; stream << "cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),"; for (int i = 0; 
i < args.size(); i++) { - stream << "arg_top_" << i; + stream << "buffer_" << i; if (i != args.size()-1) stream << ", "; } @@ -407,28 +551,36 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "q.finish()\n"; + stream << "\n"; // Copying Device result data to Host memory // q.enqueueReadBuffer(buffer_c, CL_TRUE, 0, vector_size_bytes, result_krnl.data()); + for (int i = 0;i < args.size(); i++) { + PrintIndent(stream, indent); + stream << "q.enqueueReadBuffer(buffer_" << i; + stream << ", CL_TRUE, 0, vector_size_bytes_" << i; + stream << ", source_" << i << ".data())\n"; + } + stream << "\n"; + // Print the type of each + // for (int i = 0;i < args.size();i++) { + // PrintIndent(stream, indent); + // // stream << Type2Str(arg_types[i]) << " "; + // stream << arg_types[i] << " "; + // stream << "arg_" << i; + // TVMArray* arr = args[i]; + // for (int j = 0;j < arr->ndim;j++) { + // stream << "[" << arr->shape[j] << "]"; + // } + // stream << ";\n"; + // } - for (int i = 0;i < args.size();i++) { - PrintIndent(stream, indent); - // stream << Type2Str(arg_types[i]) << " "; - stream << arg_types[i] << " "; - stream << "arg_" << i; - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { - stream << "[" << arr->shape[j] << "]"; - } - stream << ";\n"; - } - @@ -520,6 +672,7 @@ class SDAccelModuleNode final : public ModuleNode { const std::string& name, const std::shared_ptr& sptr_to_self) final { return PackedFunc([this](TVMArgs args, TVMRetValue* rv){ + if (args.size() != (int)func_->args.size()) LOG(FATAL) << "The function should take in " << func_->args.size() << " inputs but get " << args.size(); @@ -528,13 +681,21 @@ class SDAccelModuleNode final : public ModuleNode { std::vector shmids; CollectArgInfo(args, func_, arg_sizes, arg_types); // GenSharedMem(args, shmids, arg_sizes); + LOG(CLEAN) << "Creating a Host file for SDAccel Runtime ..."; GenHostCode(args, shmids, arg_types, func_, test_file_); + + LOG(CLEAN) << "Creating a Common 
folder for common.mk ..."; + system("mkdir common"); + GenCommonFile(); + + LOG(CLEAN) << "Creating a Makfile for compling the SDAccel OpenCL Code ..."; + GenMakFile(); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated SDAccel OpenCL Code ..."; - // system("make -f sdaccel.mk run_cpu_em"); + system("make -f ./sdaccel.mk run_cpu_em"); LOG(CLEAN) << "Running SDAccel OpenCL Software Simulation ..."; LOG(CLEAN) << "Finished SDAccel OpenCL Software Simulation ..."; - // system("make -f sdaccel.mk cleanall"); + system("make -f sdaccel.mk cleanall"); // FreeSharedMem(args, shmids, arg_sizes); }); } From dad3e756c4211a4f294c85489bca40ea3dd90674 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 4 Sep 2019 21:44:05 -0400 Subject: [PATCH 064/103] delete common folder from opencl --- tvm/src/codegen/opencl/common/common.mk | 150 ------------------------ 1 file changed, 150 deletions(-) delete mode 100755 tvm/src/codegen/opencl/common/common.mk diff --git a/tvm/src/codegen/opencl/common/common.mk b/tvm/src/codegen/opencl/common/common.mk deleted file mode 100755 index baf07f79b..000000000 --- a/tvm/src/codegen/opencl/common/common.mk +++ /dev/null @@ -1,150 +0,0 @@ -#******************************************************************************* -#Vendor: Xilinx -#Associated Filename: common.mk -#Purpose: Common Makefile for SDAccel Compilation -# -#******************************************************************************* -#Copyright (C) 2015-2016 XILINX, Inc. -# -#This file contains confidential and proprietary information of Xilinx, Inc. and -#is protected under U.S. and international copyright and other intellectual -#property laws. -# -#DISCLAIMER -#This disclaimer is not a license and does not grant any rights to the materials -#distributed herewith. 
Except as otherwise provided in a valid license issued to -#you by Xilinx, and to the maximum extent permitted by applicable law: -#(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX -#HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, -#INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR -#FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether -#in contract or tort, including negligence, or under any other theory of -#liability) for any loss or damage of any kind or nature related to, arising under -#or in connection with these materials, including for any direct, or any indirect, -#special, incidental, or consequential loss or damage (including loss of data, -#profits, goodwill, or any type of loss or damage suffered as a result of any -#action brought by a third party) even if such damage or loss was reasonably -#foreseeable or Xilinx had been advised of the possibility of the same. -# -#CRITICAL APPLICATIONS -#Xilinx products are not designed or intended to be fail-safe, or for use in any -#application requiring fail-safe performance, such as life-support or safety -#devices or systems, Class III medical devices, nuclear facilities, applications -#related to the deployment of airbags, or any other applications that could lead -#to death, personal injury, or severe property or environmental damage -#(individually and collectively, "Critical Applications"). Customer assumes the -#sole risk and liability of any use of Xilinx products in Critical Applications, -#subject only to applicable laws and regulations governing limitations on product -#liability. -# -#THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT -#ALL TIMES. 
-# -#******************************************************************************* -SHELL = /bin/bash -VPATH = ./ - -#supported flow: cpu_emu, hw_emu, hw -CC = xcpp -CLCC = xocc - -ifeq ($(XDEVICE_REPO_PATH),) -#no device repo path set. do nothing - DEVICE_REPO_OPT = -else - DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} -endif - -#HOST_LFLAGS += ${XILINX_SDACCEL}/lib/lnx64.o/libstdc++.so.6 -HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 -HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread -CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} - -ifeq (${KEEP_TEMP},1) - CLCC_OPT += -s -endif - -ifeq (${KERNEL_DEBUG},1) - CLCC_OPT += -g -endif - -CLCC_OPT += --kernel ${KERNEL_NAME} -OBJECTS := $(HOST_SRCS:.cpp=.o) - -.PHONY: all - -all: run - -host: ${HOST_EXE_DIR}/${HOST_EXE} - -xbin_cpu_em: - make SDA_FLOW=cpu_emu xbin -f sdaccel.mk - -xbin_hw_em: - make SDA_FLOW=hw_emu xbin -f sdaccel.mk - -xbin_hw : - make SDA_FLOW=hw xbin -f sdaccel.mk - -xbin: ${XCLBIN} - -run_cpu_em: - make SDA_FLOW=cpu_emu run_em -f sdaccel.mk - -run_hw_em: - make SDA_FLOW=hw_emu run_em -f sdaccel.mk - -run_hw : - make SDA_FLOW=hw run_hw_int -f sdaccel.mk - -run_em: xconfig host xbin - XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} - -run_hw_int : host xbin_hw - source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} - -estimate : - ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS} - -xconfig : emconfig.json - -emconfig.json : - emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . 
- -${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} - ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ - -${XCLBIN}: - ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} - -%.o: %.cpp - ${CC} ${HOST_CFLAGS} -c $< -o $@ - -clean: - ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil - -cleanall: clean - ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou - - -help: - @echo "Compile and run CPU emulation using default xilinx:adm-pcie-7v3:1ddr:3.0 DSA" - @echo "make -f sdaccel.mk run_cpu_em" - @echo "" - @echo "Compile and run hardware emulation using default xilinx:adm-pcie-7v3:1ddr:3.0 DSA" - @echo "make -f sdaccel.mk run_hw_em" - @echo "" - @echo "Compile host executable only" - @echo "make -f sdaccel.mk host" - @echo "" - @echo "Compile XCLBIN file for system run only" - @echo "make -f sdaccel.mk xbin_hw" - @echo "" - @echo "Compile and run CPU emulation using xilinx:tul-pcie3-ku115:2ddr:3.0 DSA" - @echo "make -f sdaccel.mk XDEVICE=xilinx:tul-pcie3-ku115:2ddr:3.0 run_cpu_em" - @echo "" - @echo "Clean working diretory" - @echo "make -f sdaccel.mk clean" - @echo "" - @echo "Super clean working directory" - @echo "make -f sdaccel.mk cleanall" From 59f5f7e869eef7a8f3577e9be59c20e9cd11453e Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 5 Sep 2019 14:28:23 -0400 Subject: [PATCH 065/103] add shmat to sdaccel runtime --- samples/gemm/common/common.mk | 55 +++++ samples/gemm/main.cpp | 76 +++++-- samples/gemm/sdaccel.mk | 32 +++ tvm/src/codegen/opencl/sdaccel_module.cc | 243 +++++++++++++++-------- 4 files changed, 304 insertions(+), 102 deletions(-) create mode 100644 samples/gemm/common/common.mk create mode 100644 samples/gemm/sdaccel.mk diff --git a/samples/gemm/common/common.mk b/samples/gemm/common/common.mk new file mode 100644 index 000000000..3409e4aa5 --- /dev/null +++ b/samples/gemm/common/common.mk @@ -0,0 +1,55 @@ +SHELL = /bin/bash +VPATH = ./ +CC = xcpp +CLCC = xocc +ifeq ($(XDEVICE_REPO_PATH),) + DEVICE_REPO_OPT = +else 
+DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} +endif +HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 +HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread +CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} +ifeq (${KEEP_TEMP},1) + CLCC_OPT += -s +endif +ifeq (${KERNEL_DEBUG},1) + CLCC_OPT += -g +endif +CLCC_OPT += --kernel ${KERNEL_NAME} +OBJECTS := $(HOST_SRCS:.cpp=.o) +.PHONY: all +all: run +host: ${HOST_EXE_DIR}/${HOST_EXE} +xbin_cpu_em: + make SDA_FLOW=cpu_emu xbin -f sdaccel.mk +xbin_hw_em: + make SDA_FLOW=hw_emu xbin -f sdaccel.mk +xbin_hw : + make SDA_FLOW=hw xbin -f sdaccel.mk +xbin: ${XCLBIN} +run_cpu_em: + make SDA_FLOW=cpu_emu run_em -f sdaccel.mk +run_hw_em: + make SDA_FLOW=hw_emu run_em -f sdaccel.mk +run_hw : + make SDA_FLOW=hw run_hw_int -f sdaccel.mk +run_em: xconfig host xbin + XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} +run_hw_int : host xbin_hw + source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} +estimate : + ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS} +xconfig : emconfig.json +emconfig.json : + emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . 
+${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} + ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ +${XCLBIN}: + ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} +%.o: %.cpp + ${CC} ${HOST_CFLAGS} -c $< -o $@ +clean: + ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil +cleanall: clean + ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou diff --git a/samples/gemm/main.cpp b/samples/gemm/main.cpp index 1274a26dc..bc38036e6 100644 --- a/samples/gemm/main.cpp +++ b/samples/gemm/main.cpp @@ -27,14 +27,32 @@ int main(void) { #endif char* xclbinFilename = argv[1]; - std::vector source_0(3 * 3); - std::vector source_1(3 * 3); - std::vector source_2(3 * 3); - - size_t vector_size_bytes_0 = sizeof(int) * 3 * 3; - size_t vector_size_bytes_1 = sizeof(int) * 3 * 3; - size_t vector_size_bytes_2 = sizeof(int) * 3 * 3; - + std::vector source_0(10 * 10); + std::vector source_1(10 * 10); + std::vector source_2(10 * 10); + + size_t vector_size_bytes_0 = sizeof(int) * 10 * 10; + size_t vector_size_bytes_1 = sizeof(int) * 10 * 10; + size_t vector_size_bytes_2 = sizeof(int) * 10 * 10; + + int* arg_0 = (int*)shmat(2555905, nullptr, 0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + source_0[i1 + i0*10] = arg_0[i1 + i0*10]; + } + } + int* arg_1 = (int*)shmat(2555904, nullptr, 0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + source_1[i1 + i0*10] = arg_1[i1 + i0*10]; + } + } + int* arg_2 = (int*)shmat(2293765, nullptr, 0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + source_2[i1 + i0*10] = arg_2[i1 + i0*10]; + } + } std::vector platforms; cl::Platform::get(&platforms); cl::Platform platform = platforms[0]; @@ -62,19 +80,37 @@ int main(void) { cl::Kernel kernel(program, "default_function", &err1); auto default_function = cl::KernelFunctor(kernel) - cl::Buffer buffer_0(context, CL_MEM_READWRITE, vector_size_bytes_0) - cl::Buffer buffer_1(context, 
CL_MEM_READWRITE, vector_size_bytes_1) - cl::Buffer buffer_2(context, CL_MEM_READWRITE, vector_size_bytes_2) + cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0); + cl::Buffer buffer_1(context, CL_MEM_READ_WRITE, vector_size_bytes_1); + cl::Buffer buffer_2(context, CL_MEM_READ_WRITE, vector_size_bytes_2); - q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()) - q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()) - q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()) + q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); + q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); + q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2); - q.finish() - - q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()) - q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()) - q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()) - + q.finish(); + + q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); + q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); + q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); + + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + arg_0[i1 + i0*10] = source_0[i1 + i0*10]; + } + } + shmdt(source_0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + arg_1[i1 + i0*10] = source_1[i1 + i0*10]; + } + } + shmdt(source_1); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + arg_2[i1 + i0*10] = source_2[i1 + i0*10]; + } + } + shmdt(source_2); } diff --git a/samples/gemm/sdaccel.mk b/samples/gemm/sdaccel.mk new file mode 100644 
index 000000000..ce266d89e --- /dev/null +++ b/samples/gemm/sdaccel.mk @@ -0,0 +1,32 @@ +ifndef XILINX_SDX +$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) +endif +SDA_FLOW = cpu_emu +HOST_SRCS = host.cpp +HOST_EXE_DIR=. +HOST_EXE = host +HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL +HOST_LFLAGS = +KERNEL_SRCS = default_function.cl +KERNEL_NAME = default_function +KERNEL_DEFS = +KERNEL_INCS = +XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 +XDEVICE_REPO_PATH= +KEEP_TEMP=1 +KERNEL_DEBUG= +XCLBIN_NAME=bin_krnl +HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" +BOARD_SETUP_FILE=setup.sh +ifeq (${SDA_FLOW},cpu_emu) + CLCC_OPT += -t sw_emu + XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin +else ifeq (${SDA_FLOW},hw_emu) + CLCC_OPT += -t hw_emu + XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin +else ifeq (${SDA_FLOW},hw) + XCLBIN = ${XCLBIN_NAME}_hw.xclbin +CLCC_OPT += -t hw +endifHOST_ARGS = ${XCLBIN} +COMMON_DIR = ./common +include ${COMMON_DIR}/common.mk diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 0b6a5be6e..022caec86 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -1,8 +1,8 @@ -/*! - * Copyright (c) 2018 by Contributors - * \file build_vhls.cc - * \brief Build HLS C modules from source. - */ +/* + Yang.Bai + yb269@cornell.edu +*/ + #include "./sdaccel_module.h" #include #include @@ -170,40 +170,40 @@ void CollectArgInfo(TVMArgs& args, } } -// void GenSharedMem(TVMArgs& args, -// std::vector& shmids, -// std::vector& arg_sizes) { -// for (int i = 0; i < args.size(); i++) { -// if (args[i].type_code() == kArrayHandle) { -// TVMArray* arr = args[i]; -// // generate shared memory key and id -// // TODO: maybe get the current path?? 
-// key_t key = ftok("/", i+1); -// int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); -// shmids.push_back(shmid); -// // copy mem from TVM args to the shared memory -// void* mem = shmat(shmid, nullptr, 0); -// memcpy(mem, arr->data, arg_sizes[i]); -// } else { -// shmids.push_back(0); -// } -// } -// } +void GenSharedMem(TVMArgs& args, + std::vector& shmids, + std::vector& arg_sizes) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + // generate shared memory key and id + // TODO: maybe get the current path?? + key_t key = ftok("/", i+1); + int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); + shmids.push_back(shmid); + // copy mem from TVM args to the shared memory + void* mem = shmat(shmid, nullptr, 0); + memcpy(mem, arr->data, arg_sizes[i]); + } else { + shmids.push_back(0); + } + } +} -// void FreeSharedMem(TVMArgs& args, -// const std::vector& shmids, -// std::vector& arg_sizes) { -// for (size_t i = 0; i < shmids.size(); i++) { -// if (args[i].type_code() == kArrayHandle) { -// TVMArray* arr = args[i]; -// int shmid = shmids[i]; -// void* mem = shmat(shmid, nullptr, 0); -// memcpy(arr->data, mem, arg_sizes[i]); -// shmdt(mem); -// shmctl(shmid, IPC_RMID, nullptr); -// } -// } -// } +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes) { + for (size_t i = 0; i < shmids.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + int shmid = shmids[i]; + void* mem = shmat(shmid, nullptr, 0); + memcpy(arr->data, mem, arg_sizes[i]); + shmdt(mem); + shmctl(shmid, IPC_RMID, nullptr); + } + } +} // copy values from the shared mem to local mem void PrintCopy(TVMArray* arr, @@ -217,20 +217,23 @@ void PrintCopy(TVMArray* arr, indent += 2; if (i == arr->ndim-1) { PrintIndent(stream, indent); - stream << "arg_top_" << nth_arr; - for (int j = 0; j < arr->ndim; j++) { - stream << "[i" << j << "]"; - } - stream << " = ("; - stream << 
Type2ExtStr(arr->dtype); - stream << ")(arg_" << nth_arr; + stream << "source_" << nth_arr; stream << "[i" << arr->ndim-1; int mul = 1; - for (int j = arr->ndim-2; j >= 0; j--) { + for (int j = arr->ndim-2;j >= 0;j--) { mul *= arr->shape[j+1]; stream << " + i" << j << "*" << mul; } - stream << "])"; + stream << "] = "; + stream << "arg_" << nth_arr; + stream << "[i" << arr->ndim - 1; + + int mul2 = 1; + for (int j = arr->ndim-2;j >= 0;j--) { + mul2 *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul2; + } + stream << "]"; if (arr->dtype.fracs > 0) stream << " >> " << static_cast(arr->dtype.fracs); stream << ";\n"; @@ -262,13 +265,16 @@ void PrintCopyBack(TVMArray* arr, mul *= arr->shape[j+1]; stream << " + i" << j << "*" << mul; } - stream << "] = ("; - stream << Type2ExtStr(arr->dtype); - stream << ")(arg_top_" << nth_arr; - for (int j = 0; j < arr->ndim; j++) { - stream << "[i" << j << "]"; + stream << "] = "; + // stream << Type2ExtStr(arr->dtype); + stream << "source_" << nth_arr; + stream << "[i" << arr->ndim - 1; + int mul2 = 1; + for (int j = arr->ndim-2;j >=0;j--) { + mul2 *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul2; } - stream << ")"; + stream << "]"; if (arr->dtype.fracs > 0) stream << " << " << static_cast(arr->dtype.fracs); stream << ";\n"; @@ -307,7 +313,7 @@ void GenMakFile() { stream << "KEEP_TEMP=1\n"; stream << "KERNEL_DEBUG=\n"; stream << "XCLBIN_NAME=bin_krnl\n"; - stream << "HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\"\n"; + stream << "HOST_CFLAGS+=-DTARGET_DEVICE=\\\"${XDEVICE}\\\"\n"; stream << "BOARD_SETUP_FILE=setup.sh\n"; stream << "ifeq (${SDA_FLOW},cpu_emu)\n"; PrintIndent(stream, indent); @@ -342,10 +348,77 @@ void GenCommonFile() { stream << "VPATH = ./\n"; stream << "CC = xcpp\n"; stream << "CLCC = xocc\n"; + stream << "ifeq ($(XDEVICE_REPO_PATH),)\n"; + PrintIndent(stream, indent); + stream << "DEVICE_REPO_OPT = \n"; + stream << "else\n"; + stream << "DEVICE_REPO_OPT = --xp 
prop:solution.device_repo_paths=${XDEVICE_REPO_PATH}\n"; + stream << "endif\n"; + stream << "HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2\n"; + stream << "HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread\n"; + stream << "CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS}\n"; + stream << "ifeq (${KEEP_TEMP},1)\n"; + PrintIndent(stream, indent); + stream << "CLCC_OPT += -s\n"; + stream << "endif\n"; + stream << "ifeq (${KERNEL_DEBUG},1)\n"; + PrintIndent(stream, indent); + stream << "CLCC_OPT += -g\n"; + stream << "endif\n"; + stream << "CLCC_OPT += --kernel ${KERNEL_NAME}\n"; + stream << "OBJECTS := $(HOST_SRCS:.cpp=.o)\n"; + stream << ".PHONY: all\n"; + stream << "all: run\n"; + stream << "host: ${HOST_EXE_DIR}/${HOST_EXE}\n"; + stream << "xbin_cpu_em:\n"; + PrintIndent(stream, indent); + stream << "make SDA_FLOW=cpu_emu xbin -f sdaccel.mk\n"; + stream << "xbin_hw_em:\n"; + PrintIndent(stream, indent); + stream << "make SDA_FLOW=hw_emu xbin -f sdaccel.mk\n"; + stream << "xbin_hw :\n"; + PrintIndent(stream, indent); + stream << "make SDA_FLOW=hw xbin -f sdaccel.mk\n"; + stream << "xbin: ${XCLBIN}\n"; + stream << "run_cpu_em: \n"; + PrintIndent(stream, indent); + stream << "make SDA_FLOW=cpu_emu run_em -f sdaccel.mk\n"; + stream << "run_hw_em: \n"; + PrintIndent(stream, indent); + stream << "make SDA_FLOW=hw_emu run_em -f sdaccel.mk\n"; + stream << "run_hw : \n"; + PrintIndent(stream, indent); + stream << "make SDA_FLOW=hw run_hw_int -f sdaccel.mk\n"; + stream << "run_em: xconfig host xbin\n"; + PrintIndent(stream, indent); + stream << "XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS}\n"; + stream << "run_hw_int : host xbin_hw\n"; + PrintIndent(stream, indent); + stream << "source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS}\n"; + stream << "estimate : \n"; + PrintIndent(stream, indent); + stream << "${CLCC} -c -t hw_emu --xdevice 
${XDEVICE} --report estimate ${KERNEL_SRCS}\n"; + stream << "xconfig : emconfig.json\n"; + stream << "emconfig.json :\n"; + PrintIndent(stream, indent); + stream << "emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od .\n"; + stream << "${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS}\n"; + PrintIndent(stream, indent); + stream << "${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@\n"; + stream << "${XCLBIN}:\n"; + PrintIndent(stream, indent); + stream << "${CLCC} ${CLCC_OPT} ${KERNEL_SRCS}\n"; + stream << "%.o: %.cpp\n"; + PrintIndent(stream, indent); + stream << "${CC} ${HOST_CFLAGS} -c $< -o $@\n"; + stream << "clean:\n"; + PrintIndent(stream, indent); + stream << "${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil\n"; + stream << "cleanall: clean\n"; + PrintIndent(stream, indent); + stream << "${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou\n"; stream.close(); - - } @@ -383,6 +456,7 @@ void GenHostCode(TVMArgs& args, stream << "\n\n"; stream << "int main(void) { \n"; + stream << "#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE)\n"; indent += 2; stream << " #define STR_VALUE(arg) #arg\n"; @@ -439,6 +513,20 @@ void GenHostCode(TVMArgs& args, } stream << "\n"; + for (int i = 0;i < args.size();i++ ) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared memory + PrintIndent(stream, indent); + stream << Type2Str(arg_types[i]) << "* "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Str(arg_types[i]) << "*)"; + stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + TVMArray* arr = args[i]; + // copy from shared mem + PrintCopy(arr, stream, indent, i); + } + } + // Getting First Platform @@ -505,7 +593,7 @@ void GenHostCode(TVMArgs& args, stream << "auto default_function = cl::KernelFunctor<"; for (int i = 0;i < args.size();i++) { if (i == args.size() - 1) { - stream << "cl::Buffer&>(kernel)\n"; + stream << "cl::Buffer&>(kernel);\n"; } else { stream << "cl::Buffer&, "; } @@ -521,7 
+609,7 @@ void GenHostCode(TVMArgs& args, for (int i = 0;i < args.size();i++) { PrintIndent(stream, indent); stream << "cl::Buffer buffer_" << i; - stream << "(context, CL_MEM_READWRITE, vector_size_bytes_" << i << ")\n"; + stream << "(context, CL_MEM_READ_WRITE, vector_size_bytes_" << i << ");\n"; } stream << "\n"; @@ -531,7 +619,7 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "q.enqueueWriteBuffer(buffer_" << i; stream << ", CL_TRUE, 0, vector_size_bytes_" << i; - stream << ", source_" << i << ".data())\n"; + stream << ", source_" << i << ".data());\n"; } stream << "\n"; @@ -550,7 +638,7 @@ void GenHostCode(TVMArgs& args, stream << ");\n"; PrintIndent(stream, indent); - stream << "q.finish()\n"; + stream << "q.finish();\n"; stream << "\n"; @@ -560,7 +648,7 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "q.enqueueReadBuffer(buffer_" << i; stream << ", CL_TRUE, 0, vector_size_bytes_" << i; - stream << ", source_" << i << ".data())\n"; + stream << ", source_" << i << ".data());\n"; } stream << "\n"; @@ -628,28 +716,19 @@ void GenHostCode(TVMArgs& args, // } - // // call the function - // PrintIndent(stream, indent); - // stream << func->name << "("; - // for (int i = 0; i < args.size(); i++) { - // stream << "arg_top_" << i; - // if (i != args.size()-1) - // stream << ", "; - // } - // stream << ");\n"; - // // copy to shared mem - // for (int i = 0; i < args.size(); i++) { - // if (args[i].type_code() == kArrayHandle) { - // TVMArray* arr = args[i]; - // PrintCopyBack(arr, stream, indent, i); - // PrintIndent(stream, indent); - // stream << "shmdt("; - // stream << "arg_" << i << ");\n"; - // } - // } + // copy to shared mem + for (int i = 0;i < args.size();i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + PrintCopyBack(arr, stream, indent, i); + PrintIndent(stream, indent); + stream << "shmdt("; + stream << "source_" << i << ");\n"; + } + } stream << "}\n"; stream.close(); @@ 
-680,7 +759,7 @@ class SDAccelModuleNode final : public ModuleNode { std::vector arg_types; std::vector shmids; CollectArgInfo(args, func_, arg_sizes, arg_types); - // GenSharedMem(args, shmids, arg_sizes); + GenSharedMem(args, shmids, arg_sizes); LOG(CLEAN) << "Creating a Host file for SDAccel Runtime ..."; GenHostCode(args, shmids, arg_types, func_, test_file_); @@ -696,7 +775,7 @@ class SDAccelModuleNode final : public ModuleNode { LOG(CLEAN) << "Running SDAccel OpenCL Software Simulation ..."; LOG(CLEAN) << "Finished SDAccel OpenCL Software Simulation ..."; system("make -f sdaccel.mk cleanall"); - // FreeSharedMem(args, shmids, arg_sizes); + FreeSharedMem(args, shmids, arg_sizes); }); } From e2dd3a22635f24ec9afbfc84b20ee239e5208dbd Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 5 Sep 2019 16:26:35 -0400 Subject: [PATCH 066/103] fixed bug for sdaccel runtime seg fault --- samples/gemm/common.mk | 4 - samples/gemm/gemm_main.py | 2 - samples/gemm/main.cpp | 66 ++++----- samples/kmeans/kmeans_sdaccel.py | 27 ++++ samples/smith_waterman/common/common.mk | 55 +++++++ samples/smith_waterman/main.cpp | 135 ++++++++++++++++++ samples/smith_waterman/sdaccel.mk | 32 +++++ .../smith_waterman/smith_waterman_sdaccel.py | 24 ++++ samples/smith_waterman/smith_waterman_vhls.py | 3 +- tvm/src/codegen/hlsc/build_hlsc.cc | 1 - tvm/src/codegen/hlsc/vhls_module.cc | 4 +- tvm/src/codegen/opencl/sdaccel.mk | 84 ----------- tvm/src/codegen/opencl/sdaccel_module.cc | 72 +--------- 13 files changed, 316 insertions(+), 193 deletions(-) delete mode 100644 samples/gemm/common.mk create mode 100644 samples/kmeans/kmeans_sdaccel.py create mode 100644 samples/smith_waterman/common/common.mk create mode 100644 samples/smith_waterman/main.cpp create mode 100644 samples/smith_waterman/sdaccel.mk create mode 100644 samples/smith_waterman/smith_waterman_sdaccel.py delete mode 100755 tvm/src/codegen/opencl/sdaccel.mk diff --git a/samples/gemm/common.mk b/samples/gemm/common.mk deleted file mode 
100644 index 7d3530a5b..000000000 --- a/samples/gemm/common.mk +++ /dev/null @@ -1,4 +0,0 @@ -SHELL = /bin/bash -VPATH = ./ -CC = xcpp -CLCC = xocc diff --git a/samples/gemm/gemm_main.py b/samples/gemm/gemm_main.py index 53305b98c..4796bf2fb 100644 --- a/samples/gemm/gemm_main.py +++ b/samples/gemm/gemm_main.py @@ -33,8 +33,6 @@ def kernel(matrix_1, matrix_2): def time_gemm(dtype, m=1024, n=1024, k=1024, target=None): hcl.init(dtype) f = gemm(m, n, k, dtype, target) - - print (f) np_1 = np.random.randint(10, size=(m, k)) np_2 = np.random.randint(10, size=(k, n)) np_3 = np.matmul(np_1, np_2) diff --git a/samples/gemm/main.cpp b/samples/gemm/main.cpp index bc38036e6..9e09fbc1d 100644 --- a/samples/gemm/main.cpp +++ b/samples/gemm/main.cpp @@ -13,7 +13,9 @@ #include #include #include -#include +#include +#include +#include #pragma once @@ -27,30 +29,30 @@ int main(void) { #endif char* xclbinFilename = argv[1]; - std::vector source_0(10 * 10); - std::vector source_1(10 * 10); - std::vector source_2(10 * 10); + std::vector source_0(3 * 3); + std::vector source_1(3 * 3); + std::vector source_2(3 * 3); - size_t vector_size_bytes_0 = sizeof(int) * 10 * 10; - size_t vector_size_bytes_1 = sizeof(int) * 10 * 10; - size_t vector_size_bytes_2 = sizeof(int) * 10 * 10; + size_t vector_size_bytes_0 = sizeof(int) * 3 * 3; + size_t vector_size_bytes_1 = sizeof(int) * 3 * 3; + size_t vector_size_bytes_2 = sizeof(int) * 3 * 3; - int* arg_0 = (int*)shmat(2555905, nullptr, 0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - source_0[i1 + i0*10] = arg_0[i1 + i0*10]; + int* arg_0 = (int*)shmat(1441798, nullptr, 0); + for (size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + source_0[i1 + i0*3] = arg_0[i1 + i0*3]; } } - int* arg_1 = (int*)shmat(2555904, nullptr, 0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - source_1[i1 + i0*10] = arg_1[i1 + i0*10]; + int* arg_1 = (int*)shmat(1441799, nullptr, 0); + for 
(size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + source_1[i1 + i0*3] = arg_1[i1 + i0*3]; } } - int* arg_2 = (int*)shmat(2293765, nullptr, 0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - source_2[i1 + i0*10] = arg_2[i1 + i0*10]; + int* arg_2 = (int*)shmat(1441800, nullptr, 0); + for (size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + source_2[i1 + i0*3] = arg_2[i1 + i0*3]; } } std::vector platforms; @@ -78,7 +80,7 @@ int main(void) { int err1; cl::Kernel kernel(program, "default_function", &err1); - auto default_function = cl::KernelFunctor(kernel) + auto default_function = cl::KernelFunctor(kernel); cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0); cl::Buffer buffer_1(context, CL_MEM_READ_WRITE, vector_size_bytes_1); @@ -95,22 +97,22 @@ int main(void) { q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - arg_0[i1 + i0*10] = source_0[i1 + i0*10]; + for (size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + arg_0[i1 + i0*3] = source_0[i1 + i0*3]; } } - shmdt(source_0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - arg_1[i1 + i0*10] = source_1[i1 + i0*10]; + shmdt(arg_0); + for (size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + arg_1[i1 + i0*3] = source_1[i1 + i0*3]; } } - shmdt(source_1); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - arg_2[i1 + i0*10] = source_2[i1 + i0*10]; + shmdt(arg_1); + for (size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + arg_2[i1 + i0*3] = source_2[i1 + i0*3]; } } - shmdt(source_2); + shmdt(arg_2); } diff --git a/samples/kmeans/kmeans_sdaccel.py b/samples/kmeans/kmeans_sdaccel.py new file mode 100644 index 000000000..c204c592e --- 
/dev/null +++ b/samples/kmeans/kmeans_sdaccel.py @@ -0,0 +1,27 @@ +import numpy as np +import random +import heterocl as hcl +from kmeans_main import top + +K = 16 +N = 320 +dim = 32 + +f1 = top('sdaccel_sw_emu') +#f2 = top() +points_np = np.random.randint(100, size=(N, dim)) +labels_np = np.zeros(N) +means_np = points_np[random.sample(range(N), K),:] + +hcl_points1 = hcl.asarray(points_np) +hcl_means1 = hcl.asarray(means_np) +hcl_labels1 = hcl.asarray(labels_np) + +hcl_points2 = hcl.asarray(points_np) +hcl_means2 = hcl.asarray(means_np) +hcl_labels2 = hcl.asarray(labels_np) + +f1(hcl_points1, hcl_means1, hcl_labels1) +#f2(hcl_points2, hcl_means2, hcl_labels2) + +#assert np.array_equal(hcl_labels1.asnumpy(), hcl_labels2.asnumpy()) diff --git a/samples/smith_waterman/common/common.mk b/samples/smith_waterman/common/common.mk new file mode 100644 index 000000000..3409e4aa5 --- /dev/null +++ b/samples/smith_waterman/common/common.mk @@ -0,0 +1,55 @@ +SHELL = /bin/bash +VPATH = ./ +CC = xcpp +CLCC = xocc +ifeq ($(XDEVICE_REPO_PATH),) + DEVICE_REPO_OPT = +else +DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} +endif +HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 +HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread +CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} +ifeq (${KEEP_TEMP},1) + CLCC_OPT += -s +endif +ifeq (${KERNEL_DEBUG},1) + CLCC_OPT += -g +endif +CLCC_OPT += --kernel ${KERNEL_NAME} +OBJECTS := $(HOST_SRCS:.cpp=.o) +.PHONY: all +all: run +host: ${HOST_EXE_DIR}/${HOST_EXE} +xbin_cpu_em: + make SDA_FLOW=cpu_emu xbin -f sdaccel.mk +xbin_hw_em: + make SDA_FLOW=hw_emu xbin -f sdaccel.mk +xbin_hw : + make SDA_FLOW=hw xbin -f sdaccel.mk +xbin: ${XCLBIN} +run_cpu_em: + make SDA_FLOW=cpu_emu run_em -f sdaccel.mk +run_hw_em: + make SDA_FLOW=hw_emu run_em -f sdaccel.mk +run_hw : + make SDA_FLOW=hw run_hw_int -f sdaccel.mk +run_em: xconfig host xbin 
+ XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} +run_hw_int : host xbin_hw + source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} +estimate : + ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS} +xconfig : emconfig.json +emconfig.json : + emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . +${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} + ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ +${XCLBIN}: + ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} +%.o: %.cpp + ${CC} ${HOST_CFLAGS} -c $< -o $@ +clean: + ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil +cleanall: clean + ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou diff --git a/samples/smith_waterman/main.cpp b/samples/smith_waterman/main.cpp new file mode 100644 index 000000000..f9c52809b --- /dev/null +++ b/samples/smith_waterman/main.cpp @@ -0,0 +1,135 @@ +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#pragma once + + + + +int main(void) { +#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE) + #define STR_VALUE(arg) #arg + #define GET_STRING(name) STR_VALUE(name) + #define TARGET_DEVICE GET_STRING(SDX_PLATFORM) +#endif + char* xclbinFilename = argv[1]; + + std::vector source_0(1024 * 128); + std::vector source_1(1024 * 128); + std::vector source_2(1024 * 256); + std::vector source_3(1024 * 256); + + size_t vector_size_bytes_0 = sizeof(unsigned int) * 1024 * 128; + size_t vector_size_bytes_1 = sizeof(unsigned int) * 1024 * 128; + size_t vector_size_bytes_2 = sizeof(unsigned int) * 1024 * 256; + size_t vector_size_bytes_3 = sizeof(unsigned int) * 1024 * 256; + + unsigned int* arg_0 = (unsigned int*)shmat(1507336, nullptr, 0); + 
for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 128; i1++) { + source_0[i1 + i0*128] = arg_0[i1 + i0*128]; + } + } + unsigned int* arg_1 = (unsigned int*)shmat(3145728, nullptr, 0); + for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 128; i1++) { + source_1[i1 + i0*128] = arg_1[i1 + i0*128]; + } + } + unsigned int* arg_2 = (unsigned int*)shmat(3145729, nullptr, 0); + for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 256; i1++) { + source_2[i1 + i0*256] = arg_2[i1 + i0*256]; + } + } + unsigned int* arg_3 = (unsigned int*)shmat(1769474, nullptr, 0); + for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 256; i1++) { + source_3[i1 + i0*256] = arg_3[i1 + i0*256]; + } + } + std::vector platforms; + cl::Platform::get(&platforms); + cl::Platform platform = platforms[0]; + + std::vector devices; + platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices); + cl::Device device = devices[0]; + + cl::Context context(device); + cl::CommandQueue q(context, device); + + std::ifstream bin_file(xclbinFilename, std::ifstream::binary); + bin_file.seekg (0, bin_file.end); + unsigned nb = bin_file.tellg(); + bin_file.seekg (0, bin_file.beg); + char *buf = new char [nb]; + bin_file.read(buf, nb); + + cl::Program::Binaries bins; + bins.push_back({buf,nb}); + devices.resize(1); + cl::Program program(context, devices, bins); + + int err1; + cl::Kernel kernel(program, "default_function", &err1); + auto default_function = cl::KernelFunctor(kernel); + + cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0); + cl::Buffer buffer_1(context, CL_MEM_READ_WRITE, vector_size_bytes_1); + cl::Buffer buffer_2(context, CL_MEM_READ_WRITE, vector_size_bytes_2); + cl::Buffer buffer_3(context, CL_MEM_READ_WRITE, vector_size_bytes_3); + + q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); + q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); + 
q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); + q.enqueueWriteBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); + + default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2, buffer_3); + q.finish(); + + q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); + q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); + q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); + q.enqueueReadBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); + + for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 128; i1++) { + arg_0[i1 + i0*128] = source_0[i1 + i0*128]; + } + } + shmdt(arg_0); + for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 128; i1++) { + arg_1[i1 + i0*128] = source_1[i1 + i0*128]; + } + } + shmdt(arg_1); + for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 256; i1++) { + arg_2[i1 + i0*256] = source_2[i1 + i0*256]; + } + } + shmdt(arg_2); + for (size_t i0 = 0; i0 < 1024; i0++) { + for (size_t i1 = 0; i1 < 256; i1++) { + arg_3[i1 + i0*256] = source_3[i1 + i0*256]; + } + } + shmdt(arg_3); +} diff --git a/samples/smith_waterman/sdaccel.mk b/samples/smith_waterman/sdaccel.mk new file mode 100644 index 000000000..ce266d89e --- /dev/null +++ b/samples/smith_waterman/sdaccel.mk @@ -0,0 +1,32 @@ +ifndef XILINX_SDX +$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) +endif +SDA_FLOW = cpu_emu +HOST_SRCS = host.cpp +HOST_EXE_DIR=. 
+HOST_EXE = host +HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL +HOST_LFLAGS = +KERNEL_SRCS = default_function.cl +KERNEL_NAME = default_function +KERNEL_DEFS = +KERNEL_INCS = +XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 +XDEVICE_REPO_PATH= +KEEP_TEMP=1 +KERNEL_DEBUG= +XCLBIN_NAME=bin_krnl +HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" +BOARD_SETUP_FILE=setup.sh +ifeq (${SDA_FLOW},cpu_emu) + CLCC_OPT += -t sw_emu + XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin +else ifeq (${SDA_FLOW},hw_emu) + CLCC_OPT += -t hw_emu + XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin +else ifeq (${SDA_FLOW},hw) + XCLBIN = ${XCLBIN_NAME}_hw.xclbin +CLCC_OPT += -t hw +endifHOST_ARGS = ${XCLBIN} +COMMON_DIR = ./common +include ${COMMON_DIR}/common.mk diff --git a/samples/smith_waterman/smith_waterman_sdaccel.py b/samples/smith_waterman/smith_waterman_sdaccel.py new file mode 100644 index 000000000..354cac757 --- /dev/null +++ b/samples/smith_waterman/smith_waterman_sdaccel.py @@ -0,0 +1,24 @@ +import heterocl as hcl +import numpy as np +from smith_waterman_main import * + +# f = top("vhls_csim") +f = top("sdaccel_sw_emu") + +# add a very simple test +_seqA_np = np.ones((num, lenA)) +for i in range(0, 4): + _seqA_np[0][i] = 2 +_seqB_np = np.ones((num, lenB)) +_seqA = hcl.asarray(_seqA_np, dtype) +_seqB = hcl.asarray(_seqB_np, dtype) +_consensusA = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) +_consensusB = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) +f(_seqA, _seqB, _consensusA, _consensusB) +_consensusA_np = _consensusA.asnumpy() +_consensusB_np = _consensusB.asnumpy() +for i in range(0, 256): + if i < 124: + assert _consensusA_np[0][i] == 1 + else: + assert _consensusA_np[0][i] == 0 diff --git a/samples/smith_waterman/smith_waterman_vhls.py b/samples/smith_waterman/smith_waterman_vhls.py index 8cec359dd..354cac757 100644 --- a/samples/smith_waterman/smith_waterman_vhls.py +++ b/samples/smith_waterman/smith_waterman_vhls.py @@ -2,7 +2,8 @@ import numpy as np from smith_waterman_main import * -f = 
top("vhls_csim") +# f = top("vhls_csim") +f = top("sdaccel_sw_emu") # add a very simple test _seqA_np = np.ones((num, lenA)) diff --git a/tvm/src/codegen/hlsc/build_hlsc.cc b/tvm/src/codegen/hlsc/build_hlsc.cc index ce7903553..e373371a2 100644 --- a/tvm/src/codegen/hlsc/build_hlsc.cc +++ b/tvm/src/codegen/hlsc/build_hlsc.cc @@ -24,7 +24,6 @@ runtime::Module BuildVivadoHLSCSim(Array funcs) { cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); - std::cout << code; return runtime::CreateVivadoHLSModule(funcs[0], code); } diff --git a/tvm/src/codegen/hlsc/vhls_module.cc b/tvm/src/codegen/hlsc/vhls_module.cc index fd28234db..c5f004a93 100644 --- a/tvm/src/codegen/hlsc/vhls_module.cc +++ b/tvm/src/codegen/hlsc/vhls_module.cc @@ -345,9 +345,9 @@ class VivadoHLSModuleNode final : public ModuleNode { GenHostCode(args, shmids, arg_types, func_, test_file_); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated HLS C code ..."; - // system("g++ main.cpp -o out"); + system("g++ main.cpp -o out"); LOG(CLEAN) << "Running C simulation ..."; - // system("./out"); + system("./out"); LOG(CLEAN) << "Finished C simulation"; // system("rm out main.cpp"); FreeSharedMem(args, shmids, arg_sizes); diff --git a/tvm/src/codegen/opencl/sdaccel.mk b/tvm/src/codegen/opencl/sdaccel.mk deleted file mode 100755 index 7c361cf6b..000000000 --- a/tvm/src/codegen/opencl/sdaccel.mk +++ /dev/null @@ -1,84 +0,0 @@ -#******************************************************************************* -#Vendor: Xilinx -#Associated Filename: sdaccel.mk -#Purpose: Makefile exmaple for SDAccel Compilation -# -#******************************************************************************* -#Copyright (C) 2015-2016 XILINX, Inc. -# -#This file contains confidential and proprietary information of Xilinx, Inc. and -#is protected under U.S. and international copyright and other intellectual -#property laws. 
-# -#DISCLAIMER -#This disclaimer is not a license and does not grant any rights to the materials -#distributed herewith. Except as otherwise provided in a valid license issued to -#you by Xilinx, and to the maximum extent permitted by applicable law: -#(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX -#HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, -#INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR -#FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether -#in contract or tort, including negligence, or under any other theory of -#liability) for any loss or damage of any kind or nature related to, arising under -#or in connection with these materials, including for any direct, or any indirect, -#special, incidental, or consequential loss or damage (including loss of data, -#profits, goodwill, or any type of loss or damage suffered as a result of any -#action brought by a third party) even if such damage or loss was reasonably -#foreseeable or Xilinx had been advised of the possibility of the same. -# -#CRITICAL APPLICATIONS -#Xilinx products are not designed or intended to be fail-safe, or for use in any -#application requiring fail-safe performance, such as life-support or safety -#devices or systems, Class III medical devices, nuclear facilities, applications -#related to the deployment of airbags, or any other applications that could lead -#to death, personal injury, or severe property or environmental damage -#(individually and collectively, "Critical Applications"). Customer assumes the -#sole risk and liability of any use of Xilinx products in Critical Applications, -#subject only to applicable laws and regulations governing limitations on product -#liability. -# -#THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT -#ALL TIMES. 
-# -#****************************************************************************** -ifndef XILINX_SDX -$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) -endif - -SDA_FLOW = cpu_emu -HOST_SRCS = vadd.cpp -HOST_EXE_DIR=. -HOST_EXE = vadd -HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL -HOST_LFLAGS = - -KERNEL_SRCS = default_function.cl -KERNEL_NAME = default_function -KERNEL_DEFS = -KERNEL_INCS = -#set target device for XCLBIN -XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 -XDEVICE_REPO_PATH= -KEEP_TEMP=1 -KERNEL_DEBUG= -XCLBIN_NAME=bin_vadd -HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" -#BOARD_SETUP_FILE needs to point to setup.sh generated by xbinst command -BOARD_SETUP_FILE=setup.sh - -ifeq (${SDA_FLOW},cpu_emu) - CLCC_OPT += -t sw_emu - XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin -else ifeq (${SDA_FLOW},hw_emu) - CLCC_OPT += -t hw_emu - XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin -else ifeq (${SDA_FLOW},hw) - XCLBIN = ${XCLBIN_NAME}_hw.xclbin - CLCC_OPT += -t hw -endif - -HOST_ARGS = ${XCLBIN} - -COMMON_DIR = ./common -include ${COMMON_DIR}/common.mk - diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 022caec86..3e6f77d63 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -448,7 +448,10 @@ void GenHostCode(TVMArgs& args, stream << "#include \n"; stream << "#include \n"; stream << "#include \n"; - stream << "#include \n"; + // stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; stream << "#pragma once\n"; stream << "\n\n"; @@ -654,71 +657,6 @@ void GenHostCode(TVMArgs& args, - - // Print the type of each - // for (int i = 0;i < args.size();i++) { - // PrintIndent(stream, indent); - // // stream << Type2Str(arg_types[i]) << " "; - // stream << arg_types[i] << " "; - // stream << "arg_" << i; - // TVMArray* arr = args[i]; - // for (int j = 0;j < arr->ndim;j++) { - // stream << "[" << 
arr->shape[j] << "]"; - // } - // stream << ";\n"; - // } - - - - - - - - - // for (int i = 0; i < args.size(); i++) { - // if (args[i].type_code() == kArrayHandle) { - // // read from the shared memory - // PrintIndent(stream, indent); - // stream << Type2Byte(arg_types[i]) << "* "; - // stream << "arg_" << i << " = "; - // stream << "(" << Type2Byte(arg_types[i]) << "*)"; - // stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - // PrintIndent(stream, indent); - // stream << Type2Str(arg_types[i]) << " "; - // stream << "arg_top_" << i; - // TVMArray* arr = args[i]; - // for (int j = 0; j < arr->ndim; j++) - // stream << "[" << arr->shape[j] << "]"; - // stream << ";\n"; - // // copy from shared mem - // PrintCopy(arr, stream, indent, i); - // } else { - // // directly assign the value to the variable - // PrintIndent(stream, indent); - // stream << Type2Byte(arg_types[i]) << " "; - // stream << "arg_" << i << " = "; - // stream << "(" << Type2Byte(arg_types[i]) << ")"; - // if (args[i].type_code() == kDLInt || - // args[i].type_code() == kDLUInt) { - // stream << int64_t(args[i]); - // } - // stream << ";\n"; - // PrintIndent(stream, indent); - // stream << Type2Str(arg_types[i]) << " "; - // stream << "arg_top_" << i; - // stream << " = ("; - // stream << Type2ExtStr(arg_types[i]); - // stream << ")(arg_" << i << ")"; - // if (arg_types[i].fracs > 0) - // stream << " >> " << static_cast(arg_types[i].fracs); - // stream << ";\n"; - // } - // } - - - - - // copy to shared mem for (int i = 0;i < args.size();i++) { if (args[i].type_code() == kArrayHandle) { @@ -726,7 +664,7 @@ void GenHostCode(TVMArgs& args, PrintCopyBack(arr, stream, indent, i); PrintIndent(stream, indent); stream << "shmdt("; - stream << "source_" << i << ");\n"; + stream << "arg_" << i << ");\n"; } } From f405ec361296230a6d8717325b4709aab81db14d Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 5 Sep 2019 16:37:07 -0400 Subject: [PATCH 067/103] fixed the bug of host.cpp multiple --- 
samples/lenet/common/common.mk | 55 +++++ samples/lenet/lenet_sdaccel.py | 23 +++ samples/lenet/main.cpp | 193 ++++++++++++++++++ samples/lenet/sdaccel.mk | 32 +++ samples/smith_waterman/main.cpp | 8 +- samples/smith_waterman/smith_waterman_vhls.py | 3 +- tvm/src/codegen/opencl/sdaccel_module.cc | 7 +- 7 files changed, 312 insertions(+), 9 deletions(-) create mode 100644 samples/lenet/common/common.mk create mode 100644 samples/lenet/lenet_sdaccel.py create mode 100644 samples/lenet/main.cpp create mode 100644 samples/lenet/sdaccel.mk diff --git a/samples/lenet/common/common.mk b/samples/lenet/common/common.mk new file mode 100644 index 000000000..3409e4aa5 --- /dev/null +++ b/samples/lenet/common/common.mk @@ -0,0 +1,55 @@ +SHELL = /bin/bash +VPATH = ./ +CC = xcpp +CLCC = xocc +ifeq ($(XDEVICE_REPO_PATH),) + DEVICE_REPO_OPT = +else +DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH} +endif +HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2 +HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread +CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS} +ifeq (${KEEP_TEMP},1) + CLCC_OPT += -s +endif +ifeq (${KERNEL_DEBUG},1) + CLCC_OPT += -g +endif +CLCC_OPT += --kernel ${KERNEL_NAME} +OBJECTS := $(HOST_SRCS:.cpp=.o) +.PHONY: all +all: run +host: ${HOST_EXE_DIR}/${HOST_EXE} +xbin_cpu_em: + make SDA_FLOW=cpu_emu xbin -f sdaccel.mk +xbin_hw_em: + make SDA_FLOW=hw_emu xbin -f sdaccel.mk +xbin_hw : + make SDA_FLOW=hw xbin -f sdaccel.mk +xbin: ${XCLBIN} +run_cpu_em: + make SDA_FLOW=cpu_emu run_em -f sdaccel.mk +run_hw_em: + make SDA_FLOW=hw_emu run_em -f sdaccel.mk +run_hw : + make SDA_FLOW=hw run_hw_int -f sdaccel.mk +run_em: xconfig host xbin + XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} +run_hw_int : host xbin_hw + source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS} +estimate : + ${CLCC} -c -t hw_emu --xdevice ${XDEVICE} 
--report estimate ${KERNEL_SRCS} +xconfig : emconfig.json +emconfig.json : + emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od . +${HOST_EXE_DIR}/${HOST_EXE} : ${OBJECTS} + ${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@ +${XCLBIN}: + ${CLCC} ${CLCC_OPT} ${KERNEL_SRCS} +%.o: %.cpp + ${CC} ${HOST_CFLAGS} -c $< -o $@ +clean: + ${RM} -rf ${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil +cleanall: clean + ${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou diff --git a/samples/lenet/lenet_sdaccel.py b/samples/lenet/lenet_sdaccel.py new file mode 100644 index 000000000..917b2b625 --- /dev/null +++ b/samples/lenet/lenet_sdaccel.py @@ -0,0 +1,23 @@ +import heterocl as hcl +import numpy as np +from lenet_main import * + +batch_size = 50 + +# f = build_lenet_inf(batch_size, 'vhls_csim') +f = build_lenet_inf(batch_size, 'sdaccel_sw_emu') + +mnist = mx.test_utils.get_mnist() +correct_sum = 0 + +for i in range(50 // batch_size): + label = mnist['test_label'][i*batch_size:(i+1)*batch_size] + input_image_np = mnist['test_data'][i*batch_size:(i+1)*batch_size] + input_image_hcl = hcl.asarray(input_image_np) + output_hcl = hcl.asarray(np.zeros((batch_size,10))) + f(input_image_hcl, weight_conv1_hcl, weight_conv2_hcl, weight_fc1_hcl, weight_fc2_hcl, output_hcl) + prediction = np.argmax(output_hcl.asnumpy(), axis=1) + correct_sum += np.sum(np.equal(prediction, label)) + +print(str(qtype1) + ", " + str(qtype2) + ": Accuracy over 10000 test images is: {}".format(correct_sum / 10000.)) +assert correct_sum == 9882 diff --git a/samples/lenet/main.cpp b/samples/lenet/main.cpp new file mode 100644 index 000000000..cdf254a18 --- /dev/null +++ b/samples/lenet/main.cpp @@ -0,0 +1,193 @@ +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#pragma once + + + + +int main(void) { +#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE) + #define STR_VALUE(arg) #arg + #define GET_STRING(name) STR_VALUE(name) + #define TARGET_DEVICE GET_STRING(SDX_PLATFORM) +#endif + char* xclbinFilename = argv[1]; + + std::vector source_0(50 * 1 * 28 * 28); + std::vector source_1(20 * 1 * 5 * 5); + std::vector source_2(50 * 20 * 5 * 5); + std::vector source_3(500 * 800); + std::vector source_4(10 * 500); + std::vector source_5(50 * 10); + + size_t vector_size_bytes_0 = sizeof(float) * 50 * 1 * 28 * 28; + size_t vector_size_bytes_1 = sizeof(int) * 20 * 1 * 5 * 5; + size_t vector_size_bytes_2 = sizeof(int) * 50 * 20 * 5 * 5; + size_t vector_size_bytes_3 = sizeof(int) * 500 * 800; + size_t vector_size_bytes_4 = sizeof(int) * 10 * 500; + size_t vector_size_bytes_5 = sizeof(float) * 50 * 10; + + float* arg_0 = (float*)shmat(2949125, nullptr, 0); + for (size_t i0 = 0; i0 < 50; i0++) { + for (size_t i1 = 0; i1 < 1; i1++) { + for (size_t i2 = 0; i2 < 28; i2++) { + for (size_t i3 = 0; i3 < 28; i3++) { + source_0[i3 + i2*28 + i1*784 + i0*784] = arg_0[i3 + i2*28 + i1*784 + i0*784]; + } + } + } + } + int* arg_1 = (int*)shmat(3473408, nullptr, 0); + for (size_t i0 = 0; i0 < 20; i0++) { + for (size_t i1 = 0; i1 < 1; i1++) { + for (size_t i2 = 0; i2 < 5; i2++) { + for (size_t i3 = 0; i3 < 5; i3++) { + source_1[i3 + i2*5 + i1*25 + i0*25] = arg_1[i3 + i2*5 + i1*25 + i0*25] >> 14; + } + } + } + } + int* arg_2 = (int*)shmat(3473409, nullptr, 0); + for (size_t i0 = 0; i0 < 50; i0++) { + for (size_t i1 = 0; i1 < 20; i1++) { + for (size_t i2 = 0; i2 < 5; i2++) { + for (size_t i3 = 0; i3 < 5; i3++) { + source_2[i3 + i2*5 + i1*25 + i0*500] = arg_2[i3 + i2*5 + i1*25 + i0*500] >> 14; + } + } + } + } + int* arg_3 = (int*)shmat(2097154, nullptr, 0); + for (size_t i0 = 0; i0 < 500; i0++) { + for (size_t i1 = 0; i1 < 800; i1++) { + source_3[i1 + i0*800] = arg_3[i1 + 
i0*800] >> 14; + } + } + int* arg_4 = (int*)shmat(1835011, nullptr, 0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 500; i1++) { + source_4[i1 + i0*500] = arg_4[i1 + i0*500] >> 14; + } + } + float* arg_5 = (float*)shmat(1703940, nullptr, 0); + for (size_t i0 = 0; i0 < 50; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + source_5[i1 + i0*10] = arg_5[i1 + i0*10]; + } + } + std::vector platforms; + cl::Platform::get(&platforms); + cl::Platform platform = platforms[0]; + + std::vector devices; + platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices); + cl::Device device = devices[0]; + + cl::Context context(device); + cl::CommandQueue q(context, device); + + std::ifstream bin_file(xclbinFilename, std::ifstream::binary); + bin_file.seekg (0, bin_file.end); + unsigned nb = bin_file.tellg(); + bin_file.seekg (0, bin_file.beg); + char *buf = new char [nb]; + bin_file.read(buf, nb); + + cl::Program::Binaries bins; + bins.push_back({buf,nb}); + devices.resize(1); + cl::Program program(context, devices, bins); + + int err1; + cl::Kernel kernel(program, "default_function", &err1); + auto default_function = cl::KernelFunctor(kernel); + + cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0); + cl::Buffer buffer_1(context, CL_MEM_READ_WRITE, vector_size_bytes_1); + cl::Buffer buffer_2(context, CL_MEM_READ_WRITE, vector_size_bytes_2); + cl::Buffer buffer_3(context, CL_MEM_READ_WRITE, vector_size_bytes_3); + cl::Buffer buffer_4(context, CL_MEM_READ_WRITE, vector_size_bytes_4); + cl::Buffer buffer_5(context, CL_MEM_READ_WRITE, vector_size_bytes_5); + + q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); + q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); + q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); + q.enqueueWriteBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); + q.enqueueWriteBuffer(buffer_4, CL_TRUE, 0, 
vector_size_bytes_4, source_4.data()); + q.enqueueWriteBuffer(buffer_5, CL_TRUE, 0, vector_size_bytes_5, source_5.data()); + + default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2, buffer_3, buffer_4, buffer_5); + q.finish(); + + q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); + q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); + q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); + q.enqueueReadBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); + q.enqueueReadBuffer(buffer_4, CL_TRUE, 0, vector_size_bytes_4, source_4.data()); + q.enqueueReadBuffer(buffer_5, CL_TRUE, 0, vector_size_bytes_5, source_5.data()); + + for (size_t i0 = 0; i0 < 50; i0++) { + for (size_t i1 = 0; i1 < 1; i1++) { + for (size_t i2 = 0; i2 < 28; i2++) { + for (size_t i3 = 0; i3 < 28; i3++) { + arg_0[i3 + i2*28 + i1*784 + i0*784] = source_0[i3 + i2*28 + i1*784 + i0*784]; + } + } + } + } + shmdt(arg_0); + for (size_t i0 = 0; i0 < 20; i0++) { + for (size_t i1 = 0; i1 < 1; i1++) { + for (size_t i2 = 0; i2 < 5; i2++) { + for (size_t i3 = 0; i3 < 5; i3++) { + arg_1[i3 + i2*5 + i1*25 + i0*25] = source_1[i3 + i2*5 + i1*25 + i0*25] << 14; + } + } + } + } + shmdt(arg_1); + for (size_t i0 = 0; i0 < 50; i0++) { + for (size_t i1 = 0; i1 < 20; i1++) { + for (size_t i2 = 0; i2 < 5; i2++) { + for (size_t i3 = 0; i3 < 5; i3++) { + arg_2[i3 + i2*5 + i1*25 + i0*500] = source_2[i3 + i2*5 + i1*25 + i0*500] << 14; + } + } + } + } + shmdt(arg_2); + for (size_t i0 = 0; i0 < 500; i0++) { + for (size_t i1 = 0; i1 < 800; i1++) { + arg_3[i1 + i0*800] = source_3[i1 + i0*800] << 14; + } + } + shmdt(arg_3); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 500; i1++) { + arg_4[i1 + i0*500] = source_4[i1 + i0*500] << 14; + } + } + shmdt(arg_4); + for (size_t i0 = 0; i0 < 50; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + arg_5[i1 + i0*10] = 
source_5[i1 + i0*10]; + } + } + shmdt(arg_5); +} diff --git a/samples/lenet/sdaccel.mk b/samples/lenet/sdaccel.mk new file mode 100644 index 000000000..ce266d89e --- /dev/null +++ b/samples/lenet/sdaccel.mk @@ -0,0 +1,32 @@ +ifndef XILINX_SDX +$(error Environment variable XILINX_SDX is required and should point to SDAccel install area) +endif +SDA_FLOW = cpu_emu +HOST_SRCS = host.cpp +HOST_EXE_DIR=. +HOST_EXE = host +HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL +HOST_LFLAGS = +KERNEL_SRCS = default_function.cl +KERNEL_NAME = default_function +KERNEL_DEFS = +KERNEL_INCS = +XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0 +XDEVICE_REPO_PATH= +KEEP_TEMP=1 +KERNEL_DEBUG= +XCLBIN_NAME=bin_krnl +HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\" +BOARD_SETUP_FILE=setup.sh +ifeq (${SDA_FLOW},cpu_emu) + CLCC_OPT += -t sw_emu + XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin +else ifeq (${SDA_FLOW},hw_emu) + CLCC_OPT += -t hw_emu + XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin +else ifeq (${SDA_FLOW},hw) + XCLBIN = ${XCLBIN_NAME}_hw.xclbin +CLCC_OPT += -t hw +endifHOST_ARGS = ${XCLBIN} +COMMON_DIR = ./common +include ${COMMON_DIR}/common.mk diff --git a/samples/smith_waterman/main.cpp b/samples/smith_waterman/main.cpp index f9c52809b..40bf1057a 100644 --- a/samples/smith_waterman/main.cpp +++ b/samples/smith_waterman/main.cpp @@ -39,25 +39,25 @@ int main(void) { size_t vector_size_bytes_2 = sizeof(unsigned int) * 1024 * 256; size_t vector_size_bytes_3 = sizeof(unsigned int) * 1024 * 256; - unsigned int* arg_0 = (unsigned int*)shmat(1507336, nullptr, 0); + unsigned int* arg_0 = (unsigned int*)shmat(1966082, nullptr, 0); for (size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 128; i1++) { source_0[i1 + i0*128] = arg_0[i1 + i0*128]; } } - unsigned int* arg_1 = (unsigned int*)shmat(3145728, nullptr, 0); + unsigned int* arg_1 = (unsigned int*)shmat(3342336, nullptr, 0); for (size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 128; i1++) { source_1[i1 + i0*128] = arg_1[i1 + i0*128]; } } - unsigned 
int* arg_2 = (unsigned int*)shmat(3145729, nullptr, 0); + unsigned int* arg_2 = (unsigned int*)shmat(3342337, nullptr, 0); for (size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 256; i1++) { source_2[i1 + i0*256] = arg_2[i1 + i0*256]; } } - unsigned int* arg_3 = (unsigned int*)shmat(1769474, nullptr, 0); + unsigned int* arg_3 = (unsigned int*)shmat(1703939, nullptr, 0); for (size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 256; i1++) { source_3[i1 + i0*256] = arg_3[i1 + i0*256]; diff --git a/samples/smith_waterman/smith_waterman_vhls.py b/samples/smith_waterman/smith_waterman_vhls.py index 354cac757..8cec359dd 100644 --- a/samples/smith_waterman/smith_waterman_vhls.py +++ b/samples/smith_waterman/smith_waterman_vhls.py @@ -2,8 +2,7 @@ import numpy as np from smith_waterman_main import * -# f = top("vhls_csim") -f = top("sdaccel_sw_emu") +f = top("vhls_csim") # add a very simple test _seqA_np = np.ones((num, lenA)) diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 3e6f77d63..2463c1c7d 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -493,10 +493,11 @@ void GenHostCode(TVMArgs& args, stream << "source_" << i << "("; TVMArray* arr = args[i]; for (int j = 0;j < arr->ndim;j++) { - if (j == 0) { - stream << arr->shape[j]; + if (j == arr->ndim-1) { + stream << arr->shape[j] << ")"; } else { - stream << " * " << arr->shape[j] << ")"; + // stream << " * " << arr->shape[j] << ")"; + stream << arr->shape[j] << " * "; } } stream << ";\n"; From edfa9ce7a8d59862711700e9837e6432ac119329 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 5 Sep 2019 16:55:32 -0400 Subject: [PATCH 068/103] fixed host.cpp multiple bug --- samples/gemm/main.cpp | 58 ++++++++++++------------ samples/smith_waterman/main.cpp | 8 ++-- tvm/src/codegen/opencl/sdaccel_module.cc | 1 + 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/samples/gemm/main.cpp 
b/samples/gemm/main.cpp index 9e09fbc1d..e3099966c 100644 --- a/samples/gemm/main.cpp +++ b/samples/gemm/main.cpp @@ -29,30 +29,30 @@ int main(void) { #endif char* xclbinFilename = argv[1]; - std::vector source_0(3 * 3); - std::vector source_1(3 * 3); - std::vector source_2(3 * 3); - - size_t vector_size_bytes_0 = sizeof(int) * 3 * 3; - size_t vector_size_bytes_1 = sizeof(int) * 3 * 3; - size_t vector_size_bytes_2 = sizeof(int) * 3 * 3; - - int* arg_0 = (int*)shmat(1441798, nullptr, 0); - for (size_t i0 = 0; i0 < 3; i0++) { - for (size_t i1 = 0; i1 < 3; i1++) { - source_0[i1 + i0*3] = arg_0[i1 + i0*3]; + std::vector source_0(10 * 10); + std::vector source_1(10 * 10); + std::vector source_2(10 * 10); + + size_t vector_size_bytes_0 = sizeof(int) * 10 * 10; + size_t vector_size_bytes_1 = sizeof(int) * 10 * 10; + size_t vector_size_bytes_2 = sizeof(int) * 10 * 10; + + int* arg_0 = (int*)shmat(1900547, nullptr, 0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + source_0[i1 + i0*10] = arg_0[i1 + i0*10]; } } - int* arg_1 = (int*)shmat(1441799, nullptr, 0); - for (size_t i0 = 0; i0 < 3; i0++) { - for (size_t i1 = 0; i1 < 3; i1++) { - source_1[i1 + i0*3] = arg_1[i1 + i0*3]; + int* arg_1 = (int*)shmat(1835012, nullptr, 0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + source_1[i1 + i0*10] = arg_1[i1 + i0*10]; } } - int* arg_2 = (int*)shmat(1441800, nullptr, 0); - for (size_t i0 = 0; i0 < 3; i0++) { - for (size_t i1 = 0; i1 < 3; i1++) { - source_2[i1 + i0*3] = arg_2[i1 + i0*3]; + int* arg_2 = (int*)shmat(3014661, nullptr, 0); + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + source_2[i1 + i0*10] = arg_2[i1 + i0*10]; } } std::vector platforms; @@ -97,21 +97,21 @@ int main(void) { q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - for (size_t i0 = 0; i0 < 3; i0++) { - for 
(size_t i1 = 0; i1 < 3; i1++) { - arg_0[i1 + i0*3] = source_0[i1 + i0*3]; + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + arg_0[i1 + i0*10] = source_0[i1 + i0*10]; } } shmdt(arg_0); - for (size_t i0 = 0; i0 < 3; i0++) { - for (size_t i1 = 0; i1 < 3; i1++) { - arg_1[i1 + i0*3] = source_1[i1 + i0*3]; + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + arg_1[i1 + i0*10] = source_1[i1 + i0*10]; } } shmdt(arg_1); - for (size_t i0 = 0; i0 < 3; i0++) { - for (size_t i1 = 0; i1 < 3; i1++) { - arg_2[i1 + i0*3] = source_2[i1 + i0*3]; + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 10; i1++) { + arg_2[i1 + i0*10] = source_2[i1 + i0*10]; } } shmdt(arg_2); diff --git a/samples/smith_waterman/main.cpp b/samples/smith_waterman/main.cpp index 40bf1057a..851a98bf7 100644 --- a/samples/smith_waterman/main.cpp +++ b/samples/smith_waterman/main.cpp @@ -39,25 +39,25 @@ int main(void) { size_t vector_size_bytes_2 = sizeof(unsigned int) * 1024 * 256; size_t vector_size_bytes_3 = sizeof(unsigned int) * 1024 * 256; - unsigned int* arg_0 = (unsigned int*)shmat(1966082, nullptr, 0); + unsigned int* arg_0 = (unsigned int*)shmat(1769476, nullptr, 0); for (size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 128; i1++) { source_0[i1 + i0*128] = arg_0[i1 + i0*128]; } } - unsigned int* arg_1 = (unsigned int*)shmat(3342336, nullptr, 0); + unsigned int* arg_1 = (unsigned int*)shmat(3538944, nullptr, 0); for (size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 128; i1++) { source_1[i1 + i0*128] = arg_1[i1 + i0*128]; } } - unsigned int* arg_2 = (unsigned int*)shmat(3342337, nullptr, 0); + unsigned int* arg_2 = (unsigned int*)shmat(3538945, nullptr, 0); for (size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 256; i1++) { source_2[i1 + i0*256] = arg_2[i1 + i0*256]; } } - unsigned int* arg_3 = (unsigned int*)shmat(1703939, nullptr, 0); + unsigned int* arg_3 = (unsigned int*)shmat(2162690, nullptr, 0); for 
(size_t i0 = 0; i0 < 1024; i0++) { for (size_t i1 = 0; i1 < 256; i1++) { source_3[i1 + i0*256] = arg_3[i1 + i0*256]; diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 2463c1c7d..2e27e993d 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -369,6 +369,7 @@ void GenCommonFile() { stream << "OBJECTS := $(HOST_SRCS:.cpp=.o)\n"; stream << ".PHONY: all\n"; stream << "all: run\n"; + stream << "host: ${HOST_EXE_DIR}/${HOST_EXE}\n"; stream << "xbin_cpu_em:\n"; PrintIndent(stream, indent); From 4bb58fa090a726f9d6372c04ecbb551c2c22b551 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Thu, 5 Sep 2019 19:16:20 -0400 Subject: [PATCH 069/103] fixed endif for makefile --- samples/gemm/main.cpp | 6 +- samples/gemm/sdaccel.mk | 3 +- samples/lenet/lenet_main_withoutq.py | 4 +- samples/lenet/main.cpp | 193 ----------------------- samples/lenet/merlinc_code.cl | 68 ++++---- samples/lenet/sdaccel_code.cl | 68 ++++---- samples/lenet/vhls_code.cl | 68 ++++---- tvm/src/codegen/opencl/sdaccel_module.cc | 4 +- 8 files changed, 111 insertions(+), 303 deletions(-) delete mode 100644 samples/lenet/main.cpp diff --git a/samples/gemm/main.cpp b/samples/gemm/main.cpp index e3099966c..d03fe2548 100644 --- a/samples/gemm/main.cpp +++ b/samples/gemm/main.cpp @@ -37,19 +37,19 @@ int main(void) { size_t vector_size_bytes_1 = sizeof(int) * 10 * 10; size_t vector_size_bytes_2 = sizeof(int) * 10 * 10; - int* arg_0 = (int*)shmat(1900547, nullptr, 0); + int* arg_0 = (int*)shmat(3866625, nullptr, 0); for (size_t i0 = 0; i0 < 10; i0++) { for (size_t i1 = 0; i1 < 10; i1++) { source_0[i1 + i0*10] = arg_0[i1 + i0*10]; } } - int* arg_1 = (int*)shmat(1835012, nullptr, 0); + int* arg_1 = (int*)shmat(3866624, nullptr, 0); for (size_t i0 = 0; i0 < 10; i0++) { for (size_t i1 = 0; i1 < 10; i1++) { source_1[i1 + i0*10] = arg_1[i1 + i0*10]; } } - int* arg_2 = (int*)shmat(3014661, nullptr, 0); + int* arg_2 = 
(int*)shmat(3276805, nullptr, 0); for (size_t i0 = 0; i0 < 10; i0++) { for (size_t i1 = 0; i1 < 10; i1++) { source_2[i1 + i0*10] = arg_2[i1 + i0*10]; diff --git a/samples/gemm/sdaccel.mk b/samples/gemm/sdaccel.mk index ce266d89e..9cf0dafd7 100644 --- a/samples/gemm/sdaccel.mk +++ b/samples/gemm/sdaccel.mk @@ -27,6 +27,7 @@ else ifeq (${SDA_FLOW},hw_emu) else ifeq (${SDA_FLOW},hw) XCLBIN = ${XCLBIN_NAME}_hw.xclbin CLCC_OPT += -t hw -endifHOST_ARGS = ${XCLBIN} +endif +HOST_ARGS = ${XCLBIN} COMMON_DIR = ./common include ${COMMON_DIR}/common.mk diff --git a/samples/lenet/lenet_main_withoutq.py b/samples/lenet/lenet_main_withoutq.py index f20ca8c94..205c01e39 100644 --- a/samples/lenet/lenet_main_withoutq.py +++ b/samples/lenet/lenet_main_withoutq.py @@ -2,7 +2,7 @@ import hlib import numpy as np -hcl.init(hcl.Float()) +hcl.init() def softmax(out, x): assert len(x.shape) == 2, "only support 2-dim softmax" @@ -97,7 +97,7 @@ def build_lenet_inf(batch_size=batch_size, target=None): with open('vhls_code.cl', 'w') as f: f.write(code3) - +f = build_lenet_inf(batch_size, 'sdaccel_sw_emu') # weight_conv1_hcl = hcl.asarray(weight_conv1_np, dtype=qtype1) # weight_conv2_hcl = hcl.asarray(weight_conv2_np, dtype=qtype1) diff --git a/samples/lenet/main.cpp b/samples/lenet/main.cpp deleted file mode 100644 index cdf254a18..000000000 --- a/samples/lenet/main.cpp +++ /dev/null @@ -1,193 +0,0 @@ -#define CL_HPP_CL_1_2_DEFAULT_BUILD -#define CL_HPP_TARGET_OPENCL_VERSION 120 -#define CL_HPP_MINIMUM_OPENCL_VERSION 120 -#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#pragma once - - - - -int main(void) { -#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE) - #define STR_VALUE(arg) #arg - #define GET_STRING(name) STR_VALUE(name) - #define TARGET_DEVICE GET_STRING(SDX_PLATFORM) -#endif - char* xclbinFilename = argv[1]; - - 
std::vector source_0(50 * 1 * 28 * 28); - std::vector source_1(20 * 1 * 5 * 5); - std::vector source_2(50 * 20 * 5 * 5); - std::vector source_3(500 * 800); - std::vector source_4(10 * 500); - std::vector source_5(50 * 10); - - size_t vector_size_bytes_0 = sizeof(float) * 50 * 1 * 28 * 28; - size_t vector_size_bytes_1 = sizeof(int) * 20 * 1 * 5 * 5; - size_t vector_size_bytes_2 = sizeof(int) * 50 * 20 * 5 * 5; - size_t vector_size_bytes_3 = sizeof(int) * 500 * 800; - size_t vector_size_bytes_4 = sizeof(int) * 10 * 500; - size_t vector_size_bytes_5 = sizeof(float) * 50 * 10; - - float* arg_0 = (float*)shmat(2949125, nullptr, 0); - for (size_t i0 = 0; i0 < 50; i0++) { - for (size_t i1 = 0; i1 < 1; i1++) { - for (size_t i2 = 0; i2 < 28; i2++) { - for (size_t i3 = 0; i3 < 28; i3++) { - source_0[i3 + i2*28 + i1*784 + i0*784] = arg_0[i3 + i2*28 + i1*784 + i0*784]; - } - } - } - } - int* arg_1 = (int*)shmat(3473408, nullptr, 0); - for (size_t i0 = 0; i0 < 20; i0++) { - for (size_t i1 = 0; i1 < 1; i1++) { - for (size_t i2 = 0; i2 < 5; i2++) { - for (size_t i3 = 0; i3 < 5; i3++) { - source_1[i3 + i2*5 + i1*25 + i0*25] = arg_1[i3 + i2*5 + i1*25 + i0*25] >> 14; - } - } - } - } - int* arg_2 = (int*)shmat(3473409, nullptr, 0); - for (size_t i0 = 0; i0 < 50; i0++) { - for (size_t i1 = 0; i1 < 20; i1++) { - for (size_t i2 = 0; i2 < 5; i2++) { - for (size_t i3 = 0; i3 < 5; i3++) { - source_2[i3 + i2*5 + i1*25 + i0*500] = arg_2[i3 + i2*5 + i1*25 + i0*500] >> 14; - } - } - } - } - int* arg_3 = (int*)shmat(2097154, nullptr, 0); - for (size_t i0 = 0; i0 < 500; i0++) { - for (size_t i1 = 0; i1 < 800; i1++) { - source_3[i1 + i0*800] = arg_3[i1 + i0*800] >> 14; - } - } - int* arg_4 = (int*)shmat(1835011, nullptr, 0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 500; i1++) { - source_4[i1 + i0*500] = arg_4[i1 + i0*500] >> 14; - } - } - float* arg_5 = (float*)shmat(1703940, nullptr, 0); - for (size_t i0 = 0; i0 < 50; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - 
source_5[i1 + i0*10] = arg_5[i1 + i0*10]; - } - } - std::vector platforms; - cl::Platform::get(&platforms); - cl::Platform platform = platforms[0]; - - std::vector devices; - platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices); - cl::Device device = devices[0]; - - cl::Context context(device); - cl::CommandQueue q(context, device); - - std::ifstream bin_file(xclbinFilename, std::ifstream::binary); - bin_file.seekg (0, bin_file.end); - unsigned nb = bin_file.tellg(); - bin_file.seekg (0, bin_file.beg); - char *buf = new char [nb]; - bin_file.read(buf, nb); - - cl::Program::Binaries bins; - bins.push_back({buf,nb}); - devices.resize(1); - cl::Program program(context, devices, bins); - - int err1; - cl::Kernel kernel(program, "default_function", &err1); - auto default_function = cl::KernelFunctor(kernel); - - cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0); - cl::Buffer buffer_1(context, CL_MEM_READ_WRITE, vector_size_bytes_1); - cl::Buffer buffer_2(context, CL_MEM_READ_WRITE, vector_size_bytes_2); - cl::Buffer buffer_3(context, CL_MEM_READ_WRITE, vector_size_bytes_3); - cl::Buffer buffer_4(context, CL_MEM_READ_WRITE, vector_size_bytes_4); - cl::Buffer buffer_5(context, CL_MEM_READ_WRITE, vector_size_bytes_5); - - q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); - q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); - q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - q.enqueueWriteBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); - q.enqueueWriteBuffer(buffer_4, CL_TRUE, 0, vector_size_bytes_4, source_4.data()); - q.enqueueWriteBuffer(buffer_5, CL_TRUE, 0, vector_size_bytes_5, source_5.data()); - - default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2, buffer_3, buffer_4, buffer_5); - q.finish(); - - q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data()); 
- q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); - q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - q.enqueueReadBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data()); - q.enqueueReadBuffer(buffer_4, CL_TRUE, 0, vector_size_bytes_4, source_4.data()); - q.enqueueReadBuffer(buffer_5, CL_TRUE, 0, vector_size_bytes_5, source_5.data()); - - for (size_t i0 = 0; i0 < 50; i0++) { - for (size_t i1 = 0; i1 < 1; i1++) { - for (size_t i2 = 0; i2 < 28; i2++) { - for (size_t i3 = 0; i3 < 28; i3++) { - arg_0[i3 + i2*28 + i1*784 + i0*784] = source_0[i3 + i2*28 + i1*784 + i0*784]; - } - } - } - } - shmdt(arg_0); - for (size_t i0 = 0; i0 < 20; i0++) { - for (size_t i1 = 0; i1 < 1; i1++) { - for (size_t i2 = 0; i2 < 5; i2++) { - for (size_t i3 = 0; i3 < 5; i3++) { - arg_1[i3 + i2*5 + i1*25 + i0*25] = source_1[i3 + i2*5 + i1*25 + i0*25] << 14; - } - } - } - } - shmdt(arg_1); - for (size_t i0 = 0; i0 < 50; i0++) { - for (size_t i1 = 0; i1 < 20; i1++) { - for (size_t i2 = 0; i2 < 5; i2++) { - for (size_t i3 = 0; i3 < 5; i3++) { - arg_2[i3 + i2*5 + i1*25 + i0*500] = source_2[i3 + i2*5 + i1*25 + i0*500] << 14; - } - } - } - } - shmdt(arg_2); - for (size_t i0 = 0; i0 < 500; i0++) { - for (size_t i1 = 0; i1 < 800; i1++) { - arg_3[i1 + i0*800] = source_3[i1 + i0*800] << 14; - } - } - shmdt(arg_3); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 500; i1++) { - arg_4[i1 + i0*500] = source_4[i1 + i0*500] << 14; - } - } - shmdt(arg_4); - for (size_t i0 = 0; i0 < 50; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - arg_5[i1 + i0*10] = source_5[i1 + i0*10]; - } - } - shmdt(arg_5); -} diff --git a/samples/lenet/merlinc_code.cl b/samples/lenet/merlinc_code.cl index e8a24c836..1c5118707 100644 --- a/samples/lenet/merlinc_code.cl +++ b/samples/lenet/merlinc_code.cl @@ -2,8 +2,8 @@ #include #include #pragma ACCEL kernel -void default_function(float* input_image, float* weight_conv1, float* weight_conv2, 
float* weight_fc1, float* weight_fc2, float* lenet) { - float conv2d[11520000]; +void default_function(int* input_image, int* weight_conv1, int* weight_conv2, int* weight_fc1, int* weight_fc2, int* lenet) { + int conv2d[11520000]; for (int nn = 0; nn < 1000; ++nn) { for (int ff = 0; ff < 20; ++ff) { for (int yy = 0; yy < 24; ++yy) { @@ -12,25 +12,25 @@ void default_function(float* input_image, float* weight_conv1, float* weight_con reducer0 = 0.000000e+00f; for (int ra1 = 0; ra1 < 5; ++ra1) { for (int ra2 = 0; ra2 < 5; ++ra2) { - reducer0 = ((input_image[(((xx + ra2) + ((yy + ra1) * 28)) + (nn * 784))] * weight_conv1[((ra2 + (ra1 * 5)) + (ff * 25))]) + reducer0); + reducer0 = (((float)(((long)input_image[(((xx + ra2) + ((yy + ra1) * 28)) + (nn * 784))]) * ((long)weight_conv1[((ra2 + (ra1 * 5)) + (ff * 25))]))) + reducer0); } } - conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = reducer0; + conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = ((int)reducer0); } } } } - float tanh1[11520000]; + int tanh1[11520000]; for (int args = 0; args < 1000; ++args) { for (int args0 = 0; args0 < 20; ++args0) { for (int args1 = 0; args1 < 24; ++args1) { for (int args2 = 0; args2 < 24; ++args2) { - tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((float)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); + tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((int)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); } } } } - float max_pool[2880000]; + int max_pool[2880000]; for (int i = 0; i < 1000; ++i) { for (int c = 0; c < 20; ++c) { for (int h = 0; h < 12; ++h) { @@ -39,15 +39,15 @@ void default_function(float* input_image, float* weight_conv1, float* weight_con reducer1 = -1.000000e+00f; for (int ra3 = 0; ra3 < 2; ++ra3) { for (int ra4 = 0; ra4 < 2; ++ra4) { - reducer1 = max(tanh1[(((((w * 2) + ra4) + (((h * 2) + ra3) * 24)) + (c * 576)) + (i * 11520))], 
reducer1); + reducer1 = max(((float)tanh1[(((((w * 2) + ra4) + (((h * 2) + ra3) * 24)) + (c * 576)) + (i * 11520))]), reducer1); } } - max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = reducer1; + max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = ((int)reducer1); } } } } - float conv2d1[3200000]; + int conv2d1[3200000]; for (int nn1 = 0; nn1 < 1000; ++nn1) { for (int ff1 = 0; ff1 < 50; ++ff1) { for (int yy1 = 0; yy1 < 8; ++yy1) { @@ -57,26 +57,26 @@ void default_function(float* input_image, float* weight_conv1, float* weight_con for (int ra5 = 0; ra5 < 20; ++ra5) { for (int ra6 = 0; ra6 < 5; ++ra6) { for (int ra7 = 0; ra7 < 5; ++ra7) { - reducer2 = ((max_pool[((((xx1 + ra7) + ((yy1 + ra6) * 12)) + (ra5 * 144)) + (nn1 * 2880))] * weight_conv2[(((ra7 + (ra6 * 5)) + (ra5 * 25)) + (ff1 * 500))]) + reducer2); + reducer2 = (((float)(((long)max_pool[((((xx1 + ra7) + ((yy1 + ra6) * 12)) + (ra5 * 144)) + (nn1 * 2880))]) * ((long)weight_conv2[(((ra7 + (ra6 * 5)) + (ra5 * 25)) + (ff1 * 500))]))) + reducer2); } } } - conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = reducer2; + conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = ((int)reducer2); } } } } - float tanh2[3200000]; + int tanh2[3200000]; for (int args3 = 0; args3 < 1000; ++args3) { for (int args01 = 0; args01 < 50; ++args01) { for (int args11 = 0; args11 < 8; ++args11) { for (int args21 = 0; args21 < 8; ++args21) { - tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((float)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); + tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((int)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); } } } } - float max_pool1[800000]; + int max_pool1[800000]; for (int i1 = 0; i1 < 1000; ++i1) { for (int c1 = 0; c1 < 50; ++c1) { for (int h1 = 0; h1 < 4; ++h1) { @@ -85,70 +85,70 @@ void default_function(float* input_image, float* weight_conv1, 
float* weight_con reducer3 = -1.000000e+00f; for (int ra8 = 0; ra8 < 2; ++ra8) { for (int ra9 = 0; ra9 < 2; ++ra9) { - reducer3 = max(tanh2[(((((w1 * 2) + ra9) + (((h1 * 2) + ra8) * 8)) + (c1 * 64)) + (i1 * 3200))], reducer3); + reducer3 = max(((float)tanh2[(((((w1 * 2) + ra9) + (((h1 * 2) + ra8) * 8)) + (c1 * 64)) + (i1 * 3200))]), reducer3); } } - max_pool1[(((w1 + (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = reducer3; + max_pool1[(((w1 + (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = ((int)reducer3); } } } } - float compute0[800000]; + int compute0[800000]; for (int i2 = 0; i2 < 1000; ++i2) { for (int j = 0; j < 800; ++j) { compute0[(j + (i2 * 800))] = max_pool1[((((((j / 4) % 4) * 4) + (j % 4)) + ((j / 16) * 16)) + (i2 * 800))]; } } - float dense[500000]; + int dense[500000]; for (int i3 = 0; i3 < 1000; ++i3) { for (int j1 = 0; j1 < 500; ++j1) { float reducer4; reducer4 = 0.000000e+00f; for (int ra10 = 0; ra10 < 800; ++ra10) { - reducer4 = ((compute0[(ra10 + (i3 * 800))] * weight_fc1[(ra10 + (j1 * 800))]) + reducer4); + reducer4 = (((float)(((long)compute0[(ra10 + (i3 * 800))]) * ((long)weight_fc1[(ra10 + (j1 * 800))]))) + reducer4); } - dense[(j1 + (i3 * 500))] = reducer4; + dense[(j1 + (i3 * 500))] = ((int)reducer4); } } - float tanh3[500000]; + int tanh3[500000]; for (int args4 = 0; args4 < 1000; ++args4) { for (int args02 = 0; args02 < 500; ++args02) { - tanh3[(args02 + (args4 * 500))] = ((float)tanh(((double)dense[(args02 + (args4 * 500))]))); + tanh3[(args02 + (args4 * 500))] = ((int)tanh(((double)dense[(args02 + (args4 * 500))]))); } } - float dense1[10000]; + int dense1[10000]; for (int i4 = 0; i4 < 1000; ++i4) { for (int j2 = 0; j2 < 10; ++j2) { float reducer5; reducer5 = 0.000000e+00f; for (int ra11 = 0; ra11 < 500; ++ra11) { - reducer5 = ((tanh3[(ra11 + (i4 * 500))] * weight_fc2[(ra11 + (j2 * 500))]) + reducer5); + reducer5 = (((float)(((long)tanh3[(ra11 + (i4 * 500))]) * ((long)weight_fc2[(ra11 + (j2 * 500))]))) + reducer5); } - dense1[(j2 + (i4 * 10))] = 
reducer5; + dense1[(j2 + (i4 * 10))] = ((int)reducer5); } } - float compute1[1000]; + int compute1[1000]; for (int i5 = 0; i5 < 1000; ++i5) { int max; max = 0; for (int ra12 = 0; ra12 < 10; ++ra12) { - max = ((int)max(dense1[(ra12 + (i5 * 10))], ((float)max))); + max = max(dense1[(ra12 + (i5 * 10))], max); } - compute1[i5] = ((float)max); + compute1[i5] = max; } - float compute2[1000]; + int compute2[1000]; for (int i6 = 0; i6 < 1000; ++i6) { int sum; sum = 0; for (int ra13 = 0; ra13 < 10; ++ra13) { - sum = ((int)(exp(((double)(dense1[(ra13 + (i6 * 10))] - compute1[i6]))) + ((double)sum))); + sum = ((int)(exp(((double)((long)(dense1[(ra13 + (i6 * 10))] - compute1[i6])))) + ((double)sum))); } - compute2[i6] = ((float)sum); + compute2[i6] = sum; } - float update0; + int update0; for (int i7 = 0; i7 < 1000; ++i7) { for (int j3 = 0; j3 < 10; ++j3) { - lenet[(j3 + (i7 * 10))] = ((float)(exp(((double)(dense1[(j3 + (i7 * 10))] - compute1[i7]))) / ((double)compute2[i7]))); + lenet[(j3 + (i7 * 10))] = ((int)(exp(((double)((long)(dense1[(j3 + (i7 * 10))] - compute1[i7])))) / ((double)compute2[i7]))); } } } diff --git a/samples/lenet/sdaccel_code.cl b/samples/lenet/sdaccel_code.cl index 160ecbd18..114880df0 100644 --- a/samples/lenet/sdaccel_code.cl +++ b/samples/lenet/sdaccel_code.cl @@ -1,5 +1,5 @@ -__kernel void default_function(__global float* input_image, __global float* weight_conv1, __global float* weight_conv2, __global float* weight_fc1, __global float* weight_fc2, __global float* lenet) { - __local float conv2d[11520000]; +__kernel void default_function(__global int* input_image, __global int* weight_conv1, __global int* weight_conv2, __global int* weight_fc1, __global int* weight_fc2, __global int* lenet) { + __local int conv2d[11520000]; for (int nn = 0; nn < 1000; ++nn) { for (int ff = 0; ff < 20; ++ff) { for (int yy = 0; yy < 24; ++yy) { @@ -8,25 +8,25 @@ __kernel void default_function(__global float* input_image, __global float* weig reducer6 = 0.000000e+00f; 
for (int ra15 = 0; ra15 < 5; ++ra15) { for (int ra16 = 0; ra16 < 5; ++ra16) { - reducer6 = ((input_image[(((xx + ra16) + ((yy + ra15) * 28)) + (nn * 784))] * weight_conv1[((ra16 + (ra15 * 5)) + (ff * 25))]) + reducer6); + reducer6 = (((float)(((long)input_image[(((xx + ra16) + ((yy + ra15) * 28)) + (nn * 784))]) * ((long)weight_conv1[((ra16 + (ra15 * 5)) + (ff * 25))]))) + reducer6); } } - conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = reducer6; + conv2d[(((xx + (yy * 24)) + (ff * 576)) + (nn * 11520))] = ((int)reducer6); } } } } - __local float tanh1[11520000]; + __local int tanh1[11520000]; for (int args = 0; args < 1000; ++args) { for (int args0 = 0; args0 < 20; ++args0) { for (int args1 = 0; args1 < 24; ++args1) { for (int args2 = 0; args2 < 24; ++args2) { - tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((float)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); + tanh1[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))] = ((int)tanh(((double)conv2d[(((args2 + (args1 * 24)) + (args0 * 576)) + (args * 11520))]))); } } } } - __local float max_pool[2880000]; + __local int max_pool[2880000]; for (int i = 0; i < 1000; ++i) { for (int c = 0; c < 20; ++c) { for (int h = 0; h < 12; ++h) { @@ -35,15 +35,15 @@ __kernel void default_function(__global float* input_image, __global float* weig reducer7 = -1.000000e+00f; for (int ra17 = 0; ra17 < 2; ++ra17) { for (int ra18 = 0; ra18 < 2; ++ra18) { - reducer7 = max(tanh1[(((((w * 2) + ra18) + (((h * 2) + ra17) * 24)) + (c * 576)) + (i * 11520))], reducer7); + reducer7 = max(((float)tanh1[(((((w * 2) + ra18) + (((h * 2) + ra17) * 24)) + (c * 576)) + (i * 11520))]), reducer7); } } - max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = reducer7; + max_pool[(((w + (h * 12)) + (c * 144)) + (i * 2880))] = ((int)reducer7); } } } } - __local float conv2d1[3200000]; + __local int conv2d1[3200000]; for (int nn1 = 0; nn1 < 1000; ++nn1) { for (int ff1 = 0; 
ff1 < 50; ++ff1) { for (int yy1 = 0; yy1 < 8; ++yy1) { @@ -53,26 +53,26 @@ __kernel void default_function(__global float* input_image, __global float* weig for (int ra19 = 0; ra19 < 20; ++ra19) { for (int ra20 = 0; ra20 < 5; ++ra20) { for (int ra21 = 0; ra21 < 5; ++ra21) { - reducer8 = ((max_pool[((((xx1 + ra21) + ((yy1 + ra20) * 12)) + (ra19 * 144)) + (nn1 * 2880))] * weight_conv2[(((ra21 + (ra20 * 5)) + (ra19 * 25)) + (ff1 * 500))]) + reducer8); + reducer8 = (((float)(((long)max_pool[((((xx1 + ra21) + ((yy1 + ra20) * 12)) + (ra19 * 144)) + (nn1 * 2880))]) * ((long)weight_conv2[(((ra21 + (ra20 * 5)) + (ra19 * 25)) + (ff1 * 500))]))) + reducer8); } } } - conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = reducer8; + conv2d1[(((xx1 + (yy1 * 8)) + (ff1 * 64)) + (nn1 * 3200))] = ((int)reducer8); } } } } - __local float tanh2[3200000]; + __local int tanh2[3200000]; for (int args3 = 0; args3 < 1000; ++args3) { for (int args01 = 0; args01 < 50; ++args01) { for (int args11 = 0; args11 < 8; ++args11) { for (int args21 = 0; args21 < 8; ++args21) { - tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((float)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); + tanh2[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))] = ((int)tanh(((double)conv2d1[(((args21 + (args11 * 8)) + (args01 * 64)) + (args3 * 3200))]))); } } } } - __local float max_pool1[800000]; + __local int max_pool1[800000]; for (int i1 = 0; i1 < 1000; ++i1) { for (int c1 = 0; c1 < 50; ++c1) { for (int h1 = 0; h1 < 4; ++h1) { @@ -81,70 +81,70 @@ __kernel void default_function(__global float* input_image, __global float* weig reducer9 = -1.000000e+00f; for (int ra22 = 0; ra22 < 2; ++ra22) { for (int ra23 = 0; ra23 < 2; ++ra23) { - reducer9 = max(tanh2[(((((w1 * 2) + ra23) + (((h1 * 2) + ra22) * 8)) + (c1 * 64)) + (i1 * 3200))], reducer9); + reducer9 = max(((float)tanh2[(((((w1 * 2) + ra23) + (((h1 * 2) + ra22) * 8)) + (c1 * 64)) + (i1 * 
3200))]), reducer9); } } - max_pool1[(((w1 + (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = reducer9; + max_pool1[(((w1 + (h1 * 4)) + (c1 * 16)) + (i1 * 800))] = ((int)reducer9); } } } } - __local float compute3[800000]; + __local int compute3[800000]; for (int i2 = 0; i2 < 1000; ++i2) { for (int j = 0; j < 800; ++j) { compute3[(j + (i2 * 800))] = max_pool1[((((((j / 4) % 4) * 4) + (j % 4)) + ((j / 16) * 16)) + (i2 * 800))]; } } - __local float dense[500000]; + __local int dense[500000]; for (int i3 = 0; i3 < 1000; ++i3) { for (int j1 = 0; j1 < 500; ++j1) { __local float reducer10; reducer10 = 0.000000e+00f; for (int ra24 = 0; ra24 < 800; ++ra24) { - reducer10 = ((compute3[(ra24 + (i3 * 800))] * weight_fc1[(ra24 + (j1 * 800))]) + reducer10); + reducer10 = (((float)(((long)compute3[(ra24 + (i3 * 800))]) * ((long)weight_fc1[(ra24 + (j1 * 800))]))) + reducer10); } - dense[(j1 + (i3 * 500))] = reducer10; + dense[(j1 + (i3 * 500))] = ((int)reducer10); } } - __local float tanh3[500000]; + __local int tanh3[500000]; for (int args4 = 0; args4 < 1000; ++args4) { for (int args02 = 0; args02 < 500; ++args02) { - tanh3[(args02 + (args4 * 500))] = ((float)tanh(((double)dense[(args02 + (args4 * 500))]))); + tanh3[(args02 + (args4 * 500))] = ((int)tanh(((double)dense[(args02 + (args4 * 500))]))); } } - __local float dense1[10000]; + __local int dense1[10000]; for (int i4 = 0; i4 < 1000; ++i4) { for (int j2 = 0; j2 < 10; ++j2) { __local float reducer11; reducer11 = 0.000000e+00f; for (int ra25 = 0; ra25 < 500; ++ra25) { - reducer11 = ((tanh3[(ra25 + (i4 * 500))] * weight_fc2[(ra25 + (j2 * 500))]) + reducer11); + reducer11 = (((float)(((long)tanh3[(ra25 + (i4 * 500))]) * ((long)weight_fc2[(ra25 + (j2 * 500))]))) + reducer11); } - dense1[(j2 + (i4 * 10))] = reducer11; + dense1[(j2 + (i4 * 10))] = ((int)reducer11); } } - __local float compute4[1000]; + __local int compute4[1000]; for (int i5 = 0; i5 < 1000; ++i5) { __local int max; max = 0; for (int ra26 = 0; ra26 < 10; ++ra26) { - max = 
((int)max(dense1[(ra26 + (i5 * 10))], ((float)max))); + max = max(dense1[(ra26 + (i5 * 10))], max); } - compute4[i5] = ((float)max); + compute4[i5] = max; } - __local float compute5[1000]; + __local int compute5[1000]; for (int i6 = 0; i6 < 1000; ++i6) { __local int sum; sum = 0; for (int ra27 = 0; ra27 < 10; ++ra27) { - sum = ((int)(exp(((double)(dense1[(ra27 + (i6 * 10))] - compute4[i6]))) + ((double)sum))); + sum = ((int)(exp(((double)((long)(dense1[(ra27 + (i6 * 10))] - compute4[i6])))) + ((double)sum))); } - compute5[i6] = ((float)sum); + compute5[i6] = sum; } - __local float update1; + __local int update1; for (int i7 = 0; i7 < 1000; ++i7) { for (int j3 = 0; j3 < 10; ++j3) { - lenet[(j3 + (i7 * 10))] = ((float)(exp(((double)(dense1[(j3 + (i7 * 10))] - compute4[i7]))) / ((double)compute5[i7]))); + lenet[(j3 + (i7 * 10))] = ((int)(exp(((double)((long)(dense1[(j3 + (i7 * 10))] - compute4[i7])))) / ((double)compute5[i7]))); } } } diff --git a/samples/lenet/vhls_code.cl b/samples/lenet/vhls_code.cl index 95e8313b4..3d85466b4 100644 --- a/samples/lenet/vhls_code.cl +++ b/samples/lenet/vhls_code.cl @@ -2,8 +2,8 @@ #include #include -void default_function(float input_image[1000][1][28][28], float weight_conv1[20][1][5][5], float weight_conv2[50][20][5][5], float weight_fc1[500][800], float weight_fc2[10][500], float lenet[1000][10]) { - float conv2d[1000][20][24][24]; +void default_function(ap_int<32> input_image[1000][1][28][28], ap_int<32> weight_conv1[20][1][5][5], ap_int<32> weight_conv2[50][20][5][5], ap_int<32> weight_fc1[500][800], ap_int<32> weight_fc2[10][500], ap_int<32> lenet[1000][10]) { + ap_int<32> conv2d[1000][20][24][24]; for (ap_int<32> nn = 0; nn < 1000; ++nn) { for (ap_int<32> ff = 0; ff < 20; ++ff) { for (ap_int<32> yy = 0; yy < 24; ++yy) { @@ -12,25 +12,25 @@ void default_function(float input_image[1000][1][28][28], float weight_conv1[20] reducer12 = 0.000000e+00f; for (ap_int<32> ra29 = 0; ra29 < 5; ++ra29) { for (ap_int<32> ra30 = 0; ra30 < 5; 
++ra30) { - reducer12 = ((input_image[nn][0][(yy + ra29)][(xx + ra30)] * weight_conv1[ff][0][ra29][ra30]) + reducer12); + reducer12 = (((float)(((ap_int<64>)input_image[nn][0][(yy + ra29)][(xx + ra30)]) * ((ap_int<64>)weight_conv1[ff][0][ra29][ra30]))) + reducer12); } } - conv2d[nn][ff][yy][xx] = reducer12; + conv2d[nn][ff][yy][xx] = ((ap_int<32>)reducer12); } } } } - float tanh1[1000][20][24][24]; + ap_int<32> tanh1[1000][20][24][24]; for (ap_int<32> args = 0; args < 1000; ++args) { for (ap_int<32> args0 = 0; args0 < 20; ++args0) { for (ap_int<32> args1 = 0; args1 < 24; ++args1) { for (ap_int<32> args2 = 0; args2 < 24; ++args2) { - tanh1[args][args0][args1][args2] = ((float)tanh(((double)conv2d[args][args0][args1][args2]))); + tanh1[args][args0][args1][args2] = ((ap_int<32>)tanh(((double)conv2d[args][args0][args1][args2]))); } } } } - float max_pool[1000][20][12][12]; + ap_int<32> max_pool[1000][20][12][12]; for (ap_int<32> i = 0; i < 1000; ++i) { for (ap_int<32> c = 0; c < 20; ++c) { for (ap_int<32> h = 0; h < 12; ++h) { @@ -39,15 +39,15 @@ void default_function(float input_image[1000][1][28][28], float weight_conv1[20] reducer13 = -1.000000e+00f; for (ap_int<32> ra31 = 0; ra31 < 2; ++ra31) { for (ap_int<32> ra32 = 0; ra32 < 2; ++ra32) { - reducer13 = std::max(tanh1[i][c][((h * 2) + ra31)][((w * 2) + ra32)], reducer13); + reducer13 = std::max(((float)tanh1[i][c][((h * 2) + ra31)][((w * 2) + ra32)]), reducer13); } } - max_pool[i][c][h][w] = reducer13; + max_pool[i][c][h][w] = ((ap_int<32>)reducer13); } } } } - float conv2d1[1000][50][8][8]; + ap_int<32> conv2d1[1000][50][8][8]; for (ap_int<32> nn1 = 0; nn1 < 1000; ++nn1) { for (ap_int<32> ff1 = 0; ff1 < 50; ++ff1) { for (ap_int<32> yy1 = 0; yy1 < 8; ++yy1) { @@ -57,26 +57,26 @@ void default_function(float input_image[1000][1][28][28], float weight_conv1[20] for (ap_int<32> ra33 = 0; ra33 < 20; ++ra33) { for (ap_int<32> ra34 = 0; ra34 < 5; ++ra34) { for (ap_int<32> ra35 = 0; ra35 < 5; ++ra35) { - reducer14 = 
((max_pool[nn1][ra33][(yy1 + ra34)][(xx1 + ra35)] * weight_conv2[ff1][ra33][ra34][ra35]) + reducer14); + reducer14 = (((float)(((ap_int<64>)max_pool[nn1][ra33][(yy1 + ra34)][(xx1 + ra35)]) * ((ap_int<64>)weight_conv2[ff1][ra33][ra34][ra35]))) + reducer14); } } } - conv2d1[nn1][ff1][yy1][xx1] = reducer14; + conv2d1[nn1][ff1][yy1][xx1] = ((ap_int<32>)reducer14); } } } } - float tanh2[1000][50][8][8]; + ap_int<32> tanh2[1000][50][8][8]; for (ap_int<32> args3 = 0; args3 < 1000; ++args3) { for (ap_int<32> args01 = 0; args01 < 50; ++args01) { for (ap_int<32> args11 = 0; args11 < 8; ++args11) { for (ap_int<32> args21 = 0; args21 < 8; ++args21) { - tanh2[args3][args01][args11][args21] = ((float)tanh(((double)conv2d1[args3][args01][args11][args21]))); + tanh2[args3][args01][args11][args21] = ((ap_int<32>)tanh(((double)conv2d1[args3][args01][args11][args21]))); } } } } - float max_pool1[1000][50][4][4]; + ap_int<32> max_pool1[1000][50][4][4]; for (ap_int<32> i1 = 0; i1 < 1000; ++i1) { for (ap_int<32> c1 = 0; c1 < 50; ++c1) { for (ap_int<32> h1 = 0; h1 < 4; ++h1) { @@ -85,70 +85,70 @@ void default_function(float input_image[1000][1][28][28], float weight_conv1[20] reducer15 = -1.000000e+00f; for (ap_int<32> ra36 = 0; ra36 < 2; ++ra36) { for (ap_int<32> ra37 = 0; ra37 < 2; ++ra37) { - reducer15 = std::max(tanh2[i1][c1][((h1 * 2) + ra36)][((w1 * 2) + ra37)], reducer15); + reducer15 = std::max(((float)tanh2[i1][c1][((h1 * 2) + ra36)][((w1 * 2) + ra37)]), reducer15); } } - max_pool1[i1][c1][h1][w1] = reducer15; + max_pool1[i1][c1][h1][w1] = ((ap_int<32>)reducer15); } } } } - float compute6[1000][800]; + ap_int<32> compute6[1000][800]; for (ap_int<32> i2 = 0; i2 < 1000; ++i2) { for (ap_int<32> j = 0; j < 800; ++j) { compute6[i2][j] = max_pool1[i2][(j / 16)][((j / 4) % 4)][(j % 4)]; } } - float dense[1000][500]; + ap_int<32> dense[1000][500]; for (ap_int<32> i3 = 0; i3 < 1000; ++i3) { for (ap_int<32> j1 = 0; j1 < 500; ++j1) { float reducer16; reducer16 = 0.000000e+00f; for 
(ap_int<32> ra38 = 0; ra38 < 800; ++ra38) { - reducer16 = ((compute6[i3][ra38] * weight_fc1[j1][ra38]) + reducer16); + reducer16 = (((float)(((ap_int<64>)compute6[i3][ra38]) * ((ap_int<64>)weight_fc1[j1][ra38]))) + reducer16); } - dense[i3][j1] = reducer16; + dense[i3][j1] = ((ap_int<32>)reducer16); } } - float tanh3[1000][500]; + ap_int<32> tanh3[1000][500]; for (ap_int<32> args4 = 0; args4 < 1000; ++args4) { for (ap_int<32> args02 = 0; args02 < 500; ++args02) { - tanh3[args4][args02] = ((float)tanh(((double)dense[args4][args02]))); + tanh3[args4][args02] = ((ap_int<32>)tanh(((double)dense[args4][args02]))); } } - float dense1[1000][10]; + ap_int<32> dense1[1000][10]; for (ap_int<32> i4 = 0; i4 < 1000; ++i4) { for (ap_int<32> j2 = 0; j2 < 10; ++j2) { float reducer17; reducer17 = 0.000000e+00f; for (ap_int<32> ra39 = 0; ra39 < 500; ++ra39) { - reducer17 = ((tanh3[i4][ra39] * weight_fc2[j2][ra39]) + reducer17); + reducer17 = (((float)(((ap_int<64>)tanh3[i4][ra39]) * ((ap_int<64>)weight_fc2[j2][ra39]))) + reducer17); } - dense1[i4][j2] = reducer17; + dense1[i4][j2] = ((ap_int<32>)reducer17); } } - float compute7[1000]; + ap_int<32> compute7[1000]; for (ap_int<32> i5 = 0; i5 < 1000; ++i5) { ap_int<32> max; max = 0; for (ap_int<32> ra40 = 0; ra40 < 10; ++ra40) { - max = ((ap_int<32>)std::max(dense1[i5][ra40], ((float)max))); + max = std::max(dense1[i5][ra40], max); } - compute7[i5] = ((float)max); + compute7[i5] = max; } - float compute8[1000]; + ap_int<32> compute8[1000]; for (ap_int<32> i6 = 0; i6 < 1000; ++i6) { ap_int<32> sum; sum = 0; for (ap_int<32> ra41 = 0; ra41 < 10; ++ra41) { - sum = ((ap_int<32>)(exp(((double)(dense1[i6][ra41] - compute7[i6]))) + ((double)sum))); + sum = ((ap_int<32>)(exp(((double)((ap_int<33>)(dense1[i6][ra41] - compute7[i6])))) + ((double)sum))); } - compute8[i6] = ((float)sum); + compute8[i6] = sum; } - float update2; + ap_int<32> update2; for (ap_int<32> i7 = 0; i7 < 1000; ++i7) { for (ap_int<32> j3 = 0; j3 < 10; ++j3) { - lenet[i7][j3] 
= ((float)(exp(((double)(dense1[i7][j3] - compute7[i7]))) / ((double)compute8[i7]))); + lenet[i7][j3] = ((ap_int<32>)(exp(((double)((ap_int<33>)(dense1[i7][j3] - compute7[i7])))) / ((double)compute8[i7]))); } } } diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 2e27e993d..294e9cecb 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -329,7 +329,7 @@ void GenMakFile() { PrintIndent(stream, indent); stream << "XCLBIN = ${XCLBIN_NAME}_hw.xclbin\n"; stream << "CLCC_OPT += -t hw\n"; - stream << "endif"; + stream << "endif\n"; stream << "HOST_ARGS = ${XCLBIN}\n"; stream << "COMMON_DIR = ./common\n"; @@ -369,7 +369,7 @@ void GenCommonFile() { stream << "OBJECTS := $(HOST_SRCS:.cpp=.o)\n"; stream << ".PHONY: all\n"; stream << "all: run\n"; - + stream << "host: ${HOST_EXE_DIR}/${HOST_EXE}\n"; stream << "xbin_cpu_em:\n"; PrintIndent(stream, indent); From edf784febcfd2e3f39f130ef87e0b0da278d9d55 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 6 Sep 2019 13:37:58 -0400 Subject: [PATCH 070/103] modify sdaccel_sw_emu -> sdaccel_csim --- python/heterocl/tvm/target.py | 2 +- samples/gemm/{kernel.py => gemm_runtime.py} | 22 ++- samples/gemm/gemm_sdaccel.py | 4 +- samples/gemm/{main.cpp => host.cpp} | 58 +++---- samples/lenet/lenet_main_withoutq.py | 1 + samples/smith_waterman/sdaccel_code.cl | 1 + .../smith_waterman/sdaccel_code_nounroll.cl | 142 +++++++++++++++++ samples/smith_waterman/smith_waterman_main.py | 18 ++- samples/smith_waterman/vhls_code.cl | 146 ++++++++++++++++++ tvm/src/codegen/hlsc/vhls_module.cc | 4 +- tvm/src/codegen/opencl/build_opencl.cc | 2 +- tvm/src/codegen/opencl/sdaccel_module.cc | 35 +++-- tvm/src/codegen/opencl/sdaccel_module.h | 9 +- 13 files changed, 378 insertions(+), 66 deletions(-) rename samples/gemm/{kernel.py => gemm_runtime.py} (75%) rename samples/gemm/{main.cpp => host.cpp} (67%) create mode 100644 
samples/smith_waterman/sdaccel_code_nounroll.cl create mode 100644 samples/smith_waterman/vhls_code.cl diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index ab8b92f79..19c34934a 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -18,7 +18,7 @@ raise err_msg FPGA_TARGETS = ['merlinc', 'soda', 'soda_xhls', 'vhls', 'ihls', 'vhls_csim', - 'opencl', 'sdaccel', 'sdaccel_sw_emu', 'aocl', 'aocl_sw_emu'] + 'opencl', 'sdaccel', 'sdaccel_csim', 'aocl', 'aocl_csim'] def _merge_opts(opts, new_opts): """Helper function to merge options""" diff --git a/samples/gemm/kernel.py b/samples/gemm/gemm_runtime.py similarity index 75% rename from samples/gemm/kernel.py rename to samples/gemm/gemm_runtime.py index 1ce0b59a0..1cee08a48 100644 --- a/samples/gemm/kernel.py +++ b/samples/gemm/gemm_runtime.py @@ -6,7 +6,7 @@ hcl.init() -matrix_size = (16, 16) +# matrix_size = (16, 16) # def add_compute(A, B): # C = hcl.compute(A.shape, lambda x, y: A[x, y] + B[x, y], "C") # return C @@ -36,23 +36,29 @@ # print (f4) # print (hcl_A, hcl_B, hcl_C) +matrix_1_size = (5, 3) +matrix_2_size = (3, 5) +matrix_3_size = (matrix_1_size[0], matrix_2_size[1]) + def gemm_compute(matrix_1, matrix_2): - m = n = k = 3 + m = matrix_1.shape[0]; + k = matrix_1.shape[1]; + n = matrix_2.shape[1]; r = hcl.reduce_axis(0, k, 'k') temp = hcl.compute((m, n), lambda x, y: hcl.sum(matrix_1[x, r] * matrix_2[r, y], axis = r), name='matrix_3') return temp -matrix_1 = hcl.placeholder((3, 3)) -matrix_2 = hcl.placeholder((3, 3)) +matrix_1 = hcl.placeholder(matrix_1_size) +matrix_2 = hcl.placeholder(matrix_2_size) s = hcl.create_schedule([matrix_1, matrix_2], gemm_compute) -f = hcl.build(s, target='sdaccel_sw_emu') +f = hcl.build(s, target='sdaccel_csim') -matrix_1_np = np.array([[1,2,3],[4,5,6],[7,8,9]]) -matrix_2_np = np.array(([4,5,6],[1,2,2],[7,8,9])) -matrix_3_np = np.array([[0,0,0],[0,0,9],[0,0,0]]) +matrix_1_np = np.random.randint(10, size=matrix_1_size) +matrix_2_np 
= np.random.randint(10, size=matrix_2_size) +matrix_3_np = np.random.randint(10, size=matrix_3_size) hcl_matrix_1 = hcl.asarray(matrix_1_np) hcl_matrix_2 = hcl.asarray(matrix_2_np) diff --git a/samples/gemm/gemm_sdaccel.py b/samples/gemm/gemm_sdaccel.py index 37da40980..85c318120 100644 --- a/samples/gemm/gemm_sdaccel.py +++ b/samples/gemm/gemm_sdaccel.py @@ -4,5 +4,5 @@ #dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] #for dtype in dtypes: -#time_gemm(hcl.Int(32), 10, 10, 10, 'sdaccel_sw_emu') -time_gemm(hcl.Int(32), 10, 10, 10, 'sdaccel_sw_emu') +time_gemm(hcl.Int(32), 15, 15, 15, 'sdaccel_sw_emu') +# time_gemm(hcl.Float(), 100, 100, 100, 'sdaccel_sw_emu') diff --git a/samples/gemm/main.cpp b/samples/gemm/host.cpp similarity index 67% rename from samples/gemm/main.cpp rename to samples/gemm/host.cpp index d03fe2548..64c3c4184 100644 --- a/samples/gemm/main.cpp +++ b/samples/gemm/host.cpp @@ -29,30 +29,30 @@ int main(void) { #endif char* xclbinFilename = argv[1]; - std::vector source_0(10 * 10); - std::vector source_1(10 * 10); - std::vector source_2(10 * 10); - - size_t vector_size_bytes_0 = sizeof(int) * 10 * 10; - size_t vector_size_bytes_1 = sizeof(int) * 10 * 10; - size_t vector_size_bytes_2 = sizeof(int) * 10 * 10; - - int* arg_0 = (int*)shmat(3866625, nullptr, 0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - source_0[i1 + i0*10] = arg_0[i1 + i0*10]; + std::vector source_0(5 * 3); + std::vector source_1(3 * 5); + std::vector source_2(5 * 5); + + size_t vector_size_bytes_0 = sizeof(int) * 5 * 3; + size_t vector_size_bytes_1 = sizeof(int) * 3 * 5; + size_t vector_size_bytes_2 = sizeof(int) * 5 * 5; + + int* arg_0 = (int*)shmat(7340033, nullptr, 0); + for (size_t i0 = 0; i0 < 5; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + source_0[i1 + i0*3] = arg_0[i1 + i0*3]; } } - int* arg_1 = (int*)shmat(3866624, nullptr, 0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - source_1[i1 + i0*10] = 
arg_1[i1 + i0*10]; + int* arg_1 = (int*)shmat(7340032, nullptr, 0); + for (size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 5; i1++) { + source_1[i1 + i0*5] = arg_1[i1 + i0*5]; } } - int* arg_2 = (int*)shmat(3276805, nullptr, 0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - source_2[i1 + i0*10] = arg_2[i1 + i0*10]; + int* arg_2 = (int*)shmat(4521986, nullptr, 0); + for (size_t i0 = 0; i0 < 5; i0++) { + for (size_t i1 = 0; i1 < 5; i1++) { + source_2[i1 + i0*5] = arg_2[i1 + i0*5]; } } std::vector platforms; @@ -97,21 +97,21 @@ int main(void) { q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - arg_0[i1 + i0*10] = source_0[i1 + i0*10]; + for (size_t i0 = 0; i0 < 5; i0++) { + for (size_t i1 = 0; i1 < 3; i1++) { + arg_0[i1 + i0*3] = source_0[i1 + i0*3]; } } shmdt(arg_0); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - arg_1[i1 + i0*10] = source_1[i1 + i0*10]; + for (size_t i0 = 0; i0 < 3; i0++) { + for (size_t i1 = 0; i1 < 5; i1++) { + arg_1[i1 + i0*5] = source_1[i1 + i0*5]; } } shmdt(arg_1); - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 10; i1++) { - arg_2[i1 + i0*10] = source_2[i1 + i0*10]; + for (size_t i0 = 0; i0 < 5; i0++) { + for (size_t i1 = 0; i1 < 5; i1++) { + arg_2[i1 + i0*5] = source_2[i1 + i0*5]; } } shmdt(arg_2); diff --git a/samples/lenet/lenet_main_withoutq.py b/samples/lenet/lenet_main_withoutq.py index 205c01e39..b16bdd6c3 100644 --- a/samples/lenet/lenet_main_withoutq.py +++ b/samples/lenet/lenet_main_withoutq.py @@ -117,6 +117,7 @@ def build_lenet_inf(batch_size=batch_size, target=None): output_hcl = hcl.asarray(np.zeros((batch_size,10))) f(input_image_hcl, weight_conv1_hcl, weight_conv2_hcl, weight_fc1_hcl, weight_fc2_hcl, output_hcl) + print (output_hcl.asnumpy()) 
prediction = np.argmax(output_hcl.asnumpy(), axis=1) correct_sum += np.sum(np.equal(prediction, label)) diff --git a/samples/smith_waterman/sdaccel_code.cl b/samples/smith_waterman/sdaccel_code.cl index d71ee9230..0fb6ea32d 100644 --- a/samples/smith_waterman/sdaccel_code.cl +++ b/samples/smith_waterman/sdaccel_code.cl @@ -2,6 +2,7 @@ __kernel void default_function(__global unsigned char* seqAs, __global unsigned __local int B; __attribute__((xcl_pipeline_loop(1))) for (int t_outer = 0; t_outer < 32; ++t_outer) { + for (int t_inner = 0; t_inner < 32; ++t_inner) { __local int maxtrix_max; maxtrix_max = 0; diff --git a/samples/smith_waterman/sdaccel_code_nounroll.cl b/samples/smith_waterman/sdaccel_code_nounroll.cl new file mode 100644 index 000000000..d5e145c05 --- /dev/null +++ b/samples/smith_waterman/sdaccel_code_nounroll.cl @@ -0,0 +1,142 @@ +__kernel void default_function(__global unsigned char* seqAs, __global unsigned char* seqBs, __global unsigned char* outAs, __global unsigned char* outBs) { + __local int B; + __attribute__((xcl_pipeline_loop(1))) + for (int t_outer = 0; t_outer < 32; ++t_outer) { + __attribute__((opencl_unroll_hint(2))) + for (int t_inner = 0; t_inner < 32; ++t_inner) { + __local int maxtrix_max; + maxtrix_max = 0; + __local int i_max; + i_max = 0; + __local int j_max; + j_max = 0; + __local short matrix[16641]; + for (int x = 0; x < 129; ++x) { + for (int y = 0; y < 129; ++y) { + matrix[(y + (x * 129))] = (short)0; + } + } + __local short action[16641]; + for (int x1 = 0; x1 < 129; ++x1) { + for (int y1 = 0; y1 < 129; ++y1) { + action[(y1 + (x1 * 129))] = (short)3; + } + } + __local int mutate1; + for (int i = 0; i < 129; ++i) { + for (int j = 0; j < 129; ++j) { + __local int trace_back[4]; + for (int x2 = 0; x2 < 4; ++x2) { + trace_back[x2] = 0; + } + if ((i != 0) && (j != 0)) { + trace_back[0] = ((int)(((long)matrix[((j + (i * 129)) + -130)]) + ((long)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner 
+ (t_outer * 32)) * 128)) + -1)]) ? 1 : -4)))); + trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); + trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); + trace_back[3] = 0; + __local int max; + max = trace_back[0]; + __local int act; + act = 0; + for (int i1 = 0; i1 < 4; ++i1) { + if (max < trace_back[i1]) { + max = trace_back[i1]; + act = i1; + } + } + matrix[(j + (i * 129))] = ((short)max); + action[(j + (i * 129))] = ((short)act); + if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { + maxtrix_max = ((int)matrix[(j + (i * 129))]); + i_max = i; + j_max = j; + } + } + } + } + __local int T; + __local int curr_i; + curr_i = i_max; + __local int curr_j; + curr_j = j_max; + __local int next_i; + next_i = 0; + __local int next_j; + next_j = 0; + __local int act1; + act1 = ((int)action[(curr_j + (curr_i * 129))]); + __local int next_i1; + next_i1 = 0; + __local int next_j1; + next_j1 = 0; + if (act1 == 0) { + next_i1 = (curr_i + -1); + next_j1 = (curr_j + -1); + } else { + if (act1 == 1) { + next_i1 = (curr_i + -1); + next_j1 = curr_j; + } else { + if (act1 == 2) { + next_i1 = curr_i; + next_j1 = (curr_j + -1); + } else { + next_i1 = curr_i; + next_j1 = curr_j; + } + } + } + next_i = next_i1; + next_j = next_j1; + __local int tick; + tick = 0; + while (((curr_i != next_i) || (curr_j != next_j))) { + __local int a; + a = 0; + __local int b; + b = 0; + if (next_i == curr_i) { + a = 0; + } else { + a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + if (next_j == curr_j) { + b = 0; + } else { + b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)a); + outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)b); + curr_i = next_i; + curr_j = next_j; + __local int act2; + act2 = ((int)action[(curr_j + (curr_i * 129))]); + __local int next_i2; + next_i2 = 0; + __local int next_j2; + next_j2 = 0; + if (act2 == 0) { + 
next_i2 = (curr_i + -1); + next_j2 = (curr_j + -1); + } else { + if (act2 == 1) { + next_i2 = (curr_i + -1); + next_j2 = curr_j; + } else { + if (act2 == 2) { + next_i2 = curr_i; + next_j2 = (curr_j + -1); + } else { + next_i2 = curr_i; + next_j2 = curr_j; + } + } + } + next_i = next_i2; + next_j = next_j2; + tick = (tick + 1); + } + } + } +} + diff --git a/samples/smith_waterman/smith_waterman_main.py b/samples/smith_waterman/smith_waterman_main.py index 22926e096..d0b1b8d3c 100644 --- a/samples/smith_waterman/smith_waterman_main.py +++ b/samples/smith_waterman/smith_waterman_main.py @@ -128,7 +128,8 @@ def batch_sw(seqAs, seqBs, outAs, outBs): s = hcl.create_schedule_from_scheme(scheme) o, p = s[batch_sw.B].split(batch_sw.B.axis[0], factor=32) s[batch_sw.B].pipeline(o) - s[batch_sw.B].parallel(p) + # s[batch_sw.B].parallel(p) + s[batch_sw.B].unroll(p) return hcl.build(s, target=target) ############################################################################### @@ -145,10 +146,16 @@ def batch_sw(seqAs, seqBs, outAs, outBs): -f = top() -# code = top('sdaccel'); -# with open('sdaccel_code.cl', 'w') as f: -# f.write(code) + +# f = top() +code = top('sdaccel'); +with open('sdaccel_code.cl', 'w') as f: + f.write(code) + +# code3 = top('vhls'); +# with open('vhls_code.cl', 'w') as f: +# f.write(code3) + # code2 = top('merlinc') # with open('merlinc_code.cl', 'w') as f: @@ -181,6 +188,7 @@ def batch_sw(seqAs, seqBs, outAs, outBs): f(_seqA, _seqB, _consA, _consB) _consA_np = _consA.asnumpy() _consB_np = _consB.asnumpy() + for i in range(0, 256): if i < 124: assert _consA_np[0][i] == 1 diff --git a/samples/smith_waterman/vhls_code.cl b/samples/smith_waterman/vhls_code.cl new file mode 100644 index 000000000..8066bc2c2 --- /dev/null +++ b/samples/smith_waterman/vhls_code.cl @@ -0,0 +1,146 @@ +#include +#include +#include + +void default_function(ap_uint<3> seqAs[1024][128], ap_uint<3> seqBs[1024][128], ap_uint<3> outAs[1024][256], ap_uint<3> outBs[1024][256]) { + 
ap_int<32> B; + for (ap_int<32> t_outer = 0; t_outer < 32; ++t_outer) { + #pragma HLS pipeline + for (ap_int<32> t_inner = 0; t_inner < 32; ++t_inner) { + #pragma HLS unroll + ap_int<32> maxtrix_max; + maxtrix_max = 0; + ap_int<32> i_max; + i_max = 0; + ap_int<32> j_max; + j_max = 0; + ap_int<16> matrix[129][129]; + for (ap_int<32> x = 0; x < 129; ++x) { + for (ap_int<32> y = 0; y < 129; ++y) { + matrix[x][y] = (ap_int<16>)0; + } + } + ap_int<16> action[129][129]; + for (ap_int<32> x1 = 0; x1 < 129; ++x1) { + for (ap_int<32> y1 = 0; y1 < 129; ++y1) { + action[x1][y1] = (ap_int<16>)3; + } + } + ap_int<32> mutate3; + for (ap_int<32> i = 0; i < 129; ++i) { + for (ap_int<32> j = 0; j < 129; ++j) { + ap_int<32> trace_back[4]; + for (ap_int<32> x2 = 0; x2 < 4; ++x2) { + trace_back[x2] = 0; + } + if ((i != 0) && (j != 0)) { + trace_back[0] = ((ap_int<32>)(((ap_int<33>)matrix[(i + -1)][(j + -1)]) + ((ap_int<33>)((seqAs[(t_inner + (t_outer * 32))][(i + -1)] == seqBs[(t_inner + (t_outer * 32))][(j + -1)]) ? 
1 : -4)))); + trace_back[1] = (((ap_int<32>)matrix[(i + -1)][j]) + -4); + trace_back[2] = (((ap_int<32>)matrix[i][(j + -1)]) + -4); + trace_back[3] = 0; + ap_int<32> max; + max = trace_back[0]; + ap_int<32> act; + act = 0; + for (ap_int<32> i1 = 0; i1 < 4; ++i1) { + if (max < trace_back[i1]) { + max = trace_back[i1]; + act = i1; + } + } + matrix[i][j] = ((ap_int<16>)max); + action[i][j] = ((ap_int<16>)act); + if (maxtrix_max < ((ap_int<32>)matrix[i][j])) { + maxtrix_max = ((ap_int<32>)matrix[i][j]); + i_max = i; + j_max = j; + } + } + } + } + ap_int<32> T; + ap_int<32> curr_i; + curr_i = i_max; + ap_int<32> curr_j; + curr_j = j_max; + ap_int<32> next_i; + next_i = 0; + ap_int<32> next_j; + next_j = 0; + ap_int<32> act1; + act1 = ((ap_int<32>)action[((curr_j / 129) + curr_i)][(curr_j % 129)]); + ap_int<32> next_i1; + next_i1 = 0; + ap_int<32> next_j1; + next_j1 = 0; + if (act1 == 0) { + next_i1 = (curr_i + -1); + next_j1 = (curr_j + -1); + } else { + if (act1 == 1) { + next_i1 = (curr_i + -1); + next_j1 = curr_j; + } else { + if (act1 == 2) { + next_i1 = curr_i; + next_j1 = (curr_j + -1); + } else { + next_i1 = curr_i; + next_j1 = curr_j; + } + } + } + next_i = next_i1; + next_j = next_j1; + ap_int<32> tick; + tick = 0; + while (((curr_i != next_i) || (curr_j != next_j))) { + ap_int<32> a; + a = 0; + ap_int<32> b; + b = 0; + if (next_i == curr_i) { + a = 0; + } else { + a = ((ap_int<32>)seqAs[((((curr_i - ((curr_i + -1) % 128)) + ((t_inner + (t_outer * 32)) * 128)) + -1) / 128)][((curr_i + -1) % 128)]); + } + if (next_j == curr_j) { + b = 0; + } else { + b = ((ap_int<32>)seqBs[((((curr_j - ((curr_j + -1) % 128)) + ((t_inner + (t_outer * 32)) * 128)) + -1) / 128)][((curr_j + -1) % 128)]); + } + outAs[((tick / 256) + (t_inner + (t_outer * 32)))][(tick % 256)] = ((ap_uint<3>)a); + outBs[((tick / 256) + (t_inner + (t_outer * 32)))][(tick % 256)] = ((ap_uint<3>)b); + curr_i = next_i; + curr_j = next_j; + ap_int<32> act2; + act2 = ((ap_int<32>)action[((curr_j / 129) + 
curr_i)][(curr_j % 129)]); + ap_int<32> next_i2; + next_i2 = 0; + ap_int<32> next_j2; + next_j2 = 0; + if (act2 == 0) { + next_i2 = (curr_i + -1); + next_j2 = (curr_j + -1); + } else { + if (act2 == 1) { + next_i2 = (curr_i + -1); + next_j2 = curr_j; + } else { + if (act2 == 2) { + next_i2 = curr_i; + next_j2 = (curr_j + -1); + } else { + next_i2 = curr_i; + next_j2 = curr_j; + } + } + } + next_i = next_i2; + next_j = next_j2; + tick = (tick + 1); + } + } + } +} + diff --git a/tvm/src/codegen/hlsc/vhls_module.cc b/tvm/src/codegen/hlsc/vhls_module.cc index c5f004a93..fd28234db 100644 --- a/tvm/src/codegen/hlsc/vhls_module.cc +++ b/tvm/src/codegen/hlsc/vhls_module.cc @@ -345,9 +345,9 @@ class VivadoHLSModuleNode final : public ModuleNode { GenHostCode(args, shmids, arg_types, func_, test_file_); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated HLS C code ..."; - system("g++ main.cpp -o out"); + // system("g++ main.cpp -o out"); LOG(CLEAN) << "Running C simulation ..."; - system("./out"); + // system("./out"); LOG(CLEAN) << "Finished C simulation"; // system("rm out main.cpp"); FreeSharedMem(args, shmids, arg_sizes); diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 06dbd7c2a..1ccfb0d32 100755 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -30,7 +30,7 @@ runtime::Module BuildSDAccelSim(Array funcs) { return runtime::CreateSDAccelModule(funcs[0], code); } -TVM_REGISTER_API("codegen.build_sdaccel_sw_emu") +TVM_REGISTER_API("codegen.build_sdaccel_csim") .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = BuildSDAccelSim(args[0]); }); diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 294e9cecb..2ada9cc0d 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -194,14 +194,20 @@ void FreeSharedMem(TVMArgs& args, const std::vector& shmids, 
std::vector& arg_sizes) { for (size_t i = 0; i < shmids.size(); i++) { - if (args[i].type_code() == kArrayHandle) { + // if (args[i].type_code() == kArrayHandle) { + // TVMArray* arr = args[i]; + // int shmid = shmids[i]; + // void* mem = shmat(shmid, nullptr, 0); + // memcpy(arr->data, mem, arg_sizes[i]); + // shmdt(mem); + // shmctl(shmid, IPC_RMID, nullptr); + // } TVMArray* arr = args[i]; int shmid = shmids[i]; void* mem = shmat(shmid, nullptr, 0); memcpy(arr->data, mem, arg_sizes[i]); shmdt(mem); shmctl(shmid, IPC_RMID, nullptr); - } } } @@ -431,7 +437,7 @@ void GenHostCode(TVMArgs& args, std::string test_file) { int indent = 0; std::ofstream stream; - stream.open("main.cpp"); + stream.open("host.cpp"); indent += 2; stream << "#define CL_HPP_CL_1_2_DEFAULT_BUILD\n"; @@ -519,7 +525,17 @@ void GenHostCode(TVMArgs& args, stream << "\n"; for (int i = 0;i < args.size();i++ ) { - if (args[i].type_code() == kArrayHandle) { + // if (args[i].type_code() == kArrayHandle) { + // // read from the shared memory + // PrintIndent(stream, indent); + // stream << Type2Str(arg_types[i]) << "* "; + // stream << "arg_" << i << " = "; + // stream << "(" << Type2Str(arg_types[i]) << "*)"; + // stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + // TVMArray* arr = args[i]; + // // copy from shared mem + // PrintCopy(arr, stream, indent, i); + // } // read from the shared memory PrintIndent(stream, indent); stream << Type2Str(arg_types[i]) << "* "; @@ -529,7 +545,6 @@ void GenHostCode(TVMArgs& args, TVMArray* arr = args[i]; // copy from shared mem PrintCopy(arr, stream, indent, i); - } } @@ -607,7 +622,6 @@ void GenHostCode(TVMArgs& args, stream << "\n"; - // Creating Buffers inside Device // cl::Buffer buffer_a(context, CL_MEM_READ_ONLY, vector_size_bytes); // cl::Buffer buffer_b(context, CL_MEM_WRITE_ONLY, vector_size_bytes); @@ -628,9 +642,6 @@ void GenHostCode(TVMArgs& args, } stream << "\n"; - - - // Running Kernel PrintIndent(stream, indent); stream << func->name << "("; 
@@ -657,8 +668,6 @@ void GenHostCode(TVMArgs& args, } stream << "\n"; - - // copy to shared mem for (int i = 0;i < args.size();i++) { if (args[i].type_code() == kArrayHandle) { @@ -711,10 +720,10 @@ class SDAccelModuleNode final : public ModuleNode { GenMakFile(); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated SDAccel OpenCL Code ..."; - system("make -f ./sdaccel.mk run_cpu_em"); + // system("make -f ./sdaccel.mk run_cpu_em"); LOG(CLEAN) << "Running SDAccel OpenCL Software Simulation ..."; LOG(CLEAN) << "Finished SDAccel OpenCL Software Simulation ..."; - system("make -f sdaccel.mk cleanall"); + // system("make -f sdaccel.mk cleanall"); FreeSharedMem(args, shmids, arg_sizes); }); } diff --git a/tvm/src/codegen/opencl/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel_module.h index 6a2a89cd3..313f08214 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.h +++ b/tvm/src/codegen/opencl/sdaccel_module.h @@ -1,8 +1,7 @@ -/*! - * Copyright (c) 2018 by Contributors - * \file build_vhls.cc - * \brief Build HLS C modules from source. 
- */ +/* + Yang.Bai + yb269@cornell.edu +*/ #ifndef SDACCEL_MODULE_H #define SDACCEL_MODULE_H From aa67e489cc2f112b64869d501d5b85be7a73c881 Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Wed, 11 Sep 2019 21:26:26 -0400 Subject: [PATCH 071/103] fix the __local and __global for intel opencl back-end --- samples/gemm/gemm_aocl.cl | 14 + samples/gemm/gemm_runtime.py | 15 +- samples/gemm/gemm_sdaccel.cl | 13 + samples/gemm/host.cpp | 58 +-- samples/kmeans/kmeans_aocl.cl | 49 +++ samples/kmeans/kmeans_main.py | 17 +- samples/kmeans/submit.sh | 3 + samples/smith_waterman/lenet_aocl.cl | 143 +++++++ samples/smith_waterman/smith_aocl.cl | 143 +++++++ samples/smith_waterman/smith_waterman_main.py | 7 + tvm/src/codegen/opencl/codegen_aocl.cc | 387 +++++++++++------- tvm/src/codegen/opencl/codegen_opencl.cc | 4 +- tvm/src/codegen/opencl/codegen_sdaccel.cc | 0 13 files changed, 659 insertions(+), 194 deletions(-) create mode 100644 samples/gemm/gemm_aocl.cl create mode 100644 samples/gemm/gemm_sdaccel.cl create mode 100644 samples/kmeans/kmeans_aocl.cl create mode 100644 samples/kmeans/submit.sh create mode 100644 samples/smith_waterman/lenet_aocl.cl create mode 100644 samples/smith_waterman/smith_aocl.cl mode change 100755 => 100644 tvm/src/codegen/opencl/codegen_aocl.cc mode change 100755 => 100644 tvm/src/codegen/opencl/codegen_sdaccel.cc diff --git a/samples/gemm/gemm_aocl.cl b/samples/gemm/gemm_aocl.cl new file mode 100644 index 000000000..f444a453c --- /dev/null +++ b/samples/gemm/gemm_aocl.cl @@ -0,0 +1,14 @@ +#include "ihc_apint.h" +__kernel void default_function(__global int* restrict placeholder0, __global int* restrict placeholder1, __global int* restrict matrix_3) { + for (int x = 0; x < 10; ++x) { + for (int y = 0; y < 10; ++y) { + global int sum; + sum = 0; + for (int k = 0; k < 10; ++k) { + sum = ((int)(((int65_t)(((long)placeholder0[(k + (x * 10))]) * ((long)placeholder1[(y + (k * 10))]))) + ((int65_t)sum))); + } + matrix_3[(y + (x * 10))] = sum; + } + } +} + 
diff --git a/samples/gemm/gemm_runtime.py b/samples/gemm/gemm_runtime.py index 1cee08a48..49947fa4c 100644 --- a/samples/gemm/gemm_runtime.py +++ b/samples/gemm/gemm_runtime.py @@ -36,8 +36,8 @@ # print (f4) # print (hcl_A, hcl_B, hcl_C) -matrix_1_size = (5, 3) -matrix_2_size = (3, 5) +matrix_1_size = (10, 10) +matrix_2_size = (10, 10) matrix_3_size = (matrix_1_size[0], matrix_2_size[1]) def gemm_compute(matrix_1, matrix_2): @@ -55,6 +55,14 @@ def gemm_compute(matrix_1, matrix_2): s = hcl.create_schedule([matrix_1, matrix_2], gemm_compute) f = hcl.build(s, target='sdaccel_csim') +code = hcl.build(s, target='aocl') +with open('gemm_aocl.cl', 'w') as fin: + fin.write(code) + +code2 = hcl.build(s, target='sdaccel') +with open('gemm_sdaccel.cl', 'w') as fin2: + fin2.write(code2) + matrix_1_np = np.random.randint(10, size=matrix_1_size) matrix_2_np = np.random.randint(10, size=matrix_2_size) @@ -64,7 +72,8 @@ def gemm_compute(matrix_1, matrix_2): hcl_matrix_2 = hcl.asarray(matrix_2_np) hcl_matrix_3 = hcl.asarray(matrix_3_np) -f(hcl_matrix_1, hcl_matrix_2, hcl_matrix_3) +# f(hcl_matrix_1, hcl_matrix_2, hcl_matrix_3) + diff --git a/samples/gemm/gemm_sdaccel.cl b/samples/gemm/gemm_sdaccel.cl new file mode 100644 index 000000000..f46a88426 --- /dev/null +++ b/samples/gemm/gemm_sdaccel.cl @@ -0,0 +1,13 @@ +__kernel void default_function(__global int* placeholder0, __global int* placeholder1, __global int* matrix_3) { + for (int x = 0; x < 10; ++x) { + for (int y = 0; y < 10; ++y) { + __local int sum; + sum = 0; + for (int k = 0; k < 10; ++k) { + sum = ((int)(((long)(((long)placeholder0[(k + (x * 10))]) * ((long)placeholder1[(y + (k * 10))]))) + ((long)sum))); + } + matrix_3[(y + (x * 10))] = sum; + } + } +} + diff --git a/samples/gemm/host.cpp b/samples/gemm/host.cpp index 64c3c4184..914b2aa26 100644 --- a/samples/gemm/host.cpp +++ b/samples/gemm/host.cpp @@ -29,30 +29,30 @@ int main(void) { #endif char* xclbinFilename = argv[1]; - std::vector source_0(5 * 3); - std::vector 
source_1(3 * 5); - std::vector source_2(5 * 5); - - size_t vector_size_bytes_0 = sizeof(int) * 5 * 3; - size_t vector_size_bytes_1 = sizeof(int) * 3 * 5; - size_t vector_size_bytes_2 = sizeof(int) * 5 * 5; - - int* arg_0 = (int*)shmat(7340033, nullptr, 0); - for (size_t i0 = 0; i0 < 5; i0++) { - for (size_t i1 = 0; i1 < 3; i1++) { - source_0[i1 + i0*3] = arg_0[i1 + i0*3]; + std::vector source_0(6 * 2); + std::vector source_1(2 * 7); + std::vector source_2(6 * 7); + + size_t vector_size_bytes_0 = sizeof(int) * 6 * 2; + size_t vector_size_bytes_1 = sizeof(int) * 2 * 7; + size_t vector_size_bytes_2 = sizeof(int) * 6 * 7; + + int* arg_0 = (int*)shmat(4849666, nullptr, 0); + for (size_t i0 = 0; i0 < 6; i0++) { + for (size_t i1 = 0; i1 < 2; i1++) { + source_0[i1 + i0*2] = arg_0[i1 + i0*2]; } } - int* arg_1 = (int*)shmat(7340032, nullptr, 0); - for (size_t i0 = 0; i0 < 3; i0++) { - for (size_t i1 = 0; i1 < 5; i1++) { - source_1[i1 + i0*5] = arg_1[i1 + i0*5]; + int* arg_1 = (int*)shmat(7667712, nullptr, 0); + for (size_t i0 = 0; i0 < 2; i0++) { + for (size_t i1 = 0; i1 < 7; i1++) { + source_1[i1 + i0*7] = arg_1[i1 + i0*7]; } } - int* arg_2 = (int*)shmat(4521986, nullptr, 0); - for (size_t i0 = 0; i0 < 5; i0++) { - for (size_t i1 = 0; i1 < 5; i1++) { - source_2[i1 + i0*5] = arg_2[i1 + i0*5]; + int* arg_2 = (int*)shmat(7667713, nullptr, 0); + for (size_t i0 = 0; i0 < 6; i0++) { + for (size_t i1 = 0; i1 < 7; i1++) { + source_2[i1 + i0*7] = arg_2[i1 + i0*7]; } } std::vector platforms; @@ -97,21 +97,21 @@ int main(void) { q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data()); q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data()); - for (size_t i0 = 0; i0 < 5; i0++) { - for (size_t i1 = 0; i1 < 3; i1++) { - arg_0[i1 + i0*3] = source_0[i1 + i0*3]; + for (size_t i0 = 0; i0 < 6; i0++) { + for (size_t i1 = 0; i1 < 2; i1++) { + arg_0[i1 + i0*2] = source_0[i1 + i0*2]; } } shmdt(arg_0); - for (size_t i0 = 0; i0 < 3; i0++) { - for 
(size_t i1 = 0; i1 < 5; i1++) { - arg_1[i1 + i0*5] = source_1[i1 + i0*5]; + for (size_t i0 = 0; i0 < 2; i0++) { + for (size_t i1 = 0; i1 < 7; i1++) { + arg_1[i1 + i0*7] = source_1[i1 + i0*7]; } } shmdt(arg_1); - for (size_t i0 = 0; i0 < 5; i0++) { - for (size_t i1 = 0; i1 < 5; i1++) { - arg_2[i1 + i0*5] = source_2[i1 + i0*5]; + for (size_t i0 = 0; i0 < 6; i0++) { + for (size_t i1 = 0; i1 < 7; i1++) { + arg_2[i1 + i0*7] = source_2[i1 + i0*7]; } } shmdt(arg_2); diff --git a/samples/kmeans/kmeans_aocl.cl b/samples/kmeans/kmeans_aocl.cl new file mode 100644 index 000000000..e64b116f4 --- /dev/null +++ b/samples/kmeans/kmeans_aocl.cl @@ -0,0 +1,49 @@ +#include "ihc_apint.h" +__kernel void default_function(__global int* restrict placeholder2, __global int* restrict placeholder3, __global int* restrict compute3) { + for (int x = 0; x < 32; ++x) { + compute3[x] = 0; + } + int main_loop; + for (int _1 = 0; _1 < 10; ++_1) { + #pragma ii 1 + for (int N = 0; N < 32; ++N) { + int local2; + local2 = 100000; + for (int i = 0; i < 6; ++i) { + int local3; + local3 = 0; + for (int i1 = 0; i1 < 3; ++i1) { + local3 = ((int)(((int64_t)local3) + ((int64_t)(((int64_t)((int33_t)(placeholder2[(i1 + (N * 3))] - placeholder3[(i1 + (i * 3))]))) * ((int64_t)((int33_t)(placeholder2[(i1 + (N * 3))] - placeholder3[(i1 + (i * 3))]))))))); + } + if (local3 < local2) { + local2 = local3; + compute3[N] = i; + } + } + } + int compute4[6]; + for (int x1 = 0; x1 < 6; ++x1) { + compute4[x1] = 0; + } + int compute5[18]; + for (int x2 = 0; x2 < 6; ++x2) { + for (int y = 0; y < 3; ++y) { + compute5[(y + (x2 * 3))] = 0; + } + } + int calc_sum; + #pragma unroll + for (int n = 0; n < 32; ++n) { + compute4[compute3[n]] = (compute4[compute3[n]] + 1); + for (int i2 = 0; i2 < 3; ++i2) { + compute5[(i2 + (compute3[n] * 3))] = ((int)(((int33_t)compute5[(i2 + (compute3[n] * 3))]) + ((int33_t)placeholder2[(i2 + (n * 3))]))); + } + } + int update_mean; + #pragma unroll + for (int k_d_fused = 0; k_d_fused < 18; 
++k_d_fused) { + placeholder3[k_d_fused] = (compute5[k_d_fused] / compute4[(k_d_fused / 3)]); + } + } +} + diff --git a/samples/kmeans/kmeans_main.py b/samples/kmeans/kmeans_main.py index eb11f2fde..4779e0e5e 100644 --- a/samples/kmeans/kmeans_main.py +++ b/samples/kmeans/kmeans_main.py @@ -13,10 +13,15 @@ ############################################################################## # Define the number of the clustering means as K, the number of points as N, # the number of dimensions as dim, and the number of iterations as niter -K = 16 -N = 320 -dim = 32 -niter = 200 +# K = 16 +# N = 320 +# dim = 32 +# niter = 200 + +K = 6 +N = 32 +dim = 3 +niter = 10 hcl.init() #hcl.init(hcl.Float()) @@ -77,6 +82,10 @@ def calc_sum(n): # code3 = top('vhls') # with open('vhls_code.cl', 'w') as f: # f.write(code3) +code = top('aocl') +with open('kmeans_aocl.cl', 'w') as f: + f.write(code) +assert 1==2 points_np = np.random.randint(100, size=(N, dim)) labels_np = np.zeros(N) diff --git a/samples/kmeans/submit.sh b/samples/kmeans/submit.sh new file mode 100644 index 000000000..a4345a542 --- /dev/null +++ b/samples/kmeans/submit.sh @@ -0,0 +1,3 @@ +unset DISPLAY +aoc -board=a10gx -time time.out -time-passes -regtest_mode -v -fpc -fp-relaxed --opt-arg -nocaching -regtest_mode -report -I $INTELFPGAOCLSDKROOT/include/kernel_headers kmeans_aocl.cl + diff --git a/samples/smith_waterman/lenet_aocl.cl b/samples/smith_waterman/lenet_aocl.cl new file mode 100644 index 000000000..bf8608082 --- /dev/null +++ b/samples/smith_waterman/lenet_aocl.cl @@ -0,0 +1,143 @@ +#include "ihc_apint.h" +__kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_t* restrict seqBs, __global uint3_t* restrict outAs, __global uint3_t* restrict outBs) { + int B; + #pragma ii 1 + for (int t_outer = 0; t_outer < 32; ++t_outer) { + #pragma unroll + for (int t_inner = 0; t_inner < 32; ++t_inner) { + int maxtrix_max; + maxtrix_max = 0; + int i_max; + i_max = 0; + int j_max; + j_max = 0; + short 
matrix[16641]; + for (int x = 0; x < 129; ++x) { + for (int y = 0; y < 129; ++y) { + matrix[(y + (x * 129))] = (short)0; + } + } + short action[16641]; + for (int x1 = 0; x1 < 129; ++x1) { + for (int y1 = 0; y1 < 129; ++y1) { + action[(y1 + (x1 * 129))] = (short)3; + } + } + int mutate3; + for (int i = 0; i < 129; ++i) { + for (int j = 0; j < 129; ++j) { + int trace_back[4]; + for (int x2 = 0; x2 < 4; ++x2) { + trace_back[x2] = 0; + } + if ((i != 0) && (j != 0)) { + trace_back[0] = ((int)(((int33_t)matrix[((j + (i * 129)) + -130)]) + ((int33_t)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 1 : -4)))); + trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); + trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); + trace_back[3] = 0; + int max; + max = trace_back[0]; + int act; + act = 0; + for (int i1 = 0; i1 < 4; ++i1) { + if (max < trace_back[i1]) { + max = trace_back[i1]; + act = i1; + } + } + matrix[(j + (i * 129))] = ((short)max); + action[(j + (i * 129))] = ((short)act); + if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { + maxtrix_max = ((int)matrix[(j + (i * 129))]); + i_max = i; + j_max = j; + } + } + } + } + int T; + int curr_i; + curr_i = i_max; + int curr_j; + curr_j = j_max; + int next_i; + next_i = 0; + int next_j; + next_j = 0; + int act1; + act1 = ((int)action[(curr_j + (curr_i * 129))]); + int next_i1; + next_i1 = 0; + int next_j1; + next_j1 = 0; + if (act1 == 0) { + next_i1 = (curr_i + -1); + next_j1 = (curr_j + -1); + } else { + if (act1 == 1) { + next_i1 = (curr_i + -1); + next_j1 = curr_j; + } else { + if (act1 == 2) { + next_i1 = curr_i; + next_j1 = (curr_j + -1); + } else { + next_i1 = curr_i; + next_j1 = curr_j; + } + } + } + next_i = next_i1; + next_j = next_j1; + int tick; + tick = 0; + while (((curr_i != next_i) || (curr_j != next_j))) { + int a; + a = 0; + int b; + b = 0; + if (next_i == curr_i) { + a = 0; + } else { + a = ((int)seqAs[((curr_i 
+ ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + if (next_j == curr_j) { + b = 0; + } else { + b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)a); + outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)b); + curr_i = next_i; + curr_j = next_j; + int act2; + act2 = ((int)action[(curr_j + (curr_i * 129))]); + int next_i2; + next_i2 = 0; + int next_j2; + next_j2 = 0; + if (act2 == 0) { + next_i2 = (curr_i + -1); + next_j2 = (curr_j + -1); + } else { + if (act2 == 1) { + next_i2 = (curr_i + -1); + next_j2 = curr_j; + } else { + if (act2 == 2) { + next_i2 = curr_i; + next_j2 = (curr_j + -1); + } else { + next_i2 = curr_i; + next_j2 = curr_j; + } + } + } + next_i = next_i2; + next_j = next_j2; + tick = (tick + 1); + } + } + } +} + diff --git a/samples/smith_waterman/smith_aocl.cl b/samples/smith_waterman/smith_aocl.cl new file mode 100644 index 000000000..bf8608082 --- /dev/null +++ b/samples/smith_waterman/smith_aocl.cl @@ -0,0 +1,143 @@ +#include "ihc_apint.h" +__kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_t* restrict seqBs, __global uint3_t* restrict outAs, __global uint3_t* restrict outBs) { + int B; + #pragma ii 1 + for (int t_outer = 0; t_outer < 32; ++t_outer) { + #pragma unroll + for (int t_inner = 0; t_inner < 32; ++t_inner) { + int maxtrix_max; + maxtrix_max = 0; + int i_max; + i_max = 0; + int j_max; + j_max = 0; + short matrix[16641]; + for (int x = 0; x < 129; ++x) { + for (int y = 0; y < 129; ++y) { + matrix[(y + (x * 129))] = (short)0; + } + } + short action[16641]; + for (int x1 = 0; x1 < 129; ++x1) { + for (int y1 = 0; y1 < 129; ++y1) { + action[(y1 + (x1 * 129))] = (short)3; + } + } + int mutate3; + for (int i = 0; i < 129; ++i) { + for (int j = 0; j < 129; ++j) { + int trace_back[4]; + for (int x2 = 0; x2 < 4; ++x2) { + trace_back[x2] = 0; + } + if ((i != 0) && (j != 0)) { + trace_back[0] = 
((int)(((int33_t)matrix[((j + (i * 129)) + -130)]) + ((int33_t)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 1 : -4)))); + trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); + trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); + trace_back[3] = 0; + int max; + max = trace_back[0]; + int act; + act = 0; + for (int i1 = 0; i1 < 4; ++i1) { + if (max < trace_back[i1]) { + max = trace_back[i1]; + act = i1; + } + } + matrix[(j + (i * 129))] = ((short)max); + action[(j + (i * 129))] = ((short)act); + if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { + maxtrix_max = ((int)matrix[(j + (i * 129))]); + i_max = i; + j_max = j; + } + } + } + } + int T; + int curr_i; + curr_i = i_max; + int curr_j; + curr_j = j_max; + int next_i; + next_i = 0; + int next_j; + next_j = 0; + int act1; + act1 = ((int)action[(curr_j + (curr_i * 129))]); + int next_i1; + next_i1 = 0; + int next_j1; + next_j1 = 0; + if (act1 == 0) { + next_i1 = (curr_i + -1); + next_j1 = (curr_j + -1); + } else { + if (act1 == 1) { + next_i1 = (curr_i + -1); + next_j1 = curr_j; + } else { + if (act1 == 2) { + next_i1 = curr_i; + next_j1 = (curr_j + -1); + } else { + next_i1 = curr_i; + next_j1 = curr_j; + } + } + } + next_i = next_i1; + next_j = next_j1; + int tick; + tick = 0; + while (((curr_i != next_i) || (curr_j != next_j))) { + int a; + a = 0; + int b; + b = 0; + if (next_i == curr_i) { + a = 0; + } else { + a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + if (next_j == curr_j) { + b = 0; + } else { + b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + } + outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)a); + outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)b); + curr_i = next_i; + curr_j = next_j; + int act2; + act2 = ((int)action[(curr_j + (curr_i * 129))]); + int next_i2; + next_i2 = 0; + int next_j2; + next_j2 = 0; + if (act2 == 
0) { + next_i2 = (curr_i + -1); + next_j2 = (curr_j + -1); + } else { + if (act2 == 1) { + next_i2 = (curr_i + -1); + next_j2 = curr_j; + } else { + if (act2 == 2) { + next_i2 = curr_i; + next_j2 = (curr_j + -1); + } else { + next_i2 = curr_i; + next_j2 = curr_j; + } + } + } + next_i = next_i2; + next_j = next_j2; + tick = (tick + 1); + } + } + } +} + diff --git a/samples/smith_waterman/smith_waterman_main.py b/samples/smith_waterman/smith_waterman_main.py index d0b1b8d3c..7a93849fe 100644 --- a/samples/smith_waterman/smith_waterman_main.py +++ b/samples/smith_waterman/smith_waterman_main.py @@ -152,6 +152,13 @@ def batch_sw(seqAs, seqBs, outAs, outBs): with open('sdaccel_code.cl', 'w') as f: f.write(code) +code2 = top('aocl') +with open('smith_aocl.cl', 'w') as fin: + fin.write(code2) + +assert 1==2 + + # code3 = top('vhls'); # with open('vhls_code.cl', 'w') as f: # f.write(code3) diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc old mode 100755 new mode 100644 index 4ae3015d9..7af5dafc0 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -1,156 +1,231 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include -# include -# include -# include -# include -# include -# include "./codegen_aocl.h" -# include "../../runtime/thread_storage_scope.h" - -namespace TVM { -namespace codegen { - - -void CodeGenAOCL::AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - // Clear previous generated state - this->InitFuncState(f); - - // Skip the first underscore, so SSA variable starts from _1 - GetUniqueName("_"); - - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - - this->stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" << "\n"; - this->stream << "__kernel " << "void " << f->name << "("; - - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = 
f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - this->stream << "__global "; - // this->stream << "global "; - PrintType(std::get<1>(arg), this->stream); - if (v.type().is_handle()) - this->stream << "*"; - this->stream << ' ' << std::get<0>(arg); - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - // this->stream << ' '<< ' ' << "return;\n"; - this->stream << "}\n\n"; -} - - - -void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - - if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; - } - else if ( t.is_int()) { - os << "ap_int<" << t.bits() << ">" << "intd_t"; - } - else { - if (t.is_float()) { - if (t.bits() == 16) { - enable_fp16_ = true; - os << "half"; return; - } - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - enable_fp64_ = true; - os << "double"; return; - } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_uint<" << t.bits() << ">" << "uintd_t"; return; - // os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "ap_int<" << t.bits() << ">" << "intd_t"; return; - // os << "int" << t.bits() << "_t"; return; - } - } - } - } - } -} - - -void CodeGenAOCL::VisitStmt_(const For* op) { - std::ostringstream os; - if (op->for_type == ForType::Unrolled) { - int unroll_factor = 0, i = 0; - for (auto key : op->annotate_keys) { - if 
(auto str = key.as()) { - auto factor = op->annotate_values[i].as(); - if (str->value == "factor" && factor != nullptr && factor->value > 1) { - unroll_factor = factor->value; - break; - } - } - i++; - } - os << "#pragma unroll"; - if (unroll_factor > 0) os << " " << unroll_factor << "\n"; - else os << "\n"; - } - else if (op->for_type == ForType::Pipelined) { - int II = 1, i = 0; - for (auto key : op->annotate_keys) { - if (auto str = key.as()) { - auto initiation_interval = op->annotate_values[i].as(); - if (str->value == "initiation_interval" && - initiation_interval != nullptr && - initiation_interval->value > 1) { - II = initiation_interval->value; - break; - } - } - i++; - } - os << "#pragma"; - os << " ii " << II << "\n"; - } - CodeGenAOCL::GenForStmt(op, os.str(), true); -} - - - - -} // namespace codegen -} // namespace TVM +/* + Yang.Bai + yb269@cornell.edu +*/ +# include +# include +# include +# include +# include +# include +# include "./codegen_aocl.h" +# include "../../runtime/thread_storage_scope.h" + +namespace TVM { +namespace codegen { + +void CodeGenAOCL::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + // Clear previous generated state + this->InitFuncState(f); + + // Skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + + // Register alloc buffer type + for (const auto & kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + + this->stream << "#include \"ihc_apint.h\"" << "\n"; + this->stream << "__kernel " << "void " << f->name << "("; + + // Write arguments + for (size_t i = 0; i < f->args.size(); ++i) { + Var v = f->args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) this->stream << ", "; + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; + } + else { + auto arg = map_arg_type[vid]; + this->stream << "__global "; + // this->stream << "global 
"; + PrintType(std::get<1>(arg), this->stream); + if (v.type().is_handle()) + this->stream << "*"; + this->stream << ' ' << "restrict "; + this->stream << std::get<0>(arg); + } + } + stream << ") {\n"; + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + this->PrintIndent(); + // this->stream << ' '<< ' ' << "return;\n"; + this->stream << "}\n\n"; +} + +/* 1st edition +void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) + CHECK_EQ(t.lanes(), 1) + << "do not yet support vector types"; + if (t.is_handle()) { + os << "void*"; return; + } + + if (t.is_uint() || t.is_int()) { + if (t.is_uint()) { + os << "ap_uint<" << t.bits() << ">" <<" "<<"uint"< "<<"int"<"<<" "<< "uint"< "<<"int"<= 2 && lanes <=16)) + { + os<=2 && lanes <= 16)) + { + os<for_type == ForType::Unrolled) { + int unroll_factor = 0, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto factor = op->annotate_values[i].as(); + if (str->value == "factor" && factor != nullptr && factor->value > 1) { + unroll_factor = factor->value; + break; + } + } + i++; + } + os << "#pragma unroll"; + if (unroll_factor > 0) os << " " << unroll_factor << "\n"; + else os << "\n"; + } + else if (op->for_type == ForType::Pipelined) { + int II = 1, i = 0; + for (auto key : op->annotate_keys) { + if (auto str = key.as()) { + auto initiation_interval = op->annotate_values[i].as(); + if (str->value == "initiation_interval" && + initiation_interval != nullptr && + initiation_interval->value > 1) { + II = initiation_interval->value; + break; + } + } + i++; + } + os << "#pragma"; + os << " ii " << II << "\n"; + } + CodeGenAOCL::GenForStmt(op, os.str(), true); +} + + + + +} // namespace codegen +} // namespace TVM \ No newline at end of file diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index ab14fc0bf..1feecdc8f 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ 
b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -120,9 +120,9 @@ void CodeGenOpenCL::PrintStorageSync(const Call* op) { void CodeGenOpenCL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global") { - os << "global "; + // os << "global "; } else if (scope == "shared") { - os << "local "; + // os << "local "; } } diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc old mode 100755 new mode 100644 From c46b9320289057a1804d2dfaf3761a39022b524d Mon Sep 17 00:00:00 2001 From: ybai62868 Date: Fri, 13 Sep 2019 15:53:25 -0400 Subject: [PATCH 072/103] Fix the arbitrary integer precision for aocl --- samples/digitrec/digitrec_main.py | 17 +- samples/digitrec/knn_aocl.cl | 35 +++++ samples/gemm/gemm_aocl.cl | 4 +- samples/lenet/lenet_aocl.cl | 138 +++++++++++++++++ samples/lenet/lenet_main.py | 41 +++-- samples/smith_waterman/sdaccel_code.cl | 48 +++--- samples/smith_waterman/smith_aocl.cl | 50 +++--- samples/smith_waterman/smith_vhls.cl | 146 ++++++++++++++++++ samples/smith_waterman/smith_waterman_main.py | 13 +- tvm/src/codegen/opencl/codegen_aocl.cc | 16 +- 10 files changed, 434 insertions(+), 74 deletions(-) create mode 100644 samples/digitrec/knn_aocl.cl create mode 100644 samples/lenet/lenet_aocl.cl create mode 100644 samples/smith_waterman/smith_vhls.cl diff --git a/samples/digitrec/digitrec_main.py b/samples/digitrec/digitrec_main.py index d8f7bccd4..373200fc1 100644 --- a/samples/digitrec/digitrec_main.py +++ b/samples/digitrec/digitrec_main.py @@ -65,9 +65,13 @@ # Declare some constants and data types. For images, we need unsigned 49-bit # integers, while for knn matrices, we need unsigned 6-bit integers. 
-N = 7 * 7 +# N = 7 * 7 +N = 2 * 2 max_bit = int(math.ceil(math.log(N, 2))) -data_size = (10, 1800) +# data_size = (10, 1800) +data_size = (10, 20) + + # HeteroCL provides users with a set of bit-accurate data types, which include # unsigned/signed arbitrary-bit integers and unsigned/signed fixed-points. @@ -159,7 +163,8 @@ def update_knn(dist, knn_mat, i, j): s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) # Parallel outer loop and pipeline inner loop - s[knn_update].parallel(knn_update.axis[1]) + # s[knn_update].parallel(knn_update.axis[1]) + s[knn_update].unroll(knn_update.axis[1]) s[knn_update].pipeline(knn_update.axis[0]) # At the end, we build the whole offloaded function. @@ -334,6 +339,12 @@ def update_knn(dist, knn_mat, i, j): # This is the main function. Namely, the complete algorithm we want to run. We # get the offloaded function with the provided data types offload = top() +code = top('aocl') +with open('knn_aocl.cl', 'w') as f: + f.write(code) + + +assert 1==2 ############################################################################### # Voting algorithm diff --git a/samples/digitrec/knn_aocl.cl b/samples/digitrec/knn_aocl.cl new file mode 100644 index 000000000..760fa87ae --- /dev/null +++ b/samples/digitrec/knn_aocl.cl @@ -0,0 +1,35 @@ + +#include "ihc_apint.h" +__kernel void default_function(__global uint* restrict test_image, __global uint* restrict train_images, __global uint* restrict knn_mat) { + for (int x = 0; x < 10; ++x) { + for (int y = 0; y < 3; ++y) { + knn_mat[(y + (x * 3))] = (uint2_t)2; + } + } + uint4_t knn_update; + #pragma unroll + for (int y1 = 0; y1 < 20; ++y1) { + for (int x1 = 0; x1 < 10; ++x1) { + uint2_t dist; + uint4_t diff; + diff = ((uint4_t)(train_images[(y1 + (x1 * 20))]) ^ (uint4_t)(test_image)); + uint2_t out; + out = (uint2_t)0; + for (int i = 0; i < 4; ++i) { + out = ((uint2_t)(((uint5_t)out) + ((uint5_t)((diff & (1L << i)) >> i)))); + } + dist = out; + uint4_t max_id; + max_id = (uint4_t)0; + for 
(int i1 = 0; i1 < 3; ++i1) { + if (knn_mat[(((int)max_id) + (x1 * 3))] < knn_mat[(i1 + (x1 * 3))]) { + max_id = ((uint4_t)i1); + } + } + if (dist < knn_mat[(((int)max_id) + (x1 * 3))]) { + knn_mat[(((int)max_id) + (x1 * 3))] = dist; + } + } + } +} + diff --git a/samples/gemm/gemm_aocl.cl b/samples/gemm/gemm_aocl.cl index f444a453c..198757823 100644 --- a/samples/gemm/gemm_aocl.cl +++ b/samples/gemm/gemm_aocl.cl @@ -2,10 +2,10 @@ __kernel void default_function(__global int* restrict placeholder0, __global int* restrict placeholder1, __global int* restrict matrix_3) { for (int x = 0; x < 10; ++x) { for (int y = 0; y < 10; ++y) { - global int sum; + int sum; sum = 0; for (int k = 0; k < 10; ++k) { - sum = ((int)(((int65_t)(((long)placeholder0[(k + (x * 10))]) * ((long)placeholder1[(y + (k * 10))]))) + ((int65_t)sum))); + sum = ((int)(((int64_t)(((long)placeholder0[(k + (x * 10))]) * ((long)placeholder1[(y + (k * 10))]))) + ((int64_t)sum))); } matrix_3[(y + (x * 10))] = sum; } diff --git a/samples/lenet/lenet_aocl.cl b/samples/lenet/lenet_aocl.cl new file mode 100644 index 000000000..9b2a200f8 --- /dev/null +++ b/samples/lenet/lenet_aocl.cl @@ -0,0 +1,138 @@ +#include "ihc_apint.h" +__kernel void default_function(__global float* restrict input_image, __global float* restrict weight_conv1, __global float* restrict weight_conv2, __global float* restrict weight_fc1, __global float* restrict weight_fc2, __global float* restrict lenet) { + float conv2d; + for (int nn = 0; nn < 1; ++nn) { + for (int yy = 0; yy < -1; ++yy) { + for (int xx = 0; xx < -1; ++xx) { + float reducer0; + reducer0 = 0.000000e+00f; + for (int ra1 = 0; ra1 < 5; ++ra1) { + for (int ra2 = 0; ra2 < 5; ++ra2) { + reducer0 = ((input_image[(((xx + ra2) + ((yy + ra1) * 3)) + (nn * 9))] * weight_conv1[(ra2 + (ra1 * 5))]) + reducer0); + } + } + conv2d = reducer0; + } + } + } + float tanh1; + for (int args = 0; args < 1; ++args) { + for (int args1 = 0; args1 < -1; ++args1) { + for (int args2 = 0; args2 < -1; 
++args2) { + tanh1 = ((float)tanh(((float)conv2d))); + } + } + } + float max_pool; + for (int i = 0; i < 1; ++i) { + for (int h = 0; h < -1; ++h) { + for (int w = 0; w < -1; ++w) { + float reducer1; + reducer1 = -1.000000e+00f; + for (int ra3 = 0; ra3 < 2; ++ra3) { + for (int ra4 = 0; ra4 < 2; ++ra4) { + reducer1 = max(tanh1, reducer1); + } + } + max_pool = reducer1; + } + } + } + float conv2d1[250]; + for (int nn1 = 0; nn1 < 1; ++nn1) { + for (int ff = 0; ff < 10; ++ff) { + for (int yy1 = 0; yy1 < -5; ++yy1) { + for (int xx1 = 0; xx1 < -5; ++xx1) { + float reducer2; + reducer2 = 0.000000e+00f; + for (int ra6 = 0; ra6 < 5; ++ra6) { + for (int ra7 = 0; ra7 < 5; ++ra7) { + reducer2 = ((max_pool * weight_conv2[((ra7 + (ra6 * 5)) + (ff * 25))]) + reducer2); + } + } + conv2d1[(((xx1 - (yy1 * 5)) + (ff * 25)) + (nn1 * 250))] = reducer2; + } + } + } + } + float tanh2[250]; + for (int args3 = 0; args3 < 1; ++args3) { + for (int args0 = 0; args0 < 10; ++args0) { + for (int args11 = 0; args11 < -5; ++args11) { + for (int args21 = 0; args21 < -5; ++args21) { + tanh2[(((args21 - (args11 * 5)) + (args0 * 25)) + (args3 * 250))] = ((float)tanh(((float)conv2d1[(((args21 - (args11 * 5)) + (args0 * 25)) + (args3 * 250))]))); + } + } + } + } + float max_pool1[90]; + for (int i1 = 0; i1 < 1; ++i1) { + for (int c = 0; c < 10; ++c) { + for (int h1 = 0; h1 < -3; ++h1) { + for (int w1 = 0; w1 < -3; ++w1) { + float reducer3; + reducer3 = -1.000000e+00f; + for (int ra8 = 0; ra8 < 2; ++ra8) { + for (int ra9 = 0; ra9 < 2; ++ra9) { + reducer3 = max(tanh2[(((((w1 * 2) - (((h1 * 2) + ra8) * 5)) + ra9) + (c * 25)) + (i1 * 250))], reducer3); + } + } + max_pool1[(((w1 - (h1 * 3)) + (c * 9)) + (i1 * 90))] = reducer3; + } + } + } + } + float compute0[90]; + for (int i2 = 0; i2 < 1; ++i2) { + for (int j = 0; j < 90; ++j) { + compute0[(j + (i2 * 90))] = max_pool1[((((j % -3) - (((j / -3) % -3) * 3)) + ((((j / -3) / -3) % 10) * 9)) + (i2 * 90))]; + } + } + float dense[25]; + for (int i3 = 0; i3 < 1; 
++i3) { + for (int j1 = 0; j1 < 25; ++j1) { + float reducer4; + reducer4 = 0.000000e+00f; + for (int ra10 = 0; ra10 < 90; ++ra10) { + reducer4 = ((compute0[(ra10 + (i3 * 90))] * weight_fc1[(ra10 + (j1 * 40))]) + reducer4); + } + dense[(j1 + (i3 * 25))] = reducer4; + } + } + float tanh3[25]; + for (int args4 = 0; args4 < 1; ++args4) { + for (int args01 = 0; args01 < 25; ++args01) { + tanh3[(args01 + (args4 * 25))] = ((float)tanh(((float)dense[(args01 + (args4 * 25))]))); + } + } + float dense1[10]; + for (int i4 = 0; i4 < 1; ++i4) { + for (int j2 = 0; j2 < 10; ++j2) { + float reducer5; + reducer5 = 0.000000e+00f; + for (int ra11 = 0; ra11 < 25; ++ra11) { + reducer5 = ((tanh3[(ra11 + (i4 * 25))] * weight_fc2[(ra11 + (j2 * 25))]) + reducer5); + } + dense1[(j2 + (i4 * 10))] = reducer5; + } + } + float compute1; + int max1; + max1 = 0; + for (int ra12 = 0; ra12 < 10; ++ra12) { + max1 = ((int)max(dense1[ra12], ((float)max1))); + } + compute1 = ((float)max1); + float compute2; + int sum; + sum = 0; + for (int ra13 = 0; ra13 < 10; ++ra13) { + sum = ((int)(exp(((float)(dense1[ra13] - compute1))) + ((float)sum))); + } + compute2 = ((float)sum); + float update0; + for (int j3 = 0; j3 < 10; ++j3) { + lenet[j3] = ((float)(exp(((float)(dense1[j3] - compute1))) / ((float)compute2))); + } +} + diff --git a/samples/lenet/lenet_main.py b/samples/lenet/lenet_main.py index 418c64cf0..b3f29c42e 100644 --- a/samples/lenet/lenet_main.py +++ b/samples/lenet/lenet_main.py @@ -67,7 +67,8 @@ def build_lenet(input_image, weight_conv1, weight_conv2, qtype1 = hcl.Fixed(16, 14) qtype2 = hcl.Fixed(16, 14) correct_sum = 0 -batch_size = 1000 +# batch_size = 1000 +batch_size = 1 mnist = mx.test_utils.get_mnist() ############################################################################### @@ -76,23 +77,37 @@ def build_lenet(input_image, weight_conv1, weight_conv2, # the internal tensors, we use `hcl.quantize` API. 
def build_lenet_inf(batch_size=batch_size, target=None): # set up input/output placeholders - input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image") - weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1", qtype1) - weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2", qtype1) - weight_fc1 = hcl.placeholder((500, 800), "weight_fc1", qtype1) - weight_fc2 = hcl.placeholder((10, 500), "weight_fc2", qtype1) + #input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image") + input_image = hcl.placeholder((batch_size, 1, 3, 3), "input_image") + # weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1", qtype1) + weight_conv1 = hcl.placeholder((1, 1, 5, 5), "weight_conv1") + # weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1") + weight_conv2 = hcl.placeholder((10, 1, 5, 5), "weight_conv2") + # weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2") + # weight_fc1 = hcl.placeholder((500, 800), "weight_fc1", qtype1) + weight_fc1 = hcl.placeholder((25, 40), "weight_fc1") + # weight_fc1 = hcl.placeholder((500, 800), "weight_fc1") + # weight_fc2 = hcl.placeholder((10, 500), "weight_fc2", qtype1) + weight_fc2 = hcl.placeholder((10, 25), "weight_fc2") + # weight_fc2 = hcl.placeholder((10, 500), "weight_fc2") lenet = hcl.placeholder((batch_size, 10), "lenet") # create a quantization scheme - scheme = hcl.create_scheme( - [input_image, weight_conv1, weight_conv2, - weight_fc1, weight_fc2, lenet], build_lenet) + # scheme = hcl.create_scheme( + # [input_image, weight_conv1, weight_conv2, + # weight_fc1, weight_fc2, lenet], build_lenet) # quantize the three activation layers - scheme.quantize( - [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3], qtype2) - s = hcl.create_schedule_from_scheme(scheme) + #scheme.quantize( + # [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3], qtype2) + #s = hcl.create_schedule_from_scheme(scheme) + s = hcl.create_schedule([input_image, weight_conv1, weight_conv2, + weight_fc1, 
weight_fc2, lenet], build_lenet) return hcl.build(s, target=target) -f = build_lenet_inf() +# f = build_lenet_inf() +code = build_lenet_inf(batch_size, 'aocl') +with open('lenet_aocl.cl', 'w') as f: + f.write(code) +assert 1==2 ############################################################################### diff --git a/samples/smith_waterman/sdaccel_code.cl b/samples/smith_waterman/sdaccel_code.cl index 0fb6ea32d..a0f5fdb01 100644 --- a/samples/smith_waterman/sdaccel_code.cl +++ b/samples/smith_waterman/sdaccel_code.cl @@ -1,7 +1,7 @@ __kernel void default_function(__global unsigned char* seqAs, __global unsigned char* seqBs, __global unsigned char* outAs, __global unsigned char* outBs) { __local int B; __attribute__((xcl_pipeline_loop(1))) - for (int t_outer = 0; t_outer < 32; ++t_outer) { + for (int t_outer = 0; t_outer < 2; ++t_outer) { for (int t_inner = 0; t_inner < 32; ++t_inner) { __local int maxtrix_max; @@ -10,29 +10,29 @@ __kernel void default_function(__global unsigned char* seqAs, __global unsigned i_max = 0; __local int j_max; j_max = 0; - __local short matrix[16641]; - for (int x = 0; x < 129; ++x) { - for (int y = 0; y < 129; ++y) { - matrix[(y + (x * 129))] = (short)0; + __local short matrix[841]; + for (int x = 0; x < 29; ++x) { + for (int y = 0; y < 29; ++y) { + matrix[(y + (x * 29))] = (short)0; } } - __local short action[16641]; - for (int x1 = 0; x1 < 129; ++x1) { - for (int y1 = 0; y1 < 129; ++y1) { - action[(y1 + (x1 * 129))] = (short)3; + __local short action[841]; + for (int x1 = 0; x1 < 29; ++x1) { + for (int y1 = 0; y1 < 29; ++y1) { + action[(y1 + (x1 * 29))] = (short)3; } } __local int mutate1; - for (int i = 0; i < 129; ++i) { - for (int j = 0; j < 129; ++j) { + for (int i = 0; i < 29; ++i) { + for (int j = 0; j < 29; ++j) { __local int trace_back[4]; for (int x2 = 0; x2 < 4; ++x2) { trace_back[x2] = 0; } if ((i != 0) && (j != 0)) { - trace_back[0] = ((int)(((long)matrix[((j + (i * 129)) + -130)]) + ((long)(int)((seqAs[((i + ((t_inner + 
(t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 1 : -4)))); - trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); - trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); + trace_back[0] = ((int)(((long)matrix[((j + (i * 29)) + -30)]) + ((long)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 28)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 28)) + -1)]) ? 1 : -4)))); + trace_back[1] = (((int)matrix[((j + (i * 29)) + -29)]) + -4); + trace_back[2] = (((int)matrix[((j + (i * 29)) + -1)]) + -4); trace_back[3] = 0; __local int max; max = trace_back[0]; @@ -44,10 +44,10 @@ __kernel void default_function(__global unsigned char* seqAs, __global unsigned act = i1; } } - matrix[(j + (i * 129))] = ((short)max); - action[(j + (i * 129))] = ((short)act); - if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { - maxtrix_max = ((int)matrix[(j + (i * 129))]); + matrix[(j + (i * 29))] = ((short)max); + action[(j + (i * 29))] = ((short)act); + if (maxtrix_max < ((int)matrix[(j + (i * 29))])) { + maxtrix_max = ((int)matrix[(j + (i * 29))]); i_max = i; j_max = j; } @@ -64,7 +64,7 @@ __kernel void default_function(__global unsigned char* seqAs, __global unsigned __local int next_j; next_j = 0; __local int act1; - act1 = ((int)action[(curr_j + (curr_i * 129))]); + act1 = ((int)action[(curr_j + (curr_i * 29))]); __local int next_i1; next_i1 = 0; __local int next_j1; @@ -98,19 +98,19 @@ __kernel void default_function(__global unsigned char* seqAs, __global unsigned if (next_i == curr_i) { a = 0; } else { - a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 28)) + -1)]); } if (next_j == curr_j) { b = 0; } else { - b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 28)) + -1)]); } - outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)a); - 
outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((unsigned char)b); + outAs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((unsigned char)a); + outBs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((unsigned char)b); curr_i = next_i; curr_j = next_j; __local int act2; - act2 = ((int)action[(curr_j + (curr_i * 129))]); + act2 = ((int)action[(curr_j + (curr_i * 29))]); __local int next_i2; next_i2 = 0; __local int next_j2; diff --git a/samples/smith_waterman/smith_aocl.cl b/samples/smith_waterman/smith_aocl.cl index bf8608082..80a4ba601 100644 --- a/samples/smith_waterman/smith_aocl.cl +++ b/samples/smith_waterman/smith_aocl.cl @@ -1,8 +1,8 @@ #include "ihc_apint.h" -__kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_t* restrict seqBs, __global uint3_t* restrict outAs, __global uint3_t* restrict outBs) { +__kernel void default_function(__global uint* restrict seqAs, __global uint* restrict seqBs, __global uint* restrict outAs, __global uint* restrict outBs) { int B; #pragma ii 1 - for (int t_outer = 0; t_outer < 32; ++t_outer) { + for (int t_outer = 0; t_outer < 2; ++t_outer) { #pragma unroll for (int t_inner = 0; t_inner < 32; ++t_inner) { int maxtrix_max; @@ -11,29 +11,29 @@ __kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_ i_max = 0; int j_max; j_max = 0; - short matrix[16641]; - for (int x = 0; x < 129; ++x) { - for (int y = 0; y < 129; ++y) { - matrix[(y + (x * 129))] = (short)0; + short matrix[841]; + for (int x = 0; x < 29; ++x) { + for (int y = 0; y < 29; ++y) { + matrix[(y + (x * 29))] = (short)0; } } - short action[16641]; - for (int x1 = 0; x1 < 129; ++x1) { - for (int y1 = 0; y1 < 129; ++y1) { - action[(y1 + (x1 * 129))] = (short)3; + short action[841]; + for (int x1 = 0; x1 < 29; ++x1) { + for (int y1 = 0; y1 < 29; ++y1) { + action[(y1 + (x1 * 29))] = (short)3; } } int mutate3; - for (int i = 0; i < 129; ++i) { - for (int j = 0; j < 129; ++j) { + for (int i = 0; i < 29; ++i) { + for (int j = 
0; j < 29; ++j) { int trace_back[4]; for (int x2 = 0; x2 < 4; ++x2) { trace_back[x2] = 0; } if ((i != 0) && (j != 0)) { - trace_back[0] = ((int)(((int33_t)matrix[((j + (i * 129)) + -130)]) + ((int33_t)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 128)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 128)) + -1)]) ? 1 : -4)))); - trace_back[1] = (((int)matrix[((j + (i * 129)) + -129)]) + -4); - trace_back[2] = (((int)matrix[((j + (i * 129)) + -1)]) + -4); + trace_back[0] = ((int)(((int33_t)matrix[((j + (i * 29)) + -30)]) + ((int33_t)(int)((seqAs[((i + ((t_inner + (t_outer * 32)) * 28)) + -1)] == seqBs[((j + ((t_inner + (t_outer * 32)) * 28)) + -1)]) ? 1 : -4)))); + trace_back[1] = (((int)matrix[((j + (i * 29)) + -29)]) + -4); + trace_back[2] = (((int)matrix[((j + (i * 29)) + -1)]) + -4); trace_back[3] = 0; int max; max = trace_back[0]; @@ -45,10 +45,10 @@ __kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_ act = i1; } } - matrix[(j + (i * 129))] = ((short)max); - action[(j + (i * 129))] = ((short)act); - if (maxtrix_max < ((int)matrix[(j + (i * 129))])) { - maxtrix_max = ((int)matrix[(j + (i * 129))]); + matrix[(j + (i * 29))] = ((short)max); + action[(j + (i * 29))] = ((short)act); + if (maxtrix_max < ((int)matrix[(j + (i * 29))])) { + maxtrix_max = ((int)matrix[(j + (i * 29))]); i_max = i; j_max = j; } @@ -65,7 +65,7 @@ __kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_ int next_j; next_j = 0; int act1; - act1 = ((int)action[(curr_j + (curr_i * 129))]); + act1 = ((int)action[(curr_j + (curr_i * 29))]); int next_i1; next_i1 = 0; int next_j1; @@ -99,19 +99,19 @@ __kernel void default_function(__global uint3_t* restrict seqAs, __global uint3_ if (next_i == curr_i) { a = 0; } else { - a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 128)) + -1)]); + a = ((int)seqAs[((curr_i + ((t_inner + (t_outer * 32)) * 28)) + -1)]); } if (next_j == curr_j) { b = 0; } else { - b = ((int)seqBs[((curr_j + 
((t_inner + (t_outer * 32)) * 128)) + -1)]); + b = ((int)seqBs[((curr_j + ((t_inner + (t_outer * 32)) * 28)) + -1)]); } - outAs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)a); - outBs[(tick + ((t_inner + (t_outer * 32)) * 256))] = ((uint3_t)b); + outAs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((uint3_t)a); + outBs[(tick + ((t_inner + (t_outer * 32)) * 56))] = ((uint3_t)b); curr_i = next_i; curr_j = next_j; int act2; - act2 = ((int)action[(curr_j + (curr_i * 129))]); + act2 = ((int)action[(curr_j + (curr_i * 29))]); int next_i2; next_i2 = 0; int next_j2; diff --git a/samples/smith_waterman/smith_vhls.cl b/samples/smith_waterman/smith_vhls.cl new file mode 100644 index 000000000..4fd36c8aa --- /dev/null +++ b/samples/smith_waterman/smith_vhls.cl @@ -0,0 +1,146 @@ +#include +#include +#include + +void default_function(ap_uint<3> seqAs[64][28], ap_uint<3> seqBs[64][28], ap_uint<3> outAs[64][56], ap_uint<3> outBs[64][56]) { + ap_int<32> B; + for (ap_int<32> t_outer = 0; t_outer < 2; ++t_outer) { + #pragma HLS pipeline + for (ap_int<32> t_inner = 0; t_inner < 32; ++t_inner) { + #pragma HLS unroll + ap_int<32> maxtrix_max; + maxtrix_max = 0; + ap_int<32> i_max; + i_max = 0; + ap_int<32> j_max; + j_max = 0; + ap_int<16> matrix[29][29]; + for (ap_int<32> x = 0; x < 29; ++x) { + for (ap_int<32> y = 0; y < 29; ++y) { + matrix[x][y] = (ap_int<16>)0; + } + } + ap_int<16> action[29][29]; + for (ap_int<32> x1 = 0; x1 < 29; ++x1) { + for (ap_int<32> y1 = 0; y1 < 29; ++y1) { + action[x1][y1] = (ap_int<16>)3; + } + } + ap_int<32> mutate5; + for (ap_int<32> i = 0; i < 29; ++i) { + for (ap_int<32> j = 0; j < 29; ++j) { + ap_int<32> trace_back[4]; + for (ap_int<32> x2 = 0; x2 < 4; ++x2) { + trace_back[x2] = 0; + } + if ((i != 0) && (j != 0)) { + trace_back[0] = ((ap_int<32>)(((ap_int<33>)matrix[(i + -1)][(j + -1)]) + ((ap_int<33>)((seqAs[(t_inner + (t_outer * 32))][(i + -1)] == seqBs[(t_inner + (t_outer * 32))][(j + -1)]) ? 
1 : -4)))); + trace_back[1] = (((ap_int<32>)matrix[(i + -1)][j]) + -4); + trace_back[2] = (((ap_int<32>)matrix[i][(j + -1)]) + -4); + trace_back[3] = 0; + ap_int<32> max; + max = trace_back[0]; + ap_int<32> act; + act = 0; + for (ap_int<32> i1 = 0; i1 < 4; ++i1) { + if (max < trace_back[i1]) { + max = trace_back[i1]; + act = i1; + } + } + matrix[i][j] = ((ap_int<16>)max); + action[i][j] = ((ap_int<16>)act); + if (maxtrix_max < ((ap_int<32>)matrix[i][j])) { + maxtrix_max = ((ap_int<32>)matrix[i][j]); + i_max = i; + j_max = j; + } + } + } + } + ap_int<32> T; + ap_int<32> curr_i; + curr_i = i_max; + ap_int<32> curr_j; + curr_j = j_max; + ap_int<32> next_i; + next_i = 0; + ap_int<32> next_j; + next_j = 0; + ap_int<32> act1; + act1 = ((ap_int<32>)action[((curr_j / 29) + curr_i)][(curr_j % 29)]); + ap_int<32> next_i1; + next_i1 = 0; + ap_int<32> next_j1; + next_j1 = 0; + if (act1 == 0) { + next_i1 = (curr_i + -1); + next_j1 = (curr_j + -1); + } else { + if (act1 == 1) { + next_i1 = (curr_i + -1); + next_j1 = curr_j; + } else { + if (act1 == 2) { + next_i1 = curr_i; + next_j1 = (curr_j + -1); + } else { + next_i1 = curr_i; + next_j1 = curr_j; + } + } + } + next_i = next_i1; + next_j = next_j1; + ap_int<32> tick; + tick = 0; + while (((curr_i != next_i) || (curr_j != next_j))) { + ap_int<32> a; + a = 0; + ap_int<32> b; + b = 0; + if (next_i == curr_i) { + a = 0; + } else { + a = ((ap_int<32>)seqAs[((((curr_i - ((curr_i + -1) % 28)) + ((t_inner + (t_outer * 32)) * 28)) + -1) / 28)][((curr_i + -1) % 28)]); + } + if (next_j == curr_j) { + b = 0; + } else { + b = ((ap_int<32>)seqBs[((((curr_j - ((curr_j + -1) % 28)) + ((t_inner + (t_outer * 32)) * 28)) + -1) / 28)][((curr_j + -1) % 28)]); + } + outAs[((tick / 56) + (t_inner + (t_outer * 32)))][(tick % 56)] = ((ap_uint<3>)a); + outBs[((tick / 56) + (t_inner + (t_outer * 32)))][(tick % 56)] = ((ap_uint<3>)b); + curr_i = next_i; + curr_j = next_j; + ap_int<32> act2; + act2 = ((ap_int<32>)action[((curr_j / 29) + curr_i)][(curr_j % 
29)]); + ap_int<32> next_i2; + next_i2 = 0; + ap_int<32> next_j2; + next_j2 = 0; + if (act2 == 0) { + next_i2 = (curr_i + -1); + next_j2 = (curr_j + -1); + } else { + if (act2 == 1) { + next_i2 = (curr_i + -1); + next_j2 = curr_j; + } else { + if (act2 == 2) { + next_i2 = curr_i; + next_j2 = (curr_j + -1); + } else { + next_i2 = curr_i; + next_j2 = curr_j; + } + } + } + next_i = next_i2; + next_j = next_j2; + tick = (tick + 1); + } + } + } +} + diff --git a/samples/smith_waterman/smith_waterman_main.py b/samples/smith_waterman/smith_waterman_main.py index 7a93849fe..6a104af93 100644 --- a/samples/smith_waterman/smith_waterman_main.py +++ b/samples/smith_waterman/smith_waterman_main.py @@ -10,9 +10,12 @@ import numpy as np import time -lenA = 128 -lenB = 128 -num = 1024 +#lenA = 128 +lenA = 28 +#lenB = 128 +lenB = 28 +#num = 1024 +num = 64 penalty = -4 hcl.init() @@ -156,6 +159,10 @@ def batch_sw(seqAs, seqBs, outAs, outBs): with open('smith_aocl.cl', 'w') as fin: fin.write(code2) +code3 = top('vhls') +with open('smith_vhls.cl', 'w') as fin: + fin.write(code3) + assert 1==2 diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 7af5dafc0..c4975d673 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -131,14 +131,14 @@ void CodeGenAOCL::PrintType(Type t, std::ostream &os) { case 16: os<<"half"; - enable_fp16_ = true; + // enable_fp16_ = true; break; case 32: os<<"float"; break; case 64: os<< "double"; - enable_fp64_ = true; + // enable_fp64_ = true; break; default: fail = true; @@ -173,11 +173,19 @@ void CodeGenAOCL::PrintType(Type t, std::ostream &os) { if(t.is_uint()) { - os<< "uint"< 64) { + os << "uint" << "64" << "_t"; return; + } else { + os<< "uint"< 64) { + os << "int" << "64" << "_t"; return; + } else { + os << "int" << t.bits() << "_t"; return; + } } } } From 879da3c1e341c575197f431c3cdb120124baae98 Mon Sep 17 00:00:00 2001 From: Shawn Xiang Date: Sun, 15 Sep 2019 
13:06:03 -0400 Subject: [PATCH 073/103] [add] ir visitor & functor for codegen --- samples/stream/stream.py | 4 ++-- tvm/include/tvm/ir_functor_ext.h | 4 ++++ tvm/include/tvm/ir_visitor.h | 2 ++ tvm/src/codegen/codegen_c.cc | 8 ++++++++ tvm/src/codegen/codegen_c.h | 2 ++ tvm/src/codegen/hlsc/codegen_hlsc.cc | 1 + tvm/src/codegen/hlsc/codegen_vhls.cc | 25 +++++++++++++++++++++++++ tvm/src/codegen/hlsc/codegen_vhls.h | 2 ++ tvm/src/lang/ir.cc | 2 ++ tvm/src/pass/ir_visitor.cc | 9 +++++++++ 10 files changed, 57 insertions(+), 2 deletions(-) diff --git a/samples/stream/stream.py b/samples/stream/stream.py index aa70ebf3c..ea2255d1f 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -48,9 +48,9 @@ def ret_mul(c, d, e): s[add_mul.ret_mul]) # return buffer for inter-device move -d0 = s[d].stream_to(hcl.FPGA('intel')) +# d0 = s[d].stream_to(hcl.FPGA('intel')) # print(add_mul.ret_mul._buf, c._buf) print(hcl.lower(s)) print(hcl.build(s, target="vhls")) - + diff --git a/tvm/include/tvm/ir_functor_ext.h b/tvm/include/tvm/ir_functor_ext.h index c4f18ba7e..39ce6d2b8 100644 --- a/tvm/include/tvm/ir_functor_ext.h +++ b/tvm/include/tvm/ir_functor_ext.h @@ -148,6 +148,7 @@ class ExprFunctor { virtual R VisitExpr_(const SetSlice* op, Args... args) EXPR_FUNCTOR_DEFAULT; virtual R VisitExpr_(const Quantize* op, Args... args) EXPR_FUNCTOR_DEFAULT; virtual R VisitExpr_(const KernelExpr* op, Args... args) EXPR_FUNCTOR_DEFAULT; + virtual R VisitExpr_(const StreamExpr* op, Args... args) EXPR_FUNCTOR_DEFAULT; virtual R VisitExprDefault_(const Node* op, Args ...) { LOG(FATAL) << "Do not have a default for " << op->type_key(); return R(); @@ -193,6 +194,7 @@ class ExprFunctor { IR_EXPR_FUNCTOR_DISPATCH(SetSlice); IR_EXPR_FUNCTOR_DISPATCH(Quantize); IR_EXPR_FUNCTOR_DISPATCH(KernelExpr); + IR_EXPR_FUNCTOR_DISPATCH(StreamExpr); return vtable; } }; @@ -244,6 +246,7 @@ class StmtFunctor { virtual R VisitStmt_(const Evaluate* op, Args... 
args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const KernelDef* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const KernelStmt* op, Args... args) STMT_FUNCTOR_DEFAULT; + virtual R VisitStmt_(const StreamStmt* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const Return* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const Break* op, Args... args) STMT_FUNCTOR_DEFAULT; virtual R VisitStmt_(const While* op, Args... args) STMT_FUNCTOR_DEFAULT; @@ -275,6 +278,7 @@ class StmtFunctor { IR_STMT_FUNCTOR_DISPATCH(Evaluate); IR_STMT_FUNCTOR_DISPATCH(KernelDef); IR_STMT_FUNCTOR_DISPATCH(KernelStmt); + IR_STMT_FUNCTOR_DISPATCH(StreamStmt); IR_STMT_FUNCTOR_DISPATCH(Return); IR_STMT_FUNCTOR_DISPATCH(Break); IR_STMT_FUNCTOR_DISPATCH(While); diff --git a/tvm/include/tvm/ir_visitor.h b/tvm/include/tvm/ir_visitor.h index 6fe616aab..21ef77c32 100644 --- a/tvm/include/tvm/ir_visitor.h +++ b/tvm/include/tvm/ir_visitor.h @@ -131,6 +131,8 @@ class TVM_DLL IRVisitor { virtual void Visit_(const KernelDef* op); virtual void Visit_(const KernelExpr* op); virtual void Visit_(const KernelStmt* op); + virtual void Visit_(const StreamExpr* op); + virtual void Visit_(const StreamStmt* op); virtual void Visit_(const Return* op); virtual void Visit_(const Break* op); virtual void Visit_(const While* op); diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 6a6acbcb6..02b07f64d 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -729,6 +729,10 @@ void CodeGenC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) LOG(FATAL) << "Quantize is not yet support"; } +void CodeGenC::VisitExpr_(const StreamExpr *op, std::ostream& os) { // NOLINT(*) + LOG(FATAL) << "StreamExpr is not implemented yet"; +} + void CodeGenC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) os << op->name << "("; for (size_t i = 0; i < op->args.size(); ++i) { @@ -738,6 +742,10 @@ void 
CodeGenC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) os << ")"; } +void CodeGenC::VisitStmt_(const StreamStmt *op) { // NOLINT(*) + LOG(FATAL) << "StreamStmt is not implemented yet"; +} + void CodeGenC::VisitStmt_(const LetStmt* op) { std::string value = PrintExpr(op->value); if (print_ssa_form_) { diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index ae6093df0..0c158420a 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -117,6 +117,7 @@ class CodeGenC : void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const StreamExpr* op, std::ostream& os) override; // NOLINT(*) // statment void VisitStmt_(const LetStmt* op) override; void VisitStmt_(const Store* op) override; @@ -130,6 +131,7 @@ class CodeGenC : void VisitStmt_(const ProducerConsumer* op) override; void VisitStmt_(const KernelDef* op) override; void VisitStmt_(const KernelStmt* op) override; + void VisitStmt_(const StreamStmt* op) override; void VisitStmt_(const Return* op) override; void VisitStmt_(const Break* op) override; void VisitStmt_(const While* op) override; diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index 46a711640..5ff7afb0d 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -88,6 +88,7 @@ void CodeGenHLSC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) PrintExpr(op->b, os); os << ")"; } + void CodeGenHLSC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) os << "std::max("; PrintExpr(op->a, os); diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index 6a0977e40..4cfbc8677 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -143,6 +143,31 
@@ void CodeGenVivadoHLS::VisitStmt_(const Partition* op) { stream << "\n"; } +void CodeGenVivadoHLS::VisitExpr_(const StreamExpr* op, std::ostream& os) { + std::string vid = GetVarID(op->buffer_var.get()); + os << vid << ".read()"; +} + +void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { + std::string vid = GetVarID(op->buffer_var.get()); + PrintIndent(); + stream << vid; + switch (op->stream_type) { + case StreamType::Channel: + stream << "[channel]"; + break; + case StreamType::FIFO: + stream << "[fifo]"; + break; + case StreamType::Pipe: + stream << "[pipe]"; + break; + } + stream << ".write"; + PrintExpr(op->value, stream); + stream << ";\n"; +} + class AllocateCollector final : public IRVisitor { public: AllocateCollector(std::vector& alloc_list, diff --git a/tvm/src/codegen/hlsc/codegen_vhls.h b/tvm/src/codegen/hlsc/codegen_vhls.h index 5486be1dc..a2dd5fa0e 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.h +++ b/tvm/src/codegen/hlsc/codegen_vhls.h @@ -23,11 +23,13 @@ class CodeGenVivadoHLS final : public CodeGenHLSC { void VisitExpr_(const GetBit* op, std::ostream& os) override; void VisitExpr_(const GetSlice* op, std::ostream& os) override; + void VisitExpr_(const StreamExpr* op, std::ostream& os) override; void VisitStmt_(const Store* op) override; void VisitStmt_(const For* op) override; void VisitStmt_(const Partition* op) override; void VisitStmt_(const Stencil* op) override; + void VisitStmt_(const StreamStmt* op) override; private: std::ofstream soda_header_; }; diff --git a/tvm/src/lang/ir.cc b/tvm/src/lang/ir.cc index 3589de195..c88f8ea94 100644 --- a/tvm/src/lang/ir.cc +++ b/tvm/src/lang/ir.cc @@ -149,6 +149,8 @@ TVM_REGISTER_NODE_TYPE(Quantize); TVM_REGISTER_NODE_TYPE(KernelDef); TVM_REGISTER_NODE_TYPE(KernelExpr); TVM_REGISTER_NODE_TYPE(KernelStmt); +TVM_REGISTER_NODE_TYPE(StreamStmt); +TVM_REGISTER_NODE_TYPE(StreamExpr); TVM_REGISTER_NODE_TYPE(Return); TVM_REGISTER_NODE_TYPE(Break); TVM_REGISTER_NODE_TYPE(While); diff --git 
a/tvm/src/pass/ir_visitor.cc b/tvm/src/pass/ir_visitor.cc index 160cb906e..6346c6262 100644 --- a/tvm/src/pass/ir_visitor.cc +++ b/tvm/src/pass/ir_visitor.cc @@ -252,6 +252,13 @@ void IRVisitor::Visit_(const KernelStmt *op) { } } +void IRVisitor::Visit_(const StreamStmt *op) { + this->Visit(op->value); +} + +void IRVisitor::Visit_(const StreamExpr *op) { +} + void IRVisitor::Visit_(const Return *op) { this->Visit(op->value); } @@ -338,6 +345,8 @@ TVM_STATIC_IR_FUNCTOR(IRVisitor, vtable) .DISPATCH_TO_VISIT(KernelDef) .DISPATCH_TO_VISIT(KernelExpr) .DISPATCH_TO_VISIT(KernelStmt) +.DISPATCH_TO_VISIT(StreamStmt) +.DISPATCH_TO_VISIT(StreamExpr) .DISPATCH_TO_VISIT(Return) .DISPATCH_TO_VISIT(Break) .DISPATCH_TO_VISIT(While) From 245bffa8c3482fc5a19683c5d08795636d00fefc Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 16 Sep 2019 11:46:38 -0400 Subject: [PATCH 074/103] [add] aocl stream codegen --- tvm/src/codegen/opencl/build_opencl.cc | 16 +- tvm/src/codegen/opencl/codeanalys_openclc.cc | 919 ------------------- tvm/src/codegen/opencl/codeanalys_openclc.h | 202 ---- tvm/src/codegen/opencl/codegen_aocl.cc | 87 +- tvm/src/codegen/opencl/codegen_aocl.h | 26 +- tvm/src/codegen/opencl/codegen_opencl.cc | 16 - tvm/src/codegen/opencl/codegen_opencl.h | 68 +- tvm/src/codegen/opencl/codegen_sdaccel.cc | 25 +- tvm/src/codegen/opencl/codegen_sdaccel.h | 4 +- 9 files changed, 81 insertions(+), 1282 deletions(-) delete mode 100755 tvm/src/codegen/opencl/codeanalys_openclc.cc delete mode 100755 tvm/src/codegen/opencl/codeanalys_openclc.h diff --git a/tvm/src/codegen/opencl/build_opencl.cc b/tvm/src/codegen/opencl/build_opencl.cc index 1ccfb0d32..f5b1352a7 100755 --- a/tvm/src/codegen/opencl/build_opencl.cc +++ b/tvm/src/codegen/opencl/build_opencl.cc @@ -1,22 +1,15 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - #include "./codegen_aocl.h" #include "./codegen_sdaccel.h" #include "../build_common.h" #include "./sdaccel_module.h" - - +#include "../merlinc/codeanalys_merlinc.h" namespace TVM 
{ namespace codegen { - #if HCL_SDACCEL_RUNTIME runtime::Module BuildSDAccelSim(Array funcs) { - CodeAnalysOpenCLC ca; + CodeAnalysMerlinC ca; CodeGenSDACCEL cg; for (LoweredFunc f : funcs) { // 1st pass: Analyze AST and collect necessary information @@ -40,13 +33,12 @@ TVM_REGISTER_API("codegen.build_sdaccel_csim") template std::string BuildOpenCL(Array funcs){ using TVM::runtime::Registry; - CodeAnalysOpenCLC ca; + CodeAnalysMerlinC ca; CodeGen cg; for(LoweredFunc f: funcs){ ca.AddFunction(f); str2tupleMapmap_arg_type; map_arg_type = ca.Finish(); - cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); @@ -56,8 +48,6 @@ std::string BuildOpenCL(Array funcs){ } - - TVM_REGISTER_API("codegen.build_sdaccel") .set_body([]( TVMArgs args, TVMRetValue * rv ) { * rv = BuildOpenCL(args[0]); diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.cc b/tvm/src/codegen/opencl/codeanalys_openclc.cc deleted file mode 100755 index 030453a94..000000000 --- a/tvm/src/codegen/opencl/codeanalys_openclc.cc +++ /dev/null @@ -1,919 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file tvm/src/codegen/hlsc/codegen_hlsc.cc - */ -#include -#include -#include -#include "./codeanalys_openclc.h" -#include "../codegen_common.h" -#include "../../arithmetic/compute_expr.h" - -namespace TVM { -namespace codegen { - -using namespace ir; - -void CodeAnalysOpenCLC::Init() { - ; -} - -void CodeAnalysOpenCLC::InitFuncState(LoweredFunc f) { - alloc_storage_scope_.clear(); - handle_data_type_.clear(); - map_arg_type_.clear(); - CodeGenSourceBase::ClearFuncState(); -} -void CodeAnalysOpenCLC::AddFunction(LoweredFunc f) { - // Clear previous generated state. - this->InitFuncState(f); - - // Add to alloc buffer type. 
- for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - - // Record the arguments for analyzing the type - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - this->map_arg_type_[vid]; - } - int func_scope = this->BeginScope(); - VisitStmt(f->body); - this->EndScope(func_scope); -} - -str2tupleMap CodeAnalysOpenCLC::Finish() { - return this->map_arg_type_; -} - -void CodeAnalysOpenCLC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) - VisitExpr(n, os); -} - -void CodeAnalysOpenCLC::PrintSSAAssign( - const std::string& target, const std::string& src, Type t) { - PrintType(t, stream); - stream << ' ' << target << " = "; - if (src.length() > 3 && - src[0] == '(' && src[src.length() - 1] == ')') { - stream << src.substr(1, src.length() - 2); - } else { - stream << src; - } - stream << ";\n"; -} - -// Print a reference expression to a buffer. -std::string CodeAnalysOpenCLC::GetBufferRef( - Type t, const Variable* buffer, Expr index) { - std::ostringstream os; - std::string vid = GetVarID(buffer); - std::string scope; - if (alloc_storage_scope_.count(buffer)) { - scope = alloc_storage_scope_.at(buffer); - } - bool is_vol = volatile_buf_.count(buffer) != 0; - if (t.lanes() == 1) { - if (!HandleTypeMatch(buffer, t) || is_vol) { - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)" << vid << ')'; - } else { - os << vid; - } - os << '['; - PrintExpr(index, os); - os << ']'; - } else { - // Buffer declared as vector type. 
- // optimize for case where it is in register, - if (HandleTypeMatch(buffer, t) && !is_vol) { - // optimize for constant access - int offset; - if (arith::GetConstInt(index, &offset)) { - CHECK_EQ(offset % t.lanes(), 0) - << "Find unaligned vector load to a vector type"; - os << vid << '[' << (offset / t.lanes()) << ']'; - return os.str(); - } - } - os << "(("; - if (is_vol) { - os << "volatile "; - } - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t, os); - os << "*)("; - if (!HandleTypeMatch(buffer, t.element_of())) { - os << '('; - if (scope.length() != 0) { - PrintStorageScope(scope, os); - } - os << ' '; - PrintType(t.element_of(), os); - os << "*)"; - } - os << vid << " + "; - PrintExpr(index, os); - os << "))[0]"; - } - return os.str(); -} - -// Print a reference expression to a buffer. -std::string CodeAnalysOpenCLC::GetStructRef( - Type t, const Expr& buffer, const Expr& index, int kind) { - if (kind < intrinsic::kArrKindBound_) { - std::ostringstream os; - os << "(((TVMArray*)"; - this->PrintExpr(buffer, os); - os << ")"; - if (kind == intrinsic::kArrAddr) { - os << " + "; - this->PrintExpr(index, os); - os << ")"; - return os.str(); - } - os << '['; - this->PrintExpr(index, os); - os << "]."; - // other case: get fields. 
- switch (kind) { - case intrinsic::kArrData: os << "data"; break; - case intrinsic::kArrShape: os << "shape"; break; - case intrinsic::kArrStrides: os << "strides"; break; - case intrinsic::kArrNDim: os << "ndim"; break; - case intrinsic::kArrTypeCode: os << "dtype.code"; break; - case intrinsic::kArrTypeBits: os << "dtype.bits"; break; - case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break; - case intrinsic::kArrTypeFracs: os << "dtype.fracs"; break; - case intrinsic::kArrDeviceId: os << "ctx.device_id"; break; - case intrinsic::kArrDeviceType: os << "ctx.device_type"; break; - default: os << "unknown_field_code_" << kind; - } - os << ')'; - return os.str(); - } else { - CHECK_LT(kind, intrinsic::kTVMValueKindBound_); - std::ostringstream os; - os << "(((TVMValue*)"; - this->PrintExpr(buffer, os); - os << ")[" << index << "]."; - if (t.is_handle()) { - os << "v_handle"; - } else if (t.is_float()) { - os << "v_float64"; - } else if (t.is_int()) { - os << "v_int64"; - } else { - os << t; - } - os << ")"; - return os.str(); - } -} - - -bool CodeAnalysOpenCLC::HandleTypeMatch(const Variable* buf_var, Type t) const { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) return false; - return it->second == t; -} - -void CodeAnalysOpenCLC::RegisterHandleType(const Variable* buf_var, Type t) { - auto it = handle_data_type_.find(buf_var); - if (it == handle_data_type_.end()) { - handle_data_type_[buf_var] = t; - } else { - CHECK(it->second == t) - << "conflicting buf var type"; - } -} - -void CodeAnalysOpenCLC::PrintVecElemLoad(const std::string& vec, - Type t, int i, - std::ostream& os) { // NOLINT(*) - os << vec << ".s" << std::hex << i << std::dec; -} - -void CodeAnalysOpenCLC::PrintVecElemStore(const std::string& vec, - Type t, int i, - const std::string& value) { - this->PrintIndent(); - stream << vec << ".s" << std::hex << i - << " = " << value << ";\n" << std::dec; -} - -std::string CodeAnalysOpenCLC::GetVecLoad( - Type t, const 
Variable* buffer, Expr base) { - return GetBufferRef(t, buffer, base); -} - -void CodeAnalysOpenCLC::PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value) { - std::string ref = GetBufferRef(t, buffer, base); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; -} - -std::string CodeAnalysOpenCLC::CastFromTo(std::string value, Type from, Type target) { - if (from == target) return value; - std::ostringstream os; - os << "(("; - this->PrintType(target, os); - os << ")" << value << ")"; - return os.str(); -} - -void CodeAnalysOpenCLC::BindThreadIndex(const IterVar& iv) { - LOG(FATAL) << "not implemented"; -} - -void CodeAnalysOpenCLC::PrintStorageSync(const Call* op) { // NOLINT(*) -} - -void CodeAnalysOpenCLC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*) - CHECK_EQ(scope, "global"); -} - -std::string CodeAnalysOpenCLC::GetType(Type t) { // NOLINT(*) - std::ostringstream os; - PrintType(t, os); - return os.str(); -} - -void CodeAnalysOpenCLC::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - if (t.is_float()) { - if (t.bits() == 32) { - os << "float"; return; - } - if (t.bits() == 64) { - os << "double"; return; - } - } else if (t.is_uint()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "uint" << t.bits() << "_t"; return; - } - case 1: os << "int"; return; - } - } else if (t.is_int()) { - switch (t.bits()) { - case 8: case 16: case 32: case 64: { - os << "int" << t.bits() << "_t"; return; - } - } - } - os << t; -} - - -inline void PrintConst(const IntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == Int(32)) { - std::ostringstream temp; - temp << op->value; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void 
PrintConst(const UIntImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - if (op->type == UInt(32)) { - std::ostringstream temp; - temp << op->value << "U"; - p->MarkConst(temp.str()); - os << temp.str(); - } else { - os << "("; - p->PrintType(op->type, os); - os << ")" << op->value; - } -} - -inline void PrintConst(const FloatImm* op, std::ostream& os, CodeAnalysOpenCLC* p) { // NOLINT(*) - switch (op->type.bits()) { - case 64: case 32: { - std::ostringstream temp; - temp << std::scientific << op->value; - if (op->type.bits() == 32) temp << 'f'; - p->MarkConst(temp.str()); - os << temp.str(); - break; - } - case 16: { - os << '('; - p->PrintType(op->type, os); - os << ')' << std::scientific <value << 'f'; - break; - } - default: os << op << "\n"; - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) - PrintConst(op, os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) - os << "\"" << op->value << "\""; -} - -template -inline void PrintBinaryExpr(const T* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - if (isalpha(opstr[0])) { - os << opstr << '('; - p->PrintExpr(op->a, os); - os << ", "; - p->PrintExpr(op->b, os); - os << ')'; - } else { - os << '('; - p->PrintExpr(op->a, os); - os << ' ' << opstr << ' '; - p->PrintExpr(op->b, os); - os << ')'; - } - } else { - p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os); - } -} - -inline void PrintBinaryIntrinsitc(const Call* op, - const char *opstr, - std::ostream& os, // NOLINT(*) - CodeAnalysOpenCLC* p) { - if (op->type.lanes() == 1) { - CHECK_EQ(op->args.size(), 2U); - os << '('; - p->PrintExpr(op->args[0], 
os); - os << opstr; - p->PrintExpr(op->args[1], os); - os << ')'; - } else { - p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os); - } -} -void CodeAnalysOpenCLC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) - std::stringstream value; - this->PrintExpr(op->value, value); - os << CastFromTo(value.str(), op->value.type(), op->type); -} -void CodeAnalysOpenCLC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*) - os << GetVarID(op); -} -void CodeAnalysOpenCLC::VisitExpr_(const Add *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "+", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Sub *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "-", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "*", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "/", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Mod *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "%", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "min", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "max", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const EQ *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "==", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const NE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "!=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const LE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "<=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const GT *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">", os, this); -} -void 
CodeAnalysOpenCLC::VisitExpr_(const GE *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, ">=", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const And *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "&&", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Or *op, std::ostream& os) { // NOLINT(*) - PrintBinaryExpr(op, "||", os, this); -} -void CodeAnalysOpenCLC::VisitExpr_(const Not *op, std::ostream& os) { // NOLINT(*) - os << '!'; - PrintExpr(op->a, os); -} - -void CodeAnalysOpenCLC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) - if (op->call_type == Call::Extern || - op->call_type == Call::PureExtern) { - os << op->name << "("; - for (size_t i = 0; i < op->args.size(); i++) { - this->PrintExpr(op->args[i], os); - if (i < op->args.size() - 1) { - os << ", "; - } - } - os << ")"; - } else if (op->is_intrinsic(Call::bitwise_and)) { - PrintBinaryIntrinsitc(op, " & ", os, this); - } else if (op->is_intrinsic(Call::bitwise_xor)) { - PrintBinaryIntrinsitc(op, " ^ ", os, this); - } else if (op->is_intrinsic(Call::bitwise_or)) { - PrintBinaryIntrinsitc(op, " | ", os, this); - } else if (op->is_intrinsic(Call::bitwise_not)) { - CHECK_EQ(op->args.size(), 1U); - os << "(~"; - this->PrintExpr(op->args[0], os); - os << ')'; - } else if (op->is_intrinsic(Call::shift_left)) { - PrintBinaryIntrinsitc(op, " << ", os, this); - } else if (op->is_intrinsic(Call::shift_right)) { - PrintBinaryIntrinsitc(op, " >> ", os, this); - } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - os << "("; - PrintExpr(op->args[0], os); - os << " ? 
"; - PrintExpr(op->args[1], os); - os << " : "; - PrintExpr(op->args[2], os); - os << ")"; - } else if (op->is_intrinsic(intrinsic::tvm_address_of)) { - const Load *l = op->args[0].as(); - CHECK(op->args.size() == 1 && l); - os << "(("; - this->PrintType(l->type.element_of(), os); - os << " *)" << this->GetVarID(l->buffer_var.get()) - << " + "; - this->PrintExpr(l->index, os); - os << ')'; - } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) { - CHECK_EQ(op->args.size(), 3U); - os << GetStructRef( - op->type, op->args[0], op->args[1], - op->args[2].as()->value); - } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { - CHECK_EQ(op->args.size(), 1U); - os << "("; - this->PrintExpr(op->args[0], os); - os << " == NULL)"; - } else - os << op->name << "()"; -} - -void CodeAnalysOpenCLC::PrintVecBinaryOp( - const std::string& op, Type t, - Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) - if (isalpha(op[0])) { - os << op << "("; - this->PrintExpr(lhs, os); - os << ", "; - this->PrintExpr(rhs, os); - os << ")"; - } else { - os <<"("; - this->PrintExpr(lhs, os); - os << ' ' << op << ' '; - this->PrintExpr(rhs, os); - os << ")"; - } -} - -inline bool TryGetRamp1Base(Expr index, int lanes, Expr *base) { - const Ramp* r = index.as(); - if (!r) return false; - if (!is_one(r->stride)) return false; - CHECK_EQ(r->lanes, lanes); - *base = r->base; - return true; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Load* op, std::ostream& os) { // NOLINT(*) - int lanes = op->type.lanes(); - // delcare type. 
- if (op->type.lanes() == 1) { - std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index); - os << ref; - } else { - CHECK(is_one(op->predicate)) - << "predicated load is not supported"; - Expr base; - if (TryGetRamp1Base(op->index, op->type.lanes(), &base)) { - std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base); - os << ref; - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // load seperately. - std::string svalue = GetUniqueName("_"); - this->PrintIndent(); - this->PrintType(op->type, stream); - stream << ' ' << svalue << ";\n"; - std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string vid = GetVarID(op->buffer_var.get()); - Type elem_type = op->type.element_of(); - for (int i = 0; i < lanes; ++i) { - std::ostringstream value_temp; - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - value_temp << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, value_temp); - value_temp << ' '; - } - } - PrintType(elem_type, value_temp); - value_temp << "*)" << vid << ')'; - } else { - value_temp << vid; - } - value_temp << '['; - PrintVecElemLoad(sindex, op->index.type(), i, value_temp); - value_temp << ']'; - PrintVecElemStore(svalue, op->type, i, value_temp.str()); - } - os << svalue; - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitStmt_(const Store* op) { - Type t = op->value.type(); - if (t.lanes() == 1) { - std::string value = this->PrintExpr(op->value); - std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); - this->PrintIndent(); - stream << ref << " = " << value << ";\n"; - } else { - CHECK(is_one(op->predicate)) - << "Predicated store is not supported"; - Expr base; - if 
(TryGetRamp1Base(op->index, t.lanes(), &base)) { - std::string value = this->PrintExpr(op->value); - this->PrintVecStore(op->buffer_var.get(), t, base, value); - } else { - // The assignment below introduces side-effect, and the resulting value cannot - // be reused across multiple expression, thus a new scope is needed - int vec_scope = BeginScope(); - - // store elements seperately - std::string index = SSAGetID(PrintExpr(op->index), op->index.type()); - std::string value = SSAGetID(PrintExpr(op->value), op->value.type()); - std::string vid = GetVarID(op->buffer_var.get()); - for (int i = 0; i < t.lanes(); ++i) { - this->PrintIndent(); - Type elem_type = t.element_of(); - if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) { - stream << "(("; - if (op->buffer_var.get()->type.is_handle()) { - auto it = alloc_storage_scope_.find(op->buffer_var.get()); - if (it != alloc_storage_scope_.end()) { - PrintStorageScope(it->second, stream); - stream << ' '; - } - } - PrintType(elem_type, stream); - stream << "*)" << vid << ')'; - } else { - stream << vid; - } - stream << '['; - PrintVecElemLoad(index, op->index.type(), i, stream); - stream << "] = "; - PrintVecElemLoad(value, op->value.type(), i, stream); - stream << ";\n"; - } - EndScope(vec_scope); - } - } -} - -void CodeAnalysOpenCLC::VisitExpr_(const Let* op, std::ostream& os) { // NOLINT(*) - std::string value = PrintExpr(op->value); - CHECK(!var_idmap_.count(op->var.get())); - var_idmap_[op->var.get()] = value; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Ramp* op, std::ostream& os) { // NOLINT(*) - // constraint of current logic - CHECK_EQ(op->base.type(), Int(32)); - os << "((int" << op->lanes << ")("; - for (int i = 0; i < op->lanes; i++) { - os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")"; - if (i != op->lanes - 1) - os << ", "; - } - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << 
"Broadcast: not supported "; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->condition, os); - os << " ? "; - PrintExpr(op->true_value, os); - os << " : "; - PrintExpr(op->false_value, os); - os << ")"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " & (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. a' = SHR a for Idx_R bits - // 2. mask: 1.(length).1 - // (1 << (L - R + 1)) - 1 - // 3. a' & mask - - os << "(("; - PrintExpr(op->a, os); - os << " >> "; - PrintExpr(op->index_right, os); - os << ") & ((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) - os << "("; - PrintExpr(op->a, os); - os << " | (1 << ("; - PrintExpr(op->index, os); - os << " - 1)))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) - // 1. mask: 0.(Idx L).01..10.(Idx R).0 - // ((1 << (L - R + 1)) - 1) << R - // 2. 
a & mask - - os << "("; - PrintExpr(op->a, os); - os << " & (((1 << ("; - PrintExpr(op->index_left, os); - os << " - "; - PrintExpr(op->index_right, os); - os << " + 1)) - 1) << "; - PrintExpr(op->index_right, os); - os << "))"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "Quantize is not yet support"; -} - -void CodeAnalysOpenCLC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "KernelExpr is not yet support"; -} - - -void CodeAnalysOpenCLC::VisitStmt_(const LetStmt* op) { - // TODO comaniac - //std::vector vec_var = GetNodesByType(op->value); - - std::string arg_vid = "unknown"; - std::string str = PrintExpr(op->value); - if (std::regex_match(str, std::regex("(.+)TVMArray(.+)(arg)(.+)(data)(.+)"))) { - size_t pos_arg = str.find("arg"); - size_t pos_data = str.find("data"); - arg_vid = str.substr(pos_arg, pos_data-pos_arg-5); - } - else if (std::regex_match(str, std::regex("arg(.+)"))) - arg_vid = str; - - std::string vid = AllocVarID(op->var.get()); - if (this->map_arg_type_.find(arg_vid) == this->map_arg_type_.end()) { - if ("unknown" != arg_vid) - LOG(WARNING) << arg_vid << " not found in the argument mapping\n"; - } else { - Type type = op->var.type(); - if (op->var.type() == Handle() && - handle_data_type_.count(op->var.get())) - type = handle_data_type_.at(op->var.get()); - this->map_arg_type_[arg_vid] = std::make_tuple(vid, type); - } - VisitStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Allocate* op) { - CHECK(!is_zero(op->condition)); - std::string vid = AllocVarID(op->buffer_var.get()); - if (op->new_expr.defined()) { - // Prefer global static allocation for the program - CHECK_EQ(op->free_function, "nop"); - std::string new_data = PrintExpr(op->new_expr); - this->PrintIndent(); - PrintType(op->type, stream); - stream << "* "<< vid << '=' << new_data << ";\n"; - } else { - this->PrintIndent(); - int32_t constant_size = 
op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); - stream << ' '; - PrintType(op->type, stream); - stream << ' '<< vid << '[' - << constant_size << "];\n"; - } - RegisterHandleType(op->buffer_var.get(), op->type); - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AttrStmt* op) { - if (op->attr_key == ir::attr::thread_extent) { - IterVar iv(op->node.node_); - if (iv->thread_tag.length() != 0) { - if (!var_idmap_.count(iv->var.get())) { - BindThreadIndex(iv); - } - } - } else if (op->attr_key == ir::attr::storage_scope) { - const Variable* v = op->node.as(); - CHECK(v); - alloc_storage_scope_[v] = op->value.as()->value; - } else if (op->attr_key == ir::attr::volatile_scope) { - const Variable* v = op->node.as(); - CHECK(v); - volatile_buf_.insert(v); - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const AssertStmt* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (op->message.as()) { - // GLOG style check - stream << "CHECK(" << cond << ") << \"" - << op->message.as()->value << "\";\n"; - } else { - stream << "assert(" << cond << ");\n"; - } - this->PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const For* op) { - std::string extent = PrintExpr(op->extent); - PrintIndent(); - std::string vid = AllocVarID(op->loop_var.get()); - CHECK(is_zero(op->min)); - stream << "for ("; - PrintType(op->loop_var.type(), stream); - stream << ' ' << vid << " = 0; " - << vid << " < " << extent - << "; ++" << vid << ") {\n"; - int for_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(for_scope); - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const IfThenElse* op) { - std::string cond = PrintExpr(op->condition); - PrintIndent(); - if (cond[0] == 
'(' && cond[cond.length() - 1] == ')') { - stream << "if " << cond << " {\n"; - } else { - stream << "if (" << cond << ") {\n"; - } - int then_scope = BeginScope(); - PrintStmt(op->then_case); - this->EndScope(then_scope); - - if (op->else_case.defined()) { - PrintIndent(); - stream << "} else {\n"; - int else_scope = BeginScope(); - PrintStmt(op->else_case); - this->EndScope(else_scope); - } - PrintIndent(); - stream << "}\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Block *op) { - PrintStmt(op->first); - if (op->rest.defined()) PrintStmt(op->rest); -} - -void CodeAnalysOpenCLC::VisitStmt_(const Evaluate *op) { - if (is_const(op->value)) return; - const Call* call = op->value.as(); - if (call) { - if (call->is_intrinsic(intrinsic::tvm_storage_sync)) { - this->PrintStorageSync(call); return; - } else if (call->is_intrinsic(intrinsic::tvm_struct_set)) { - CHECK_EQ(call->args.size(), 4); - std::string value = PrintExpr(call->args[3]); - std::string ref = GetStructRef( - call->args[3].type(), - call->args[0], - call->args[1], - call->args[2].as()->value); - this->PrintIndent(); - this->stream << ref << " = " << value << ";\n"; - return; - } - } - std::string vid = this->PrintExpr(op->value); - this->PrintIndent(); - this->stream << "(void)" << vid << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const ProducerConsumer *op) { - PrintStmt(op->body); -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelDef *op) { - LOG(FATAL) << "KernelDef is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const KernelStmt *op) { - LOG(FATAL) << "KernelStmt is not yet support"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Return *op) { - this->stream << "return "; - PrintExpr(op->value); - this->stream << ";\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const Break *op) { - // TODO: Check if the break statement is used correctly - this->stream << "break;\n"; -} - -void CodeAnalysOpenCLC::VisitStmt_(const While *op) { - std::string condition = PrintExpr(op->condition); 
- PrintIndent(); - stream << "while (" << condition << ") {\n"; - int while_scope = BeginScope(); - PrintStmt(op->body); - this->EndScope(while_scope); - PrintIndent(); - stream << "}\n"; -} - - -void CodeAnalysOpenCLC::VisitStmt_(const Partition *op) {} - - -} // namespace codegen -} // namespace TVM diff --git a/tvm/src/codegen/opencl/codeanalys_openclc.h b/tvm/src/codegen/opencl/codeanalys_openclc.h deleted file mode 100755 index 8aaeedb39..000000000 --- a/tvm/src/codegen/opencl/codeanalys_openclc.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -#ifndef TVM_CODEGEN_CODEANALYS_OPENCLC_H_ -#define TVM_CODEGEN_CODEANALYS_OPENCLC_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../codegen_source_base.h" - -namespace TVM { -namespace codegen { - -using namespace ir; - -template -using str2tupleMap = std::unordered_map>; - -/*! - * \brief A class to analyze the IR AST for MerlinC generation. - * - */ -class CodeAnalysOpenCLC : - public ExprFunctor, - public StmtFunctor, - public CodeGenSourceBase { - public: - /*! - * \brief Initialize the code generator. - * \param output_ssa Whether output SSA. - */ - void Init(); - /*! - * \brief Add the function to the generated module. - * \param f The function to be compiled. - */ - void AddFunction(LoweredFunc f); - /*! - * \brief Finalize the compilation and return the code. - * \return The code. - */ - str2tupleMap Finish(); - /*! - * \brief Print the Stmt n to CodeAnalysMerlinC->stream - * \param n The statement to be printed. - */ - void PrintStmt(const Stmt& n) { - VisitStmt(n); - } - /*! - * \brief Print the expression n(or its ssa id if in ssa mode) into os - * \param n The expression to be printed. - * \param os The output stream - */ - void PrintExpr(const Expr& n, std::ostream& os); - /*! - * \brief Same as PrintExpr, but simply returns result string - * \param n The expression to be printed. 
- */ - std::string PrintExpr(const Expr& n) { - std::ostringstream os; - PrintExpr(n, os); - return os.str(); - } - // The following parts are overloadable print operations. - /*! - * \brief Initialize codegen state for generating f. - * \param f The function to be compiled. - */ - virtual void InitFuncState(LoweredFunc f); - // expression - void VisitExpr_(const Variable* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Load* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Let* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Call* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Add* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Sub* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Mul* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Div* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Mod* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Min* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Max* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const EQ* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const NE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const LT* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const LE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GT* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GE* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const And* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Or* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Cast* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Not* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Select* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const 
Ramp* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Broadcast* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const IntImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const UIntImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const FloatImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const StringImm* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GetBit* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const GetSlice* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const SetBit* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) - void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) - // statment - void VisitStmt_(const LetStmt* op) override; - void VisitStmt_(const Store* op) override; - void VisitStmt_(const For* op) override; - void VisitStmt_(const IfThenElse* op) override; - void VisitStmt_(const Allocate* op) override; - void VisitStmt_(const AttrStmt* op) override; - void VisitStmt_(const AssertStmt* op) override; - void VisitStmt_(const Evaluate* op) override; - void VisitStmt_(const Block* op) override; - void VisitStmt_(const ProducerConsumer* op) override; - void VisitStmt_(const KernelDef* op) override; - void VisitStmt_(const KernelStmt* op) override; - void VisitStmt_(const Return* op) override; - void VisitStmt_(const Break* op) override; - void VisitStmt_(const While* op) override; - void VisitStmt_(const Partition* op) override; - /*! - * Print Type represetnation of type t. - * \param t The type representation. - * \param os The stream to print the ctype into - */ - void PrintType(Type t, std::ostream& os); // NOLINT(*) - std::string GetType(Type t); // NOLINT(*) - /*! 
- * \brief Print expr representing the thread tag - * \param IterVar iv The thread index to be binded; - */ - void BindThreadIndex(const IterVar& iv); // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os); // NOLINT(*) - void PrintStorageSync(const Call* op); // NOLINT(*) - // Binary vector op. - void PrintVecBinaryOp( - const std::string&op, Type op_type, - Expr lhs, Expr rhs, std::ostream& os); // NOLINT(*) - // print vector load - std::string GetVecLoad(Type t, const Variable* buffer, Expr base); - // print vector store - void PrintVecStore(const Variable* buffer, - Type t, Expr base, - const std::string& value); // NOLINT(*) - // print load of single element - void PrintVecElemLoad( - const std::string& vec, Type t, int i, std::ostream& os); // NOLINT(*) - // print store of single element. - void PrintVecElemStore( - const std::string& vec, Type t, int i, const std::string& value); - // Get a cast type from to - std::string CastFromTo(std::string value, Type from, Type target); - - protected: - // Print reference to struct location - std::string GetStructRef( - Type t, const Expr& buffer, const Expr& index, int kind); - // print reference to a buffer as type t in index. - virtual std::string GetBufferRef( - Type t, const Variable* buffer, Expr index); - /*! - * \brief If buffer is allocated as type t. - * \param buf_var The buffer variable. - * \param t The type to be checked. - */ - bool HandleTypeMatch(const Variable* buf_var, Type t) const; - /*! - * \brief Register the data type of buf_var - * \param buf_var The buffer variable. - * \param t The type to be checked. - */ - void RegisterHandleType(const Variable* buf_var, Type t); - // override - void PrintSSAAssign( - const std::string& target, const std::string& src, Type t) final; - /*! \brief restrict keyword */ - std::string restrict_keyword_{""}; - /*! \brief the storage scope of allocation */ - std::unordered_map alloc_storage_scope_; - /*! 
\brief the data type of allocated buffers */ - std::unordered_map handle_data_type_; - - private: - /*! \brief set of volatile buf access */ - std::unordered_set volatile_buf_; - /*! \brief map of function arguments to their types */ - str2tupleMap map_arg_type_; -}; - -} // namespace codegen -} // namespace TVM -#endif // TVM_CODEGEN_CODEGEN_C_H_ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index c4975d673..bade9a839 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -1,13 +1,7 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include # include # include # include # include -# include # include "./codegen_aocl.h" # include "../../runtime/thread_storage_scope.h" @@ -18,6 +12,11 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Clear previous generated state this->InitFuncState(f); + for (Var arg: f->args) { + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } // Skip the first underscore, so SSA variable starts from _1 GetUniqueName("_"); @@ -27,7 +26,6 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, RegisterHandleType(kv.first.get(), kv.second.type()); } - this->stream << "#include \"ihc_apint.h\"" << "\n"; this->stream << "__kernel " << "void " << f->name << "("; @@ -61,54 +59,6 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, this->stream << "}\n\n"; } -/* 1st edition -void CodeGenAOCL::PrintType(Type t, std::ostream& os) { // NOLINT(*) - CHECK_EQ(t.lanes(), 1) - << "do not yet support vector types"; - if (t.is_handle()) { - os << "void*"; return; - } - - if (t.is_uint() || t.is_int()) { - if (t.is_uint()) { - os << "ap_uint<" << t.bits() << ">" <<" "<<"uint"< "<<"int"<"<<" "<< "uint"< "<<"int"<for_type == ForType::Unrolled) { @@ -232,8 +181,30 @@ void CodeGenAOCL::VisitStmt_(const For* op) { CodeGenAOCL::GenForStmt(op, os.str(), true); } +void CodeGenAOCL::VisitExpr_(const StreamExpr* op, std::ostream& os) { + 
std::string vid = GetVarID(op->buffer_var.get()); + os << vid << ".read()"; +} - +void CodeGenAOCL::VisitStmt_(const StreamStmt* op) { + std::string vid = GetVarID(op->buffer_var.get()); + PrintIndent(); + stream << vid; + switch (op->stream_type) { + case StreamType::Channel: + stream << "[channel]"; + break; + case StreamType::FIFO: + stream << "[fifo]"; + break; + case StreamType::Pipe: + stream << "[pipe]"; + break; + } + stream << ".write"; + PrintExpr(op->value, stream); + stream << ";\n"; +} } // namespace codegen -} // namespace TVM \ No newline at end of file +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 0921770c8..99f7bc395 100755 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -1,34 +1,26 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - #ifndef TVM_CODEGEN_CODEGEN_AOCL_H_ #define TVM_CODEGEN_CODEGEN_AOCL_H_ # include # include -# include -# include "./codeanalys_openclc.h" # include "./codegen_opencl.h" - namespace TVM { namespace codegen { - class CodeGenAOCL : public CodeGenOpenCL { - public: - CodeGenAOCL(){} - // void AddFunction(LoweredFunc f); - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + public: + CodeGenAOCL(){} + // void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const For* op) override; + void VisitStmt_(const StreamStmt* op) override; - void VisitStmt_(const For* op) override; - + void VisitExpr_(const StreamExpr* op, std::ostream& os) override; }; } // namespace codegen } // namespace TVM -#endif // TVM_CODEGEN_CODEGEN_AOCL_H_ \ No newline at end of file +#endif // TVM_CODEGEN_CODEGEN_AOCL_H_ diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index 1feecdc8f..734931e0c 100755 --- 
a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -1,5 +1,3 @@ - -# include # include # include # include @@ -15,16 +13,6 @@ CodeGenOpenCL::CodeGenOpenCL(){ restrict_keyword_ = "restrict"; } -void CodeGenOpenCL::InitFuncState(LoweredFunc f) { - CodeGenC::InitFuncState(f); - for (Var arg: f->args) { - if (arg.type().is_handle()) { - alloc_storage_scope_[arg.get()] = "global"; - } - } -} - - std::string CodeGenOpenCL::Finish() { // inject extension enable pragma for fp16 and fp64 if (enable_fp16_) { @@ -68,8 +56,6 @@ void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) { } - - void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t, Expr base, std::ostream& os) { // NOLINT(*) if (!HandleTypeMatch(buffer, t.element_of())) { @@ -115,8 +101,6 @@ void CodeGenOpenCL::PrintStorageSync(const Call* op) { } } - - void CodeGenOpenCL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global") { diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h index 8f9db613d..ccc0c51f2 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -4,50 +4,48 @@ # include # include # include -# include "./codeanalys_openclc.h" +// # include "./codeanalys_openclc.h" # include "../codegen_c.h" namespace TVM{ namespace codegen{ class CodeGenOpenCL : public CodeGenC{ - public: - // void AddFunction(LoweredFunc f); - CodeGenOpenCL(); - virtual void AddFunction(LoweredFunc f, str2tupleMap map_arg_type) = 0; - std::string Finish(); - void InitFuncState(LoweredFunc f) override; - void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) - void PrintStorageSync(const Call* op) override; //NOLINT(*) - // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - virtual void PrintType(Type t, std::ostream& os) = 0; //NOLINT - 
std::string GetVecLoad(Type t, const Variable * buffer, - Expr base) override; // NOLINT(*) - void PrintVecStore(const Variable * buffer, Type t, - Expr base, const std::string& value) override; //NOLINT(*) - void PrintVecAddr(const Variable * buffer, Type t, - Expr base, std::ostream& os); //NOLINT(*) - std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) - - //overload visitor - void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) - void VisitStmt_(const LetStmt* op) override; // NOLINT - void GenForStmt(const For* op, std::string pragma, bool before); - virtual void VisitStmt_(const For* op) = 0; + public: + // void AddFunction(LoweredFunc f); + CodeGenOpenCL(); + virtual void AddFunction(LoweredFunc f, str2tupleMap map_arg_type) = 0; + std::string Finish(); + void BindThreadIndex(const IterVar& iv) override; // NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) + void PrintStorageSync(const Call* op) override; //NOLINT(*) + // void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + virtual void PrintType(Type t, std::ostream& os) = 0; //NOLINT + std::string GetVecLoad(Type t, const Variable * buffer, + Expr base) override; // NOLINT(*) + void PrintVecStore(const Variable * buffer, Type t, + Expr base, const std::string& value) override; //NOLINT(*) + void PrintVecAddr(const Variable * buffer, Type t, + Expr base, std::ostream& os); //NOLINT(*) + std::string CastFromTo(std::string value, Type from, Type target) override; //NOLINT(*) - protected: - // whether enable fp16 and fp64 extension - bool enable_fp16_{false}; - bool enable_fp64_{false}; -}; + 
//overload visitor + void VisitExpr_(const Broadcast * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Call * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const Select * op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const FloatImm * op, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const IfThenElse* op) override; //NOLINT(*) + void VisitStmt_(const LetStmt* op) override; // NOLINT + void GenForStmt(const For* op, std::string pragma, bool before); + virtual void VisitStmt_(const For* op) = 0; +protected: + // whether enable fp16 and fp64 extension + bool enable_fp16_{false}; + bool enable_fp64_{false}; +}; } // namespace codegen } // namespace TVM -#endif \ No newline at end of file +#endif diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index e40599c10..7054eae4d 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -1,13 +1,7 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ -# include # include # include # include # include -# include # include "./codegen_sdaccel.h" # include "../../runtime/thread_storage_scope.h" @@ -18,6 +12,11 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Clear previous generated state this->InitFuncState(f); + for (Var arg: f->args) { + if (arg.type().is_handle()) { + alloc_storage_scope_[arg.get()] = "global"; + } + } // Skip the first underscore, so SSA variable starts from _1 GetUniqueName("_"); @@ -27,7 +26,6 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, RegisterHandleType(kv.first.get(), kv.second.type()); } - this->stream << "__kernel " << "void " << f->name << "("; // Write arguments @@ -172,9 +170,6 @@ void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) return ; } - - - void CodeGenSDACCEL::PrintStorageScope( const std::string& scope, std::ostream& os) { // NOLINT(*) if (scope == "global" || scope == "shared") { @@ 
-182,13 +177,6 @@ void CodeGenSDACCEL::PrintStorageScope( } } - - - - - - - void CodeGenSDACCEL::VisitStmt_(const For* op) { std::ostringstream os; if (op->for_type == ForType::Unrolled) { @@ -231,8 +219,6 @@ void CodeGenSDACCEL::VisitStmt_(const For* op) { CodeGenSDACCEL::GenForStmt(op, os.str(), true); } - - void CodeGenSDACCEL::VisitStmt_(const Partition* op) { std::string vid = GetVarID(op->buffer_var.get()); stream << vid << " "; @@ -263,6 +249,5 @@ void CodeGenSDACCEL::VisitStmt_(const Partition* op) { } } - } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h index 064d3098a..10aabf218 100755 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -9,7 +9,7 @@ # include # include # include -# include "./codeanalys_openclc.h" +// # include "./codeanalys_openclc.h" # include "./codegen_opencl.h" namespace TVM { @@ -32,4 +32,4 @@ class CodeGenSDACCEL : public CodeGenOpenCL { } // namespace codegen } // namespace TVM -#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ \ No newline at end of file +#endif // TVM_CODEGEN_CODEGEN_SDACCEL_H_ From cc65e5d6bee5be1125b2c6bd4437352d23d0ea58 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 17 Sep 2019 20:42:51 -0400 Subject: [PATCH 075/103] [add] aocl stream support --- .vscode/c_cpp_properties.json | 18 ---- .vscode/settings.json | 60 ------------ python/heterocl/dsl.py | 7 +- samples/stream/stream.py | 3 +- tvm/HalideIR/src/ir/IR.cpp | 8 +- tvm/HalideIR/src/ir/IR.h | 10 +- tvm/HalideIR/src/ir/IRMutator.cpp | 3 +- tvm/HalideIR/src/ir/IRVisitor.cpp | 8 ++ tvm/src/api/api_ir.cc | 16 +++- tvm/src/codegen/opencl/codegen_aocl.cc | 111 +++++++++++++++++++--- tvm/src/codegen/opencl/codegen_aocl.h | 9 +- tvm/src/codegen/opencl/codegen_opencl.cc | 1 - tvm/src/codegen/opencl/codegen_opencl.h | 2 +- tvm/src/codegen/opencl/codegen_sdaccel.cc | 25 +++++ tvm/src/codegen/opencl/codegen_sdaccel.h | 24 ++--- 
tvm/src/codegen/opencl/sdaccel_module.cc | 27 +----- tvm/src/pass/ir_mutator.cc | 3 +- tvm/src/schedule/compute_primitive.cc | 41 +++++++- 18 files changed, 229 insertions(+), 147 deletions(-) delete mode 100644 .vscode/c_cpp_properties.json delete mode 100644 .vscode/settings.json diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json deleted file mode 100644 index 8764f7120..000000000 --- a/.vscode/c_cpp_properties.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "configurations": [ - { - "name": "Mac", - "includePath": [ - "${workspaceFolder}/**" - ], - "defines": [], - "macFrameworkPath": [], - "compilerPath": "/usr/local/bin/gcc-8", - "cStandard": "c11", - "cppStandard": "c++17", - "intelliSenseMode": "clang-x64", - "compileCommands": "${workspaceFolder}/build/pkgs/llvm/build/compile_commands.json" - } - ], - "version": 4 -} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index a767b8b52..000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "files.associations": { - "array": "cpp", - "atomic": "cpp", - "*.tcc": "cpp", - "bitset": "cpp", - "cctype": "cpp", - "cfenv": "cpp", - "chrono": "cpp", - "cinttypes": "cpp", - "clocale": "cpp", - "cmath": "cpp", - "condition_variable": "cpp", - "csetjmp": "cpp", - "csignal": "cpp", - "cstdarg": "cpp", - "cstddef": "cpp", - "cstdint": "cpp", - "cstdio": "cpp", - "cstdlib": "cpp", - "cstring": "cpp", - "ctime": "cpp", - "cwchar": "cpp", - "cwctype": "cpp", - "deque": "cpp", - "forward_list": "cpp", - "list": "cpp", - "unordered_map": "cpp", - "unordered_set": "cpp", - "vector": "cpp", - "exception": "cpp", - "fstream": "cpp", - "functional": "cpp", - "future": "cpp", - "initializer_list": "cpp", - "iomanip": "cpp", - "iosfwd": "cpp", - "iostream": "cpp", - "istream": "cpp", - "limits": "cpp", - "memory": "cpp", - "mutex": "cpp", - "new": "cpp", - "numeric": "cpp", - "optional": "cpp", - "ostream": "cpp", - "ratio": "cpp", - 
"sstream": "cpp", - "stdexcept": "cpp", - "streambuf": "cpp", - "string_view": "cpp", - "system_error": "cpp", - "thread": "cpp", - "tuple": "cpp", - "type_traits": "cpp", - "typeindex": "cpp", - "typeinfo": "cpp", - "utility": "cpp" - } -} \ No newline at end of file diff --git a/python/heterocl/dsl.py b/python/heterocl/dsl.py index c9aceca91..1dce1c25e 100644 --- a/python/heterocl/dsl.py +++ b/python/heterocl/dsl.py @@ -414,18 +414,20 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n # prepare inputs for IR generation inputs = [] inputs_tvm = [] - arg_shapes = [] + arg_shapes, arg_dtypes = [], [] for shape, name_, dtype in zip(shapes, new_names, dtypes): if shape == (): var_ = placeholder((), name_, dtype) inputs.append(var_) inputs_tvm.append(var_.var) arg_shapes.append([1]) + arg_dtypes.append(dtype) else: # tensor inputs (new bufs) placeholder_ = placeholder(shape, name_, dtype) inputs.append(placeholder_) inputs_tvm.append(placeholder_.buf.data) arg_shapes.append(list(shape)) + arg_dtypes.append(dtype) s.ret_dtype = ret_dtype fmodule(*inputs) @@ -438,7 +440,8 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n ret_void = _make.UIntImm("uint1", 0) if s.has_return else _make.UIntImm("uint1", 1) body = s.pop_stmt() s.stmt_stack.append([]) - s.emit(_make.KernelDef(inputs_tvm, arg_shapes, body, ret_void, ret_dtype, name)) + s.emit(_make.KernelDef(inputs_tvm, arg_shapes, arg_dtypes, + body, ret_void, ret_dtype, name, [])) for name_, i in zip(names, inputs): s.var_dict[name_] = i s.input_stages.clear() diff --git a/samples/stream/stream.py b/samples/stream/stream.py index ea2255d1f..3f36162af 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -3,6 +3,7 @@ # hcl.init(place=hcl.CPU("riscv")) hcl.init(place=hcl.FPGA("intel")) initiation_interval = 4 + a = hcl.placeholder((10, 20), name="a") b = hcl.placeholder((10, 20), name="b") @@ -52,5 +53,5 @@ def ret_mul(c, d, e): # 
print(add_mul.ret_mul._buf, c._buf) print(hcl.lower(s)) -print(hcl.build(s, target="vhls")) +print(hcl.build(s, target="aocl")) diff --git a/tvm/HalideIR/src/ir/IR.cpp b/tvm/HalideIR/src/ir/IR.cpp index 9935d54a5..5b81fb30f 100644 --- a/tvm/HalideIR/src/ir/IR.cpp +++ b/tvm/HalideIR/src/ir/IR.cpp @@ -692,9 +692,13 @@ Expr Quantize::make(Expr body, Expr bitwidth) { return Expr(node); } -Stmt KernelDef::make(Array args, Array> api_args, Stmt body, Expr ret_void, Type ret_type, std::string name) { +Stmt KernelDef::make(Array args, Array> api_args, + Array api_types, Stmt body, Expr ret_void, + Type ret_type, std::string name, Array channels) { + internal_assert(api_args.size() == api_types.size()) << "KernelDef of unmatched args\n"; for (size_t i = 0; i < args.size(); i++) { internal_assert(args[i].defined()) << "KernelDef of undefined arg\n"; + internal_assert(api_types[i].defined()) << "KernelDef of undefined type\n"; for (size_t j = 0; j < api_args[i].size(); j++) { internal_assert(api_args[i][j].defined()) << "KernelDef of undefined shape\n"; } @@ -704,9 +708,11 @@ Stmt KernelDef::make(Array args, Array> api_args, Stmt body std::shared_ptr node = std::make_shared(); node->args = std::move(args); node->api_args = std::move(api_args); + node->api_types = std::move(api_types); node->body = std::move(body); node->ret_void = std::move(ret_void); node->ret_type = ret_type; + node->channels = std::move(channels); node->name = name; return Stmt(node); } diff --git a/tvm/HalideIR/src/ir/IR.h b/tvm/HalideIR/src/ir/IR.h index 327f32ca5..6a57471da 100644 --- a/tvm/HalideIR/src/ir/IR.h +++ b/tvm/HalideIR/src/ir/IR.h @@ -1050,20 +1050,28 @@ struct Quantize : public ExprNode { struct KernelDef : public StmtNode { Array args; Array> api_args; + Array api_types; Stmt body; Expr ret_void; Type ret_type; std::string name; + // args to stream data + Array channels; - EXPORT static Stmt make(Array args, Array> api_args, Stmt body, Expr ret_void, Type ret_type, std::string name); + 
EXPORT static Stmt make(Array args, Array> api_args, + Array api_types, Stmt body, Expr ret_void, + Type ret_type, std::string name, + Array channels); void VisitAttrs(IR::AttrVisitor* v) final { v -> Visit("args", &args); v -> Visit("api_args", &api_args); + v -> Visit("api_types", &api_types); v -> Visit("body", &body); v -> Visit("ret_void", &ret_void); v -> Visit("ret_type", &ret_type); v -> Visit("name", &name); + v -> Visit("channels", &channels); } static const IRNodeType _type_info = IRNodeType::KernelDef; static constexpr const char* _type_key = "KernelDef"; diff --git a/tvm/HalideIR/src/ir/IRMutator.cpp b/tvm/HalideIR/src/ir/IRMutator.cpp index a069d685a..fbd3e82b5 100644 --- a/tvm/HalideIR/src/ir/IRMutator.cpp +++ b/tvm/HalideIR/src/ir/IRMutator.cpp @@ -480,7 +480,8 @@ void IRMutator::visit(const KernelDef *op, const Stmt &s) { stmt = s; } else { - stmt = KernelDef::make(op->args, op->api_args, body, ret_void, op->ret_type, op->name); + stmt = KernelDef::make(op->args, op->api_args, op->api_types, + body, ret_void, op->ret_type, op->name, op->channels); } } diff --git a/tvm/HalideIR/src/ir/IRVisitor.cpp b/tvm/HalideIR/src/ir/IRVisitor.cpp index a38ae2fa4..30e1fe86b 100644 --- a/tvm/HalideIR/src/ir/IRVisitor.cpp +++ b/tvm/HalideIR/src/ir/IRVisitor.cpp @@ -273,6 +273,10 @@ void IRVisitor::visit(const Quantize *op, const Expr &) { void IRVisitor::visit(const KernelDef *op, const Stmt &) { for (size_t i = 0; i < op->args.size(); i++) { op->args[i].accept(this); + op->api_types[i].accept(this); + for (size_t j = 0; j < op->api_args[i].size(); j++) { + op->api_args[i][j].accept(this); + } } op->ret_void.accept(this); } @@ -581,6 +585,10 @@ void IRGraphVisitor::visit(const Quantize *op, const Expr &) { void IRGraphVisitor::visit(const KernelDef *op, const Stmt &) { for (size_t i = 0; i < op->args.size(); i++) { include(op->args[i]); + include(op->api_types[i]); + for (size_t j = 0; j < op->api_args[i].size(); j++) { + include(op->api_args[i][j]); + } } 
include(op->ret_void); } diff --git a/tvm/src/api/api_ir.cc b/tvm/src/api/api_ir.cc index 0411575cc..8edb1a0e8 100644 --- a/tvm/src/api/api_ir.cc +++ b/tvm/src/api/api_ir.cc @@ -176,6 +176,20 @@ TVM_REGISTER_API("make.Select") *ret = Node::make(args[0], args[1], args[2], args[3], args[4], args[5]); \ }) \ +#define REGISTER_MAKE7(Node) \ + TVM_REGISTER_API("make."#Node) \ + .set_body([](TVMArgs args, TVMRetValue *ret) { \ + *ret = Node::make(args[0], args[1], args[2], args[3], \ + args[4], args[5], args[6]); \ + }) \ + +#define REGISTER_MAKE8(Node) \ + TVM_REGISTER_API("make."#Node) \ + .set_body([](TVMArgs args, TVMRetValue *ret) { \ + *ret = Node::make(args[0], args[1], args[2], args[3], \ + args[4], args[5], args[6], args[7]); \ + }) \ + #define REGISTER_MAKE_BINARY_OP(Node) \ TVM_REGISTER_API("make."#Node) \ .set_body([](TVMArgs args, TVMRetValue *ret) { \ @@ -222,7 +236,7 @@ REGISTER_MAKE3(GetSlice); REGISTER_MAKE3(SetBit); REGISTER_MAKE4(SetSlice); REGISTER_MAKE2(Quantize); -REGISTER_MAKE6(KernelDef); +REGISTER_MAKE8(KernelDef); REGISTER_MAKE3(KernelExpr); REGISTER_MAKE2(KernelStmt); REGISTER_MAKE1(Return); diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index bade9a839..a588a08fb 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -1,9 +1,10 @@ -# include -# include -# include -# include -# include "./codegen_aocl.h" -# include "../../runtime/thread_storage_scope.h" +#include +#include +#include +#include +#include +#include "./codegen_aocl.h" +#include "../../runtime/thread_storage_scope.h" namespace TVM { namespace codegen { @@ -183,27 +184,107 @@ void CodeGenAOCL::VisitStmt_(const For* op) { void CodeGenAOCL::VisitExpr_(const StreamExpr* op, std::ostream& os) { std::string vid = GetVarID(op->buffer_var.get()); - os << vid << ".read()"; + switch (op->stream_type) { + case StreamType::Channel: + os << "read_channel_intel("; + os << vid << ")"; + break; + case 
StreamType::Pipe: + os << "read_pipe("; + break; + case StreamType::FIFO: + // buffered channel + break; + } +} + +void CodeGenAOCL::VisitStmt_(const KernelDef* op) { + LoweredFunc f; + SaveFuncState(f); + InitFuncState(f); + std::ostringstream save; + save << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + + // skip the first underscore + GetUniqueName("_"); + // add to alloc buffer : type. + for (const auto & k : op->args) { + RegisterHandleType(k.get(), k.get()->type); + } + PrintType(op->ret_type, stream); + stream << " " << op->name << "("; + + // create function signature + std::unordered_set inputs; + for (size_t j = 0; j < op->channels.size(); j++) { + inputs.insert(op->channels[j]); + } + for (size_t i = 0; i < op->args.size(); ++i) { + VarExpr v = op->args[i]; + var_shape_map_[v.get()] = op->api_args[i]; + std::string vid = AllocVarID(v.get()); + if (inputs.count(v)) { + // define channel out of scope + if (!stream_pragma) { + decl_stream << "#pragma OPENCL EXTENSION cl_intel_channels : enable\n"; + stream_pragma = true; + } + decl_stream << "channel "; + PrintExpr(op->api_types[i], decl_stream); + decl_stream << " " << vid << ";\n"; + } else { + if (i != 0) { + if (i == 1 && stream_pragma) void(0); + else stream << ", "; + } + PrintExpr(op->api_types[i], stream); + this->stream << " " << vid; + if (v.type().is_handle()) { + for (size_t j = 0; j < op->api_args[i].size(); j++) { + this->stream << '['; + this->PrintExpr(op->api_args[i][j], this->stream); + this->stream << ']'; + } + } + } + } + stream << ") {\n"; + int func_scope = BeginScope(); + range_ = CollectIterRange(op->body); + PrintStmt(op->body); + EndScope(func_scope); + stream << "}\n\n"; + + // restore default stream + module_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + this->stream << save.str(); + RestoreFuncState(f); } void CodeGenAOCL::VisitStmt_(const StreamStmt* op) { std::string vid = GetVarID(op->buffer_var.get()); PrintIndent(); 
- stream << vid; switch (op->stream_type) { case StreamType::Channel: - stream << "[channel]"; - break; - case StreamType::FIFO: - stream << "[fifo]"; + stream << "write_channel_intel("; + stream << vid << ", ("; + PrintType(op->buffer_var.get()->type, stream); + stream << ") "; break; case StreamType::Pipe: - stream << "[pipe]"; + stream << "write_pipe("; + stream << vid << ", "; + break; + case StreamType::FIFO: + // buffered channel break; } - stream << ".write"; PrintExpr(op->value, stream); - stream << ";\n"; + stream << ");\n"; } } // namespace codegen diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 99f7bc395..7db1f2b64 100755 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -11,14 +11,17 @@ namespace codegen { class CodeGenAOCL : public CodeGenOpenCL { public: CodeGenAOCL(){} - // void AddFunction(LoweredFunc f); void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const For* op) override; - void VisitStmt_(const StreamStmt* op) override; + void VisitStmt_(const For* op) override; //NOLINT(*) + void VisitStmt_(const StreamStmt* op) override; //NOLINT(*) + void VisitStmt_(const KernelDef* op) override; //NOLINT(*) void VisitExpr_(const StreamExpr* op, std::ostream& os) override; + + private: + bool stream_pragma{false}; }; } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index 734931e0c..0ecee8f16 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -110,7 +110,6 @@ void CodeGenOpenCL::PrintStorageScope( } } - std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) { if (from == target) return value; std::ostringstream os; diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h 
index ccc0c51f2..9dd69842d 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -40,7 +40,7 @@ class CodeGenOpenCL : public CodeGenC{ virtual void VisitStmt_(const For* op) = 0; protected: - // whether enable fp16 and fp64 extension + // fp16 and fp64 extension bool enable_fp16_{false}; bool enable_fp64_{false}; }; diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 7054eae4d..601e566f0 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -249,5 +249,30 @@ void CodeGenSDACCEL::VisitStmt_(const Partition* op) { } } +void CodeGenSDACCEL::VisitStmt_(const StreamStmt* op) { + std::string vid = GetVarID(op->buffer_var.get()); + PrintIndent(); + stream << vid; + switch (op->stream_type) { + case StreamType::Channel: + stream << "[channel]"; + break; + case StreamType::FIFO: + stream << "[fifo]"; + break; + case StreamType::Pipe: + stream << "[pipe]"; + break; + } + stream << ".write"; + PrintExpr(op->value, stream); + stream << ";\n"; +} + +void CodeGenSDACCEL::VisitExpr_(const StreamExpr* op, std::ostream& os) { + std::string vid = GetVarID(op->buffer_var.get()); + os << vid << ".read()"; +} + } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.h b/tvm/src/codegen/opencl/codegen_sdaccel.h index 10aabf218..4f1cfa053 100755 --- a/tvm/src/codegen/opencl/codegen_sdaccel.h +++ b/tvm/src/codegen/opencl/codegen_sdaccel.h @@ -1,32 +1,26 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - #ifndef TVM_CODEGEN_CODEGEN_SDACCEL_H_ #define TVM_CODEGEN_CODEGEN_SDACCEL_H_ # include # include -# include -// # include "./codeanalys_openclc.h" # include "./codegen_opencl.h" namespace TVM { namespace codegen { - class CodeGenSDACCEL : public CodeGenOpenCL { - public: - CodeGenSDACCEL(){} - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + public: + CodeGenSDACCEL(){} + void 
AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const For* op) override; //NOLINT(*) - void VisitStmt_(const Partition* op); //NOLINT(*) + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + void PrintStorageScope(const std::string& scope, std::ostream& os) override; //NOLINT(*) + void VisitStmt_(const For* op) override; //NOLINT(*) + void VisitStmt_(const Partition* op) override; //NOLINT(*) + void VisitStmt_(const StreamStmt* op) override; //NOLINT(*) + void VisitExpr_(const StreamExpr* op, std::ostream& os) override; //NOLINT(*) }; } // namespace codegen diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 2ada9cc0d..3876f14d2 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -1,8 +1,3 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ - #include "./sdaccel_module.h" #include #include @@ -17,14 +12,11 @@ namespace runtime { namespace { - void PrintIndent(std::ofstream& stream, int indent) { for (int i = 0; i < indent; i++) stream << ' '; } - - inline size_t GetTypeSize(TVMType t) { size_t byte = (t.bits + 7) / 8; if (byte > 2){ @@ -35,8 +27,6 @@ inline size_t GetTypeSize(TVMType t) { return byte; } - - inline size_t GetDataSize(TVMArray* arr) { size_t size = 1; for (tvm_index_t i = 0; i < arr->ndim; ++i) { @@ -52,8 +42,6 @@ inline size_t GetDataSize(TVMArray* arr) { return size; } - - inline TVMType Type2TVMType(Type t) { TVMType tt; if (t.is_int()) tt.code = kDLInt; @@ -112,8 +100,6 @@ inline std::string Type2Str(TVMType t) { return str; } - - inline std::string Type2ExtStr(TVMType t) { std::string str = ""; if (t.code == kDLInt) { @@ -293,7 +279,6 @@ void PrintCopyBack(TVMArray* arr, } } - void GenMakFile() { int indent = 0; std::ofstream stream; @@ -342,7 +327,6 @@ void 
GenMakFile() { stream << "include ${COMMON_DIR}/common.mk\n"; stream.close(); - } void GenCommonFile() { @@ -428,8 +412,6 @@ void GenCommonFile() { stream.close(); } - - void GenHostCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, @@ -685,7 +667,6 @@ void GenHostCode(TVMArgs& args, } // namespace - class SDAccelModuleNode final : public ModuleNode { public: SDAccelModuleNode(LoweredFunc func, std::string test_file) @@ -733,10 +714,8 @@ class SDAccelModuleNode final : public ModuleNode { std::string test_file_; }; -Module CreateSDAccelModule( - LoweredFunc func, - std::string code) { - +Module CreateSDAccelModule(LoweredFunc func, + std::string code) { std::shared_ptr n = std::make_shared(func, code); @@ -744,4 +723,4 @@ Module CreateSDAccelModule( } } // namespace runtime -} // namespace TVM \ No newline at end of file +} // namespace TVM diff --git a/tvm/src/pass/ir_mutator.cc b/tvm/src/pass/ir_mutator.cc index a63889e78..89485e723 100644 --- a/tvm/src/pass/ir_mutator.cc +++ b/tvm/src/pass/ir_mutator.cc @@ -330,7 +330,8 @@ Stmt IRMutator::Mutate_(const KernelDef *op, const Stmt &s) { if (body.same_as(op->body) && ret_void.same_as(op->ret_void)) { return s; } else { - return KernelDef::make(op->args, op->api_args, body, ret_void, op->ret_type, op->name); + return KernelDef::make(op->args, op->api_args, op->api_types, + body, ret_void, op->ret_type, op->name, op->channels); } } diff --git a/tvm/src/schedule/compute_primitive.cc b/tvm/src/schedule/compute_primitive.cc index 648f48aad..3b3fa9734 100644 --- a/tvm/src/schedule/compute_primitive.cc +++ b/tvm/src/schedule/compute_primitive.cc @@ -149,6 +149,7 @@ class LoopFuser final : public IRMutator { class StreamConsumer final : public IRMutator { public: + VarExpr stream_data; StreamConsumer( const std::string& target, const ir::StreamType& type) @@ -159,11 +160,13 @@ class StreamConsumer final : public IRMutator { Expr index = op->index; std::string target_name = 
op->buffer_var.get()->name_hint; if (has_suffix(target_name, "." + target_)) { + stream_data = op->buffer_var; return StreamExpr::make(op->type, op->buffer_var, type_, 10); } else { return Load::make(op->type, op->buffer_var, index, op->predicate); } } + private: const std::string target_; const ir::StreamType type_; @@ -175,6 +178,7 @@ class StreamConsumer final : public IRMutator { class StreamProducer final : public IRMutator { public: + VarExpr stream_data; StreamProducer( const std::string& target, const ir::StreamType& type) @@ -186,6 +190,7 @@ class StreamProducer final : public IRMutator { Expr value = this->Mutate(op->value); std::string target_name = op->buffer_var.get()->name_hint; if (has_suffix(target_name, "." + target_)) { + stream_data = op->buffer_var; return StreamStmt::make(op->buffer_var, value, type_, 10); } else { return Store::make(op->buffer_var, value, index, op->predicate); @@ -243,6 +248,38 @@ class LoopReorderer final : public IRMutator { } }; +class KernelUpdater final : public IRMutator { + public: + KernelUpdater( + const std::string& target, + const ir::StreamType& type, + const bool is_producer) + : target_(target), type_(type), is_producer_(is_producer){} + + Stmt Mutate_(const KernelDef* op, const Stmt& s) { + // mutate target load + Stmt stmt = op->body; + Array arr; + if (is_producer_) { + StreamProducer mutator(target_, type_); + stmt = mutator.Mutate(stmt); + arr.push_back(mutator.stream_data); + } else { // replace load consumer + StreamConsumer mutator(target_, type_); + stmt = mutator.Mutate(stmt); + arr.push_back(mutator.stream_data); + } + // update kernel arg signature + return KernelDef::make(op->args, op->api_args, + op->api_types, stmt, op->ret_void, + op->ret_type, op->name, arr); + } + private: + const std::string target_; + const ir::StreamType type_; + const bool is_producer_; +}; + class IterVarAttrUpdater final : public IRMutator { public: IterVarAttrUpdater(const IterVar& var, const IterVarAttrNode* node) @@ 
-561,7 +598,7 @@ Stmt StreamFromProducer(Stmt& stmt, Buffer& producer_buf, ir::StreamType& type) { std::string target_name = producer_buf.operator->()->name; - StreamProducer mutator(target_name, type); + KernelUpdater mutator(target_name, type, true); stmt = mutator.Mutate(stmt); return stmt; } @@ -570,7 +607,7 @@ Stmt StreamToConsumer(Stmt& stmt, Buffer& producer_buf, ir::StreamType& type) { std::string target_name = producer_buf.operator->()->name; - StreamConsumer mutator(target_name, type); + KernelUpdater mutator(target_name, type, false); stmt = mutator.Mutate(stmt); return stmt; } From 786ccb747a161fa0156283fc047ee9e8ddca4898 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 17 Sep 2019 22:37:17 -0400 Subject: [PATCH 076/103] [fix] aocl type conversion --- samples/stream/example.cl | 34 ++++++++++++++++++++++++ samples/stream/stream.py | 6 ++++- tvm/src/codegen/opencl/codegen_aocl.cc | 36 +++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 samples/stream/example.cl diff --git a/samples/stream/example.cl b/samples/stream/example.cl new file mode 100644 index 000000000..c0cc25535 --- /dev/null +++ b/samples/stream/example.cl @@ -0,0 +1,34 @@ +#include "ihc_apint.h" +#pragma OPENCL EXTENSION cl_intel_channels : enable +channel int ret_add_c; +channel int ret_mul_c; +__kernel int ret_add(int ret_add_a[10][20], int ret_add_b[10][20]) { + for (int i = 0; i < 10; ++i) { + for (int i1 = 0; i1 < 20; ++i1) { + write_channel_intel(ret_add_c, (void*) ((int)(((int33_t)ret_add_a[(i1 + (i * 20))]) + ((int33_t)ret_add_b[(i1 + (i * 20))])))); + } + } +} + +__kernel int ret_mul(int ret_mul_d[10][20], int ret_mul_e[10][20]) { + for (int i = 0; i < 10; ++i) { + for (int i1 = 0; i1 < 20; ++i1) { + ret_mul_e[(i1 + (i * 20))] = ((int)(((long)read_channel_intel(ret_mul_c)) * ((long)ret_mul_d[(i1 + (i * 20))]))); + } + } +} + +__kernel void default_function(__global int* restrict a, __global int* restrict b, __global int* restrict c, 
__global int* restrict d, __global int* restrict e) { + int ret_add; + int ret_mul; + for (int x = 0; x < 10; ++x) { + for (int y = 0; y < 20; ++y) { + c[(y + (x * 20))] = 0; + } + } + int ret_add0; + ret_add(a, b, c); + int ret_mul0; + ret_mul(c, d, e); +} + diff --git a/samples/stream/stream.py b/samples/stream/stream.py index 3f36162af..e83e88a7d 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -53,5 +53,9 @@ def ret_mul(c, d, e): # print(add_mul.ret_mul._buf, c._buf) print(hcl.lower(s)) -print(hcl.build(s, target="aocl")) +code = hcl.build(s, target="aocl") + +with open("example.cl", "w") as f: + f.write(code) + f.close() diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index a588a08fb..52643fb64 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -9,6 +9,33 @@ namespace TVM { namespace codegen { +inline Type String2Type(std::string& s) { + if (s.front() == '\"' && s.back() == '\"') { + s.erase(0, 1); + s.pop_back(); + } + std::istringstream is(s); + halideir_type_code_t code = Type::Int; + if (s.substr(0, 3) == "int") { + code = Type::Int; s = s.substr(3); + } else if (s.substr(0, 4) == "uint") { + code = Type::UInt; s = s.substr(4); + } else if (s.substr(0, 5) == "float") { + code = Type::Float; s = s.substr(5); + } else if (s.substr(0, 5) == "float") { + code = Type::Float; s = s.substr(5); + } else if (s == "handle") { + return Handle(); + } else { + LOG(FATAL) << "unknown type " << s; + } + int bits = 32, lanes = 1; + if (sscanf(s.c_str(), "%dx%d", &bits, &lanes) == 0) { + LOG(FATAL) << "unknown type " << s; + } + return Type(code, bits, lanes); +} + void CodeGenAOCL::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Clear previous generated state @@ -27,7 +54,7 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, RegisterHandleType(kv.first.get(), kv.second.type()); } - this->stream << "#include \"ihc_apint.h\"" << "\n"; + 
this->decl_stream << "#include \"ihc_apint.h\"" << "\n"; this->stream << "__kernel " << "void " << f->name << "("; // Write arguments @@ -213,6 +240,7 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { for (const auto & k : op->args) { RegisterHandleType(k.get(), k.get()->type); } + stream << "__kernel "; PrintType(op->ret_type, stream); stream << " " << op->name << "("; @@ -232,14 +260,16 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { stream_pragma = true; } decl_stream << "channel "; - PrintExpr(op->api_types[i], decl_stream); + std::string str = PrintExpr(op->api_types[i]); + PrintType(String2Type(str), decl_stream); decl_stream << " " << vid << ";\n"; } else { if (i != 0) { if (i == 1 && stream_pragma) void(0); else stream << ", "; } - PrintExpr(op->api_types[i], stream); + std::string str = PrintExpr(op->api_types[i]); + PrintType(String2Type(str), stream); this->stream << " " << vid; if (v.type().is_handle()) { for (size_t j = 0; j < op->api_args[i].size(); j++) { From 1ec3fcd35635f2f984c11c0635d1b04dd15ec98a Mon Sep 17 00:00:00 2001 From: Hecmay Date: Wed, 18 Sep 2019 19:46:08 -0400 Subject: [PATCH 077/103] [fix] aocl channel syntax --- samples/stream/example.cl | 10 ++-- tvm/src/codegen/hlsc/codegen_hlsc.h | 24 +++++----- tvm/src/codegen/opencl/codegen_aocl.cc | 65 +++++++++++++++++++------- tvm/src/codegen/opencl/codegen_aocl.h | 5 +- 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/samples/stream/example.cl b/samples/stream/example.cl index c0cc25535..fa3cfbd81 100644 --- a/samples/stream/example.cl +++ b/samples/stream/example.cl @@ -2,15 +2,15 @@ #pragma OPENCL EXTENSION cl_intel_channels : enable channel int ret_add_c; channel int ret_mul_c; -__kernel int ret_add(int ret_add_a[10][20], int ret_add_b[10][20]) { +__kernel void ret_add(__global int* restrict ret_add_a, __global int* restrict ret_add_b) { for (int i = 0; i < 10; ++i) { for (int i1 = 0; i1 < 20; ++i1) { - write_channel_intel(ret_add_c, (void*) 
((int)(((int33_t)ret_add_a[(i1 + (i * 20))]) + ((int33_t)ret_add_b[(i1 + (i * 20))])))); + write_channel_intel(ret_add_c, ((int)(((int33_t)ret_add_a[(i1 + (i * 20))]) + ((int33_t)ret_add_b[(i1 + (i * 20))])))); } } } -__kernel int ret_mul(int ret_mul_d[10][20], int ret_mul_e[10][20]) { +__kernel void ret_mul(__global int* restrict ret_mul_d, __global int* restrict ret_mul_e) { for (int i = 0; i < 10; ++i) { for (int i1 = 0; i1 < 20; ++i1) { ret_mul_e[(i1 + (i * 20))] = ((int)(((long)read_channel_intel(ret_mul_c)) * ((long)ret_mul_d[(i1 + (i * 20))]))); @@ -27,8 +27,8 @@ __kernel void default_function(__global int* restrict a, __global int* restrict } } int ret_add0; - ret_add(a, b, c); + ret_add(a, b); int ret_mul0; - ret_mul(c, d, e); + ret_mul(d, e); } diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.h b/tvm/src/codegen/hlsc/codegen_hlsc.h index 9403d9cff..c3dd0740d 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.h +++ b/tvm/src/codegen/hlsc/codegen_hlsc.h @@ -16,22 +16,20 @@ namespace TVM { namespace codegen { class CodeGenHLSC : public CodeGenC { - public: - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + public: + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - void VisitExpr_(const Min* op, std::ostream& os) override; - void VisitExpr_(const Max* op, std::ostream& os) override; + void VisitExpr_(const Min* op, std::ostream& os) override; + void VisitExpr_(const Max* op, std::ostream& os) override; - void VisitStmt_(const LetStmt* op) override; - void VisitStmt_(const IfThenElse* op) override; - void VisitStmt_(const Allocate* op) override; + void VisitStmt_(const LetStmt* op) override; + void VisitStmt_(const IfThenElse* op) override; + void VisitStmt_(const Allocate* op) override; - void GenForStmt(const For* op, std::string pragma, bool before); - - // std::map > var_shape_map_; - // std::unordered_map range_; - protected: - std::string GetBufferRef(Type t, const Variable* buffer, Expr index); + void GenForStmt(const For* op, 
std::string pragma, bool before); + + protected: + std::string GetBufferRef(Type t, const Variable* buffer, Expr index); }; } // namespace codegen diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 52643fb64..ecb706415 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -70,7 +70,6 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, else { auto arg = map_arg_type[vid]; this->stream << "__global "; - // this->stream << "global "; PrintType(std::get<1>(arg), this->stream); if (v.type().is_handle()) this->stream << "*"; @@ -241,19 +240,22 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { RegisterHandleType(k.get(), k.get()->type); } stream << "__kernel "; - PrintType(op->ret_type, stream); + const UIntImm* is_void = op->ret_void.as(); + if (is_void) stream << "void"; + else PrintType(op->ret_type, stream); stream << " " << op->name << "("; // create function signature - std::unordered_set inputs; + std::unordered_set stream_vars; for (size_t j = 0; j < op->channels.size(); j++) { - inputs.insert(op->channels[j]); + stream_vars.insert(op->channels[j]); + stream_exprs.insert(op->channels[j].get()->name_hint); } for (size_t i = 0; i < op->args.size(); ++i) { VarExpr v = op->args[i]; var_shape_map_[v.get()] = op->api_args[i]; std::string vid = AllocVarID(v.get()); - if (inputs.count(v)) { + if (stream_vars.count(v)) { // define channel out of scope if (!stream_pragma) { decl_stream << "#pragma OPENCL EXTENSION cl_intel_channels : enable\n"; @@ -268,16 +270,10 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { if (i == 1 && stream_pragma) void(0); else stream << ", "; } + this->stream << "__global "; std::string str = PrintExpr(op->api_types[i]); PrintType(String2Type(str), stream); - this->stream << " " << vid; - if (v.type().is_handle()) { - for (size_t j = 0; j < op->api_args[i].size(); j++) { - this->stream << '['; - this->PrintExpr(op->api_args[i][j], this->stream); - 
this->stream << ']'; - } - } + this->stream << "* restrict " << vid; } } stream << ") {\n"; @@ -295,15 +291,52 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { RestoreFuncState(f); } +void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { + PrintIndent(); + stream << op->name << "("; + bool arg_flag = false; + for (size_t i = 0; i < op->args.size(); i++) { + std::string str = op->name + "." + PrintExpr(op->args[i]); + if (stream_exprs.count(str)) { + arg_flag = true; + } else { + if (i != 0) { + if (i == 1 && arg_flag) void(0); + else stream << ", "; + arg_flag = false; + } + PrintExpr(op->args[i], stream); + } + } + stream << ");\n"; +} + +void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) + os << op->name << "("; + bool arg_flag = false; + for (size_t i = 0; i < op->args.size(); ++i) { + std::string str = op->name + "." + PrintExpr(op->args[i]); + if (stream_exprs.count(str)) { + arg_flag = true; + } else { + if (i != 0) { + if (i == 1 && arg_flag) void(0); + else stream << ", "; + arg_flag = false; + } + PrintExpr(op->args[i], stream); + } + } + os << ")"; +} + void CodeGenAOCL::VisitStmt_(const StreamStmt* op) { std::string vid = GetVarID(op->buffer_var.get()); PrintIndent(); switch (op->stream_type) { case StreamType::Channel: stream << "write_channel_intel("; - stream << vid << ", ("; - PrintType(op->buffer_var.get()->type, stream); - stream << ") "; + stream << vid << ", "; break; case StreamType::Pipe: stream << "write_pipe("; diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 7db1f2b64..9f3d8c379 100755 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -17,11 +17,14 @@ class CodeGenAOCL : public CodeGenOpenCL { void VisitStmt_(const For* op) override; //NOLINT(*) void VisitStmt_(const StreamStmt* op) override; //NOLINT(*) void VisitStmt_(const KernelDef* op) override; //NOLINT(*) + void VisitStmt_(const KernelStmt* op) override; 
//NOLINT(*) - void VisitExpr_(const StreamExpr* op, std::ostream& os) override; + void VisitExpr_(const StreamExpr* op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const KernelExpr* op, std::ostream& os) override; //NOLINT(*) private: bool stream_pragma{false}; + std::unordered_set stream_exprs; }; } // namespace codegen } // namespace TVM From 1ab0c8c1e062107ea65998d5f24dc420bc37b61d Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 1 Oct 2019 11:28:49 -0400 Subject: [PATCH 078/103] [add] sch.stream_to --- python/heterocl/api.py | 5 +- python/heterocl/schedule.py | 25 +- python/heterocl/tvm/build_module.py | 9 + python/heterocl/tvm/schedule.py | 67 ++-- samples/stream/stream.py | 39 +- tvm/HalideIR/src/ir/Expr.h | 7 + tvm/HalideIR/src/ir/IREquality.cpp | 6 + tvm/include/tvm/ir.h | 1 + tvm/include/tvm/schedule.h | 32 +- tvm/src/api/api_lang.cc | 37 +- tvm/src/codegen/codegen_c.cc | 2 + tvm/src/codegen/codegen_source_base.h | 2 + tvm/src/codegen/merlinc/codeanalys_merlinc.cc | 5 + tvm/src/codegen/merlinc/codeanalys_merlinc.h | 2 + tvm/src/codegen/opencl/codegen_opencl.cc | 1 + tvm/src/schedule/schedule_dataflow_rewrite.cc | 356 +++++++++++++++++- tvm/src/schedule/schedule_lang.cc | 39 -- 17 files changed, 500 insertions(+), 135 deletions(-) diff --git a/python/heterocl/api.py b/python/heterocl/api.py index 7843b2698..b8fd6bb4d 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -12,7 +12,7 @@ from . import types from . import config -def init(init_dtype="int32", place="intel_fpga"): +def init(init_dtype="int32", place="cpu_riscv"): """Initialize a HeteroCL environment with configurations. This API must be called each time the users write an application. 
@@ -89,8 +89,7 @@ def placeholder(shape, name=None, dtype=None, place=None): """ name = util.get_name("placeholder", name) dtype = util.get_dtype(dtype) - place = util.get_device(place) - + if shape == (): return Scalar(tvm_api._Var(name, dtype)) tensor = Tensor(shape, dtype, name) diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index f4f5e6b5a..4ff0b804b 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -135,7 +135,8 @@ def reuse_at(self, target, parent, axis, name=None): name = target.name + ".reuse" return self.sch.reuse_at(target, parent, axis, name) - def to(self, tensors, place=_expr.StreamExpr.FIFO): + def stream_to(self, tensors, dst, src=None, + stream_type=_expr.StreamExpr.FIFO, depth=10, name=None): """Stream a list of Tensors to dst devices Parameters @@ -143,21 +144,31 @@ def to(self, tensors, place=_expr.StreamExpr.FIFO): tensors : list of Tensor The tensors to be moved + dst : device or module + The tensors to be moved + stream_type : {FIFO, Channel, Burst}, optional The stream type """ - if place > 2: - raise APIError("Invalid device type") + if stream_type > 2: + raise APIError("Invalid channel type") rets = [] + if not isinstance(tensors, list): + tensors = [tensors] for tensor in tensors: try: - target = target.tensor + target = tensor.tensor except (AttributeError, ValueError): try: - target = target._op + target = tensor._op except AttributeError: - pass - rets.append(self.sch.stream(tensor, place)) + target = tensor + if name is None: + name = target.name + ".stream" + ret = self.sch.stream_to(target, dst, src, + stream_type, depth, name) + name = None + rets.append(ret) return rets def partition(self, target, partition_type=_stmt.Partition.Complete, dim=0, factor=0): diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 413f7a1d9..20eb6aecb 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -319,6 +319,10 @@ def 
lower(sch, """ binds, arg_list = get_binds(args, binds) cfg = BuildConfig.current + try: + remove_args = sch.remove_args + except: + remove_args = [] add_lower_pass = cfg.add_lower_pass if cfg.add_lower_pass else [] lower_phase0 = [x[1] for x in add_lower_pass if x[0] == 0] lower_phase1 = [x[1] for x in add_lower_pass if x[0] == 1] @@ -368,6 +372,10 @@ def lower(sch, return stmt if kernel_only: + for tensor in remove_args: + for arg in args: + if str(arg) == str(tensor): + args.remove(arg) return ir_pass.MakeKernelAPI(stmt, name, arg_list) else: return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func) @@ -405,6 +413,7 @@ def build_fpga_kernel(sch, args, target_name, name="default_function"): BuildConfig.current = build_config(generate_reuse_buffer=False) else: BuildConfig.current = build_config() + flist = lower(sch, args, kernel_only=True, name=name) if isinstance(flist, container.LoweredFunc): flist = [flist] diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 8183ea5b2..1071b1a8f 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -333,8 +333,45 @@ def reuse_at(self, target, parent, axis, name): def partition(self, target, partition_type, dim, factor): return _api_internal._SchedulePartition(self, target, dim, factor, partition_type) - def stream_to(self, tensor, stream_type): - return _api_internal._ScheduleStream(self, tensor,stream_type) + def stream_to(self, tensor, dst, src, + types=_expr.StreamExpr.Channel, + depth=10, name=None): + """ Stream data to devices or on-chip module + + Parameters + ---------- + tensor : list of Tensors + Tensor to be streamed. + dst : hcl device or dst stage + The device or module for streaming + type : channel type + The streaming type (e.g. fifo or pipe) + + Returns + ------- + outer : IterVar + The outer variable of iteration. 
+ """ + # create producer and consumer for stream + if isinstance(dst, Device): + dst = 1 if 'FPGA' in str(dst) else 0 + return _api_internal._ScheduleMove(self, tensor, dst, + types, depth, name) + else: # connect kernel + assert isinstance(dst, _Stage), "dst not a stage " + if src: # remove buffer between kernels + assert isinstance(src, _Stage), \ + "destination should be a stage but " + str(type(src)) + try: + self.remove_args.append(tensor.op.output(0)) + except: + self.remove_args = [] + self.remove_args.append(tensor.op.output(0)) + _api_internal._ScheduleStream(self, tensor, dst, src, + types, depth, name) + else: # from externop buffer to kernel + _api_internal._ScheduleMoveToStage(self, tensor, dst, + types, depth, name) @register_node("Stage") class _Stage(NodeBase): @@ -616,32 +653,6 @@ def pipeline(self, var, initiation_interval=1): def stencil(self, burst_width=512, unroll_factor=1, num_iteration=1): _api_internal._StageStencil(self, burst_width, unroll_factor, num_iteration) - def stream_to(self, dst, src=None, types=_expr.StreamExpr.Channel, depth=10): - """Stream variables between modules and devices - - Create and return buffer for inter device data movement - Void return for inter module - - - Parameters - ---------- - dst : hcl device or dst stage - The device or module for streaming - src : hcl source module - The source module producing output - type : channel type - The streaming type (e.g. 
fifo or pipe) - """ - - if src: # inter-module move - assert isinstance(src, _Stage), \ - "only support device / stage" - _api_internal._StageStream(self, dst, src, types, depth) - else: # return device buffer - assert isinstance(dst, Device), \ - "missing src stage or wrong device" - # return _api_internal._Stage - def pragma(self, var, pragma_type): """Annotate the iteration with pragma diff --git a/samples/stream/stream.py b/samples/stream/stream.py index e83e88a7d..12e760d30 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -1,18 +1,12 @@ import heterocl as hcl -# hcl.init(place=hcl.CPU("riscv")) -hcl.init(place=hcl.FPGA("intel")) +# run on cpu by default +hcl.init(place=hcl.CPU("riscv")) initiation_interval = 4 a = hcl.placeholder((10, 20), name="a") b = hcl.placeholder((10, 20), name="b") - -# auto-alloc empty buffer on fpga -# c = hcl.placeholder((10, 20), name="c", -# place=hcl.FPGA("intel")) -c = hcl.compute((10, 20), lambda x, y: 0, - name = "c") - +c = hcl.placeholder((10, 20), name="c") d = hcl.placeholder((10, 20), name="d") e = hcl.placeholder((10, 20), name="e") @@ -38,24 +32,27 @@ def ret_mul(c, d, e): # op1 = add_mul.ret_add.c # op2 = add_mul.ret_mul.c # s[op1].pipeline(op1.axis[0], initiation_interval) -s.partition(b, dim=2, factor=2) # stream into modules / device -# a0, b0 = s.stream_to([a, b], hcl.FPGA("intel")) -# s.stream_to([a0, b0], add_mul.ret_add) +a0, b0 = s.stream_to([a, b], hcl.FPGA("intel")) +d0 = s.stream_to(d, hcl.FPGA('intel')) +#s.partition(b0, dim=2, factor=2) +s.stream_to([a0, b0], s[add_mul.ret_add]) +s.stream_to(d0, s[add_mul.ret_mul]) # within device move producer to consumer -s[c].stream_to(s[add_mul.ret_add], - s[add_mul.ret_mul]) +s.stream_to(c, s[add_mul.ret_mul], + s[add_mul.ret_add], depth=10) -# return buffer for inter-device move -# d0 = s[d].stream_to(hcl.FPGA('intel')) +# return tensor for inter-device move +e0 = s.stream_to(e, hcl.CPU('riscv')) # print(add_mul.ret_mul._buf, c._buf) 
print(hcl.lower(s)) -code = hcl.build(s, target="aocl") - -with open("example.cl", "w") as f: - f.write(code) - f.close() +# code = hcl.build(s, target="aocl") +# print(code) +# +# with open("example.cl", "w") as f: +# f.write(code) +# f.close() diff --git a/tvm/HalideIR/src/ir/Expr.h b/tvm/HalideIR/src/ir/Expr.h index 769dc8472..4b70d51fc 100644 --- a/tvm/HalideIR/src/ir/Expr.h +++ b/tvm/HalideIR/src/ir/Expr.h @@ -312,6 +312,13 @@ enum class StreamType : int { FIFO = 2 }; +/** An enum class for device type */ +enum class DeviceType : int { + CPU = 0, + FPGA = 1, + GPU = 2 +}; + /** A reference-counted handle to a statement node. */ struct Stmt : public IRHandle { Stmt() : IRHandle() {} diff --git a/tvm/HalideIR/src/ir/IREquality.cpp b/tvm/HalideIR/src/ir/IREquality.cpp index 9e5798fbb..46590056e 100644 --- a/tvm/HalideIR/src/ir/IREquality.cpp +++ b/tvm/HalideIR/src/ir/IREquality.cpp @@ -80,6 +80,7 @@ class IRComparer : public IRVisitor { void visit(const Call *, const Expr &); void visit(const Let *, const Expr &); void visit(const Shuffle *, const Expr &); + void visit(const StreamExpr *, const Expr &); void visit(const LetStmt *, const Stmt &); void visit(const AttrStmt *, const Stmt &); void visit(const AssertStmt *, const Stmt &); @@ -488,6 +489,11 @@ void IRComparer::visit(const Shuffle *op, const Expr &expr) { compare_expr_vector(e->indices, op->indices); } +void IRComparer::visit(const StreamExpr *op, const Expr &expr) { + const StreamExpr *node = expr_.as(); + compare_node_refs(op->buffer_var, node->buffer_var); +} + } // namespace diff --git a/tvm/include/tvm/ir.h b/tvm/include/tvm/ir.h index 2dd6b86ce..702530f69 100644 --- a/tvm/include/tvm/ir.h +++ b/tvm/include/tvm/ir.h @@ -22,6 +22,7 @@ using Halide::Internal::IRNodeType; using Halide::Internal::ForType; using Halide::Internal::PartitionType; using Halide::Internal::StreamType; +using Halide::Internal::DeviceType; using Halide::DeviceAPI; // Node container for CommReducer diff --git 
a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index 38629dfdc..e56085853 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -209,15 +209,6 @@ class Stage : public NodeRef { * \return reference to self. */ EXPORT Stage& pipeline(IterVar var, const Expr& initiation_interval); // NOLINT(*) - /*! - * \brief create stream data channel. - * \param target The data streaming consumer. - * \param stream_type The data streaming channel type. - * \param depth The channel depth. - * \return reference to self. - */ - EXPORT Stage& stream(Stage dest, Stage source, - ir::StreamType stream_type, int depth); // NOLINT(*) EXPORT Stage& stencil(int burst_width, int unroll_factor, int num_iteration); // NOLINT(*) /*! @@ -360,11 +351,30 @@ class Schedule : public NodeRef { const IterVar& axis, int factor_axis = 0); - EXPORT Tensor reuse_at(const Tensor& target, - Stage parent, + EXPORT Tensor reuse_at(const Tensor& target, + Stage parent, IterVar axis, std::string name); + EXPORT void to_stage(const Tensor& target, + Stage dest, + ir::StreamType stream_type, + int channel_depth, + std::string name); + + EXPORT Tensor move_to(const Tensor& target, + ir::DeviceType device_type, + ir::StreamType stream_type, + int channel_depth, + std::string new_name); + + EXPORT void stream_to(const Tensor& target, + Stage dest, + Stage source, + ir::StreamType stream_type, + int channel_depth, + std::string new_name); + EXPORT Tensor partition(const Tensor& target, int dim, int factor, ir::PartitionType partition_type); diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index 428d02e29..096f902cb 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -307,13 +307,6 @@ TVM_REGISTER_API("_StageFuse") *ret = fused; }); -TVM_REGISTER_API("_StageStream") -.set_body([](TVMArgs args, TVMRetValue* ret) { - args[0].operator Stage() - .stream(args[1], args[2], - static_cast(args[3].operator int()), args[4]); - }); - 
TVM_REGISTER_API("_StageComputeAt") .set_body([](TVMArgs args, TVMRetValue* ret) { args[0].operator Stage() @@ -468,12 +461,30 @@ TVM_REGISTER_API("_SchedulePartition") static_cast(args[4].operator int())); }); -// TVM_REGISTER_API("_ScheduleStream") -// .set_body([](TVMArgs args, TVMRetValue *ret) { -// *ret = args[0].operator Schedule() -// .stream(args[1], -// static_cast(args[2].operator int())); -// }); +TVM_REGISTER_API("_ScheduleMoveToStage") + .set_body([](TVMArgs args, TVMRetValue *ret) { + args[0].operator Schedule() + .to_stage(args[1], args[2], + static_cast(args[3].operator int()), + args[4], args[5]); + }); + +TVM_REGISTER_API("_ScheduleMove") + .set_body([](TVMArgs args, TVMRetValue *ret) { + *ret = args[0].operator Schedule() + .move_to(args[1], + static_cast(args[2].operator int()), + static_cast(args[3].operator int()), + args[4], args[5]); + }); + +TVM_REGISTER_API("_ScheduleStream") + .set_body([](TVMArgs args, TVMRetValue *ret) { + args[0].operator Schedule() + .stream_to(args[1], args[2], args[3], + static_cast(args[4].operator int()), + args[5], args[6]); + }); TVM_REGISTER_API("_ScheduleReshape") .set_body([](TVMArgs args, TVMRetValue *ret) { diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 02b07f64d..8d40c7557 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -70,6 +70,8 @@ void CodeGenC::AddFunction(LoweredFunc f) { } std::string CodeGenC::Finish() { + // std::ofstream fstream; + // fstream.open("host.cpp"); return decl_stream.str() + module_stream.str() + stream.str(); } diff --git a/tvm/src/codegen/codegen_source_base.h b/tvm/src/codegen/codegen_source_base.h index 6700550d3..04e639b14 100644 --- a/tvm/src/codegen/codegen_source_base.h +++ b/tvm/src/codegen/codegen_source_base.h @@ -95,6 +95,8 @@ class CodeGenSourceBase { std::ostringstream stream; /*! \brief the stream for mocule */ std::ostringstream module_stream; + /*! 
\brief the stream for host */ + std::ostringstream host_stream; /*! \brief name of each variable */ std::unordered_map var_idmap_; /*! \brief Save states as copy */ diff --git a/tvm/src/codegen/merlinc/codeanalys_merlinc.cc b/tvm/src/codegen/merlinc/codeanalys_merlinc.cc index 3bd835783..d6fa1c6ba 100644 --- a/tvm/src/codegen/merlinc/codeanalys_merlinc.cc +++ b/tvm/src/codegen/merlinc/codeanalys_merlinc.cc @@ -652,6 +652,9 @@ void CodeAnalysMerlinC::VisitExpr_(const Broadcast* op, std::ostream& os) { // LOG(FATAL) << "Broadcast: not supported "; } +void CodeAnalysMerlinC::VisitExpr_(const StreamExpr* op, std::ostream& os) { // NOLINT(*) +} + void CodeAnalysMerlinC::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*) os << "("; PrintExpr(op->condition, os); @@ -913,6 +916,8 @@ void CodeAnalysMerlinC::VisitStmt_(const Reuse *op) { void CodeAnalysMerlinC::VisitStmt_(const Partition *op) {} +void CodeAnalysMerlinC::VisitStmt_(const StreamStmt *op) {} + void CodeAnalysMerlinC::VisitStmt_(const Stencil *op) { PrintStmt(op->body); } diff --git a/tvm/src/codegen/merlinc/codeanalys_merlinc.h b/tvm/src/codegen/merlinc/codeanalys_merlinc.h index 6ba082f09..421f0d96f 100644 --- a/tvm/src/codegen/merlinc/codeanalys_merlinc.h +++ b/tvm/src/codegen/merlinc/codeanalys_merlinc.h @@ -112,6 +112,7 @@ class CodeAnalysMerlinC : void VisitExpr_(const SetSlice* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const Quantize* op, std::ostream& os) override; // NOLINT(*) void VisitExpr_(const KernelExpr* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const StreamExpr* op, std::ostream& os) override; // NOLINT(*) // statment void VisitStmt_(const LetStmt* op) override; void VisitStmt_(const Store* op) override; @@ -131,6 +132,7 @@ class CodeAnalysMerlinC : void VisitStmt_(const Reuse* op) override; void VisitStmt_(const Partition* op) override; void VisitStmt_(const Stencil* op) override; + void VisitStmt_(const StreamStmt* op) override; /*! 
* Print Type represetnation of type t. * \param t The type representation. diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index 0ecee8f16..ad6fd9556 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -2,6 +2,7 @@ # include # include # include +# include # include # include "./codegen_opencl.h" # include "../../runtime/thread_storage_scope.h" diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index 69d701cbd..aeefddf3f 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "./message_passing.h" #include "../pass/ir_util.h" @@ -43,6 +44,17 @@ class VarReplacer : public ir::IRMutator { const std::unordered_map& vsub_; }; +// create indices for store +Expr getIndex(std::vector indices, const Array shape) { + Expr ret = indices[0]; + Expr mul = 1; + for (size_t i = 1; i < indices.size(); i++) { + mul = Simplify(mul * shape[i]); + ret = Simplify(ret + indices[i] * mul); + } + return ret; +} + Expr InjectPredicate(const Array& predicates, Expr body) { using ir::Reduce; @@ -74,6 +86,97 @@ void ReplaceDataFlow(const Array& stages, } } +class StreamConsumer final : public IRMutator { + public: + VarExpr stream_data; + StreamConsumer( + const std::string& target, + const ir::StreamType& type) + : target_(target), type_(type) {} + + // Replace with StreamExpr e.g. var.read(op. index) + Expr Mutate_(const Load* op, const Expr& e) { + Expr index = op->index; + std::string target_name = op->buffer_var.get()->name_hint; + if (has_suffix(target_name, "." 
+ target_)) { + stream_data = op->buffer_var; + return StreamExpr::make(op->type, op->buffer_var, type_, 10); + } else { + return Load::make(op->type, op->buffer_var, index, op->predicate); + } + } + + private: + const std::string target_; + const ir::StreamType type_; + bool has_suffix(const std::string &str, const std::string &suffix) { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; + } +}; + +class StreamProducer final : public IRMutator { + public: + VarExpr stream_data; + StreamProducer( + const std::string& target, + const ir::StreamType& type) + : target_(target), type_(type) {} + + // Replace with StreamStmt e.g. var.write(value) + Stmt Mutate_(const Store* op, const Stmt& s) { + Expr index = op->index; + Expr value = this->Mutate(op->value); + std::string target_name = op->buffer_var.get()->name_hint; + if (has_suffix(target_name, "." + target_)) { + stream_data = op->buffer_var; + return StreamStmt::make(op->buffer_var, value, type_, 10); + } else { + return Store::make(op->buffer_var, value, index, op->predicate); + } + } + + private: + const std::string target_; + const ir::StreamType type_; + bool has_suffix(const std::string &str, const std::string &suffix) { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; + } +}; + +class KernelUpdater final : public IRMutator { + public: + KernelUpdater( + const std::string& target, + const ir::StreamType& type, + const bool is_producer) + : target_(target), type_(type), is_producer_(is_producer){} + + Stmt Mutate_(const KernelDef* op, const Stmt& s) { + // mutate target load + Stmt stmt = op->body; + Array arr = op->channels; + if (is_producer_) { + StreamProducer mutator(target_, type_); + stmt = mutator.Mutate(stmt); + arr.push_back(mutator.stream_data); + } else { // replace load consumer + StreamConsumer mutator(target_, type_); + stmt = mutator.Mutate(stmt); + 
arr.push_back(mutator.stream_data); + } + // update kernel arg signature + return KernelDef::make(op->args, op->api_args, + op->api_types, stmt, op->ret_void, + op->ret_type, op->name, arr); + } + private: + const std::string target_; + const ir::StreamType type_; + const bool is_producer_; +}; + class ParentStmtCollector final : public IRMutator { public: ParentStmtCollector( @@ -117,6 +220,246 @@ class ParentStmtCollector final : public IRMutator { const IterVar& axis_; }; +// stream buffer data to kernel stage +void Schedule::to_stage(const Tensor& target, + Stage dest, + StreamType stream_type, + int channel_depth, + std::string name) { + Stage target_stage = (*this)[target]; + Buffer target_buffer; + if (const ExternOpNode* op = target_stage->op.as()) { + target_buffer = op->output_placeholders[0]; + // remove and current stage (only consumer) + target_stage->op = ExternOpNode::make(op->name, + "", + Array(), + op->inputs, + op->input_placeholders, + op->output_placeholders, + Evaluate::make(0)); + // update dest stage body for data stream in + const ExternOpNode* destOp = dest->op.as(); + std::regex reg("^(.+?)\\.stream_(.*)"); + std::smatch match_result; + std::regex_match(target_buffer->name, match_result, reg); + std::string old_name = match_result.str(1); + KernelUpdater mutator(old_name, stream_type, 0); + dest->op = ExternOpNode::make(destOp->name, + destOp->tag, + destOp->axis, + destOp->inputs, + destOp->input_placeholders, + Array(), + mutator.Mutate(destOp->body)); + } +} + +// stream data between hardware modules +void Schedule::stream_to(const Tensor& target, + Stage dest, + Stage source, + StreamType stream_type, + int channel_depth, + std::string new_name) { + Stage target_stage = (*this)[target]; + std::vector consumers; + size_t num_stage = (*this)->stages.size(); + Buffer target_buffer; + std::unordered_map pos; + const ExternOpNode* destOp = dest->op.as(); + const ExternOpNode* srcOp = source->op.as(); + + // update kernel def and scope + 
const PlaceholderOpNode* op = target_stage->op.as(); + bool is_placeholder = op ? true : false; + if (is_placeholder) { + for (size_t i = 0; i < num_stage; i++) { + Stage s = (*this)->stages[i]; + // name matching to locate kernels + if (const ExternOpNode* op = s->op.as()) { + for (size_t j = 0; j < op->inputs.size(); j++) { + if (target == op->inputs[j]) { + target_buffer = op->input_placeholders[j]; + consumers.push_back(s); + if (std::regex_match(op->name, std::regex(destOp->name + "(\\d)"))) + pos[dest] = j; + else if (std::regex_match(op->name, std::regex(destOp->name + "(\\d)"))) + pos[source] = j; + break; + } + } + } + } + } else { // only consumed by self stage + const ExternOpNode* op = target_stage->op.as(); + target_buffer = op->output_placeholders[0]; + consumers.push_back(target_stage); + } + // update original kernels + KernelUpdater destMutator(target_buffer->name, + stream_type, 0); + dest->op = ExternOpNode::make(destOp->name, + destOp->tag, + destOp->axis, + destOp->inputs, + destOp->input_placeholders, + Array(), + destMutator.Mutate(destOp->body)); + KernelUpdater srcMutator(target_buffer->name, + stream_type, 1); + source->op = ExternOpNode::make(srcOp->name, + srcOp->tag, + srcOp->axis, + srcOp->inputs, + srcOp->input_placeholders, + Array(), + srcMutator.Mutate(srcOp->body)); + // remove alloc buffer of kernels + for (auto s : consumers) { + const ExternOpNode* op = s->op.as(); + s->op = ExternOpNode::make(op->name, + op->tag, + op->axis, + op->inputs, + op->input_placeholders, + Array(), + op->body); + } +} + +Tensor Schedule::move_to(const Tensor& target, + DeviceType device_type, + StreamType stream_type, + int channel_depth, + std::string new_name) { + Stage target_stage = (*this)[target]; + std::vector consumers; + size_t num_stage = (*this)->stages.size(); + ArrayNode* stages = (*this)->stages.CopyOnWrite(); + Buffer target_buffer; + + // create producer and consumer stages for placeholder + const PlaceholderOpNode* op = 
target_stage->op.as(); + bool is_placeholder = op ? true : false; + if (is_placeholder) { + for (size_t i = 0; i < num_stage; i++) { + Stage s = (*this)->stages[i]; + if (const ExternOpNode* op = s->op.as()) { + for (size_t j = 0; j < op->inputs.size(); j++) { + if (target == op->inputs[j]) { + target_buffer = op->input_placeholders[j]; + consumers.push_back(s); + break; + } + } + } + } + } else { // only consumed by self stage + const ExternOpNode* op = target_stage->op.as(); + target_buffer = op->output_placeholders[0]; + consumers.push_back(target_stage); + } + + // build producer stage + Array producer_inputs; + Array producer_input_placeholders; + Array producer_output_placeholders; + std::string producer_name = target_buffer->name + ".stream_out"; + Buffer producer_buffer = BufferNode::make(Var(producer_name, Handle()), + target->dtype, + target->shape, + Array(), + Expr(), + producer_name, + "", 0, 0); + producer_inputs.push_back(target); + producer_input_placeholders.push_back(target_buffer); + producer_output_placeholders.push_back(producer_buffer); + // streaming producer tensor reading from placeholder + Expr stream = StreamExpr::make(target->dtype, + VarExpr(target_buffer->data), + stream_type, + channel_depth); + // create for loops for tensor init + std::vector indices; + std::vector loop_vars; + for (size_t i = 0; i < target->shape.size(); i++) { + VarExpr iter(producer_name + std::to_string(i)); + indices.push_back(iter); + loop_vars.push_back(iter); + } + Expr index = getIndex(indices, target->shape); + // store op initialized with Variable node + Stmt for_stmt = Store::make(VarExpr(producer_buffer->data), + stream, index, + UIntImm::make(UInt(1), 1)); + for (size_t j = 0; j < target->shape.size(); j++) { + for_stmt = For::make( + VarExpr(loop_vars[j]), + 0, target->shape[j], + ForType::Serial, + DeviceAPI::None, + for_stmt); + } + Expr device; + switch (device_type) { + case DeviceType::CPU: + device = StringImm::make("cpu"); + break; + case 
DeviceType::FPGA: + device = StringImm::make("fpga"); + break; + case DeviceType::GPU: + device = StringImm::make("gpu"); + break; + } + Stmt body = AttrStmt::make( + VarExpr(producer_buffer.node_), + "device_scope", device, for_stmt); + Tensor producer = ExternOpNode::make(producer_name, + "", + Array(), + producer_inputs, + producer_input_placeholders, + producer_output_placeholders, + body).output(0); + + // create new stage and return stream tensors + Stage producer_stage = Stage(producer->op); + size_t pos = FindNodeRef(stages, target_stage); + stages->data.insert(stages->data.begin() + pos, producer_stage.node_); + (*this)->stage_map.Set(producer->op, producer_stage); + + // update consumer stages with new tensor and buffer + for (size_t i = 0; i < consumers.size(); i++) { + Stage s = consumers[i]; + Array new_inputs; + Array new_input_placeholders; + const ExternOpNode* op = s->op.as(); + new_inputs.push_back(producer); + new_input_placeholders.push_back(producer_buffer); + for (size_t j = 0; j < op->inputs.size(); j++) { + new_inputs.push_back(op->inputs[j]); + new_input_placeholders.push_back(op->input_placeholders[j]); + } + Stmt new_body = AttrStmt::make( + VarExpr(producer_buffer.node_), + "device_context_scope", + StringImm::make(producer_buffer->name), + op->body); + s->op = ExternOpNode::make( + op->name, + op->tag, + op->axis, + new_inputs, + new_input_placeholders, + op->output_placeholders, + op->body); + } + return producer; +} + Tensor Schedule::reuse_at(const Tensor& target, Stage parent, IterVar axis, @@ -181,19 +524,6 @@ Tensor Schedule::reuse_at(const Tensor& target, return reuse; } -// Tensor Schedule::stream(const Tensor& target, -// Type stream_type) { -// Stage target_stage = (*this)[target]; -// std::vector consumers; -// size_t num_stage = (*this)->stages.size(); -// size_t min_pos = num_stage; -// ArrayNode* stages = (*this)->stages.CopyOnWrite(); -// Buffer target_buffer; -// const PlaceholderOpNode* op = target_stage->op.as(); -// 
bool is_placeholder = op ? true : false; -// // check if it is a placeholder or not -// } - Tensor Schedule::partition(const Tensor& target, int dim, int factor, PartitionType partition_type) { Stage target_stage = (*this)[target]; diff --git a/tvm/src/schedule/schedule_lang.cc b/tvm/src/schedule/schedule_lang.cc index 3f4e360f0..624c159a1 100644 --- a/tvm/src/schedule/schedule_lang.cc +++ b/tvm/src/schedule/schedule_lang.cc @@ -228,38 +228,6 @@ void Reorder(StageNode* self, const Array& order) { new_stmt); } -void StreamTo(StageNode* target, - StageNode* producer, - StageNode* consumer, - ir::StreamType type, - int depth_factor) { - // target op initialized as externop with buffer - auto producer_op = producer->op.as(); - auto consumer_op = consumer->op.as(); - Stmt producer_stmt = producer_op->body; - Stmt consumer_stmt = consumer_op->body; - // track the argument name for data moving - auto target_op = target->op.as(); - Buffer target_buf = target_op->output_placeholders[0]; - // mutate kernel and load operators inside - Stmt new_consumer_stmt = StreamToConsumer(consumer_stmt, target_buf, type); - Stmt new_producer_stmt = StreamFromProducer(producer_stmt, target_buf, type); - producer->op = ExternOpNode::make(producer_op->name, - producer_op->tag, - producer_op->axis, - producer_op->inputs, - producer_op->input_placeholders, - producer_op->output_placeholders, - new_producer_stmt); - consumer->op = ExternOpNode::make(consumer_op->name, - consumer_op->tag, - consumer_op->axis, - consumer_op->inputs, - consumer_op->input_placeholders, - consumer_op->output_placeholders, - new_consumer_stmt); -} - void ComputeAt(StageNode* producer, StageNode* consumer, const IterVar& var, @@ -447,13 +415,6 @@ Stage& Stage::fuse(IterVar outer, IterVar inner, IterVar* p_target) { // NOLINT return *this; } -Stage& Stage::stream(Stage dest, Stage source, - ir::StreamType type, int depth) { // NOLINT(*) - StreamTo(operator->(), dest.operator->(), - source.operator->(), type, depth); - 
return *this; -} - Stage& Stage::reorder(const Array& order) { // NOLINT(*) Reorder(operator->(), order); return *this; From 8b8dea99831faf820116a9d3607a7442c3ba3d14 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Fri, 4 Oct 2019 14:56:06 -0400 Subject: [PATCH 079/103] [fix] add stream annotation --- python/heterocl/schedule.py | 2 +- tvm/HalideIR/src/ir/IR.cpp | 33 +++++ tvm/HalideIR/src/ir/IR.h | 24 +++- tvm/include/tvm/ir.h | 2 + tvm/src/codegen/opencl/codegen_aocl.cc | 45 ++++--- tvm/src/pass/stream_deduce.cc | 118 ++++++++++++++++++ tvm/src/schedule/schedule_dataflow_rewrite.cc | 75 ++++++++--- 7 files changed, 265 insertions(+), 34 deletions(-) create mode 100644 tvm/src/pass/stream_deduce.cc diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index 4ff0b804b..a47e09115 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -136,7 +136,7 @@ def reuse_at(self, target, parent, axis, name=None): return self.sch.reuse_at(target, parent, axis, name) def stream_to(self, tensors, dst, src=None, - stream_type=_expr.StreamExpr.FIFO, depth=10, name=None): + stream_type=_expr.StreamExpr.Channel, depth=10, name=None): """Stream a list of Tensors to dst devices Parameters diff --git a/tvm/HalideIR/src/ir/IR.cpp b/tvm/HalideIR/src/ir/IR.cpp index 5b81fb30f..783dd5377 100644 --- a/tvm/HalideIR/src/ir/IR.cpp +++ b/tvm/HalideIR/src/ir/IR.cpp @@ -793,6 +793,22 @@ Expr StreamExpr::make(Type type, VarExpr buffer_var, StreamType stream_type, int return Expr(node); } +Expr StreamExpr::make(Type type, VarExpr buffer_var, StreamType stream_type, int depth, + Array annotate_keys, Array annotate_values) { + internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; + internal_assert(annotate_keys.size() == annotate_values.size()) << + "Length of annotate keys and annotate values not equal"; + + std::shared_ptr node = std::make_shared(); + node->type = type; + node->buffer_var = std::move(buffer_var); + node->depth = depth; + 
node->stream_type = stream_type; + node->annotate_keys = std::move(annotate_keys); + node->annotate_values = std::move(annotate_values); + return Expr(node); +} + Stmt StreamStmt::make(VarExpr buffer_var, Expr value, StreamType stream_type, int depth) { internal_assert(value.defined()) << "The stream-in value not defined\n"; internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; @@ -805,6 +821,23 @@ Stmt StreamStmt::make(VarExpr buffer_var, Expr value, StreamType stream_type, in return Stmt(node); } +Stmt StreamStmt::make(VarExpr buffer_var, Expr value, StreamType stream_type, int depth, + Array annotate_keys, Array annotate_values) { + internal_assert(value.defined()) << "The stream-in value not defined\n"; + internal_assert(depth>= 1) << "The stream channel depth must be larger than 1\n"; + internal_assert(annotate_keys.size() == annotate_values.size()) << + "Length of annotate keys and annotate values not equal"; + + std::shared_ptr node = std::make_shared(); + node->buffer_var = std::move(buffer_var); + node->value = std::move(value); + node->depth = depth; + node->stream_type = stream_type; + node->annotate_keys = std::move(annotate_keys); + node->annotate_values = std::move(annotate_values); + return Stmt(node); +} + Stmt Stencil::make(Array inputs, Array outputs, Stmt body, int burst_width, int unroll_factor, int num_iteration) { internal_assert(body.defined()) << "Stencil of undefined body\n"; diff --git a/tvm/HalideIR/src/ir/IR.h b/tvm/HalideIR/src/ir/IR.h index 6a57471da..7d1429200 100644 --- a/tvm/HalideIR/src/ir/IR.h +++ b/tvm/HalideIR/src/ir/IR.h @@ -1181,21 +1181,32 @@ struct Partition : public StmtNode { }; struct StreamStmt : public StmtNode { - VarExpr buffer_var; // var written + VarExpr buffer_var; Expr value; int depth; StreamType stream_type; + Array annotate_keys; + Array annotate_values; EXPORT static Stmt make(VarExpr buffer_var, Expr value, StreamType stream_type, int depth); + EXPORT static Stmt make(VarExpr 
buffer_var, + Expr value, + StreamType stream_type, + int depth, + Array annotate_keys, + Array annotate_values); + void VisitAttrs(IR::AttrVisitor* v) final { v -> Visit("buffer_var", &buffer_var); v -> Visit("value", &value); v -> Visit("depth", &depth); v -> Visit("stream_type", &stream_type); + v -> Visit("annotate_keys", &annotate_keys); + v -> Visit("annotate_values", &annotate_values); } static const IRNodeType _type_info = IRNodeType::StreamStmt; @@ -1206,17 +1217,28 @@ struct StreamExpr : public ExprNode { VarExpr buffer_var; // var loaded int depth; StreamType stream_type; + Array annotate_keys; + Array annotate_values; EXPORT static Expr make(Type type, VarExpr buffer_var, StreamType stream_type, int depth); + EXPORT static Expr make(Type type, + VarExpr buffer_var, + StreamType stream_type, + int depth, + Array annotate_keys, + Array annotate_values); + void VisitAttrs(IR::AttrVisitor* v) final { v -> Visit("dtype", &type); v -> Visit("buffer_var", &buffer_var); v -> Visit("depth", &depth); v -> Visit("stream_type", &stream_type); + v -> Visit("annotate_keys", &annotate_keys); + v -> Visit("annotate_values", &annotate_values); } static const IRNodeType _type_info = IRNodeType::StreamExpr; static constexpr const char* _type_key = "StreamExpr"; diff --git a/tvm/include/tvm/ir.h b/tvm/include/tvm/ir.h index 702530f69..8a26e551c 100644 --- a/tvm/include/tvm/ir.h +++ b/tvm/include/tvm/ir.h @@ -234,6 +234,8 @@ constexpr const char* pipeline_exec_scope = "pipeline_exec_scope"; constexpr const char* opengl_stage_scope = "opengl_stage_scope"; constexpr const char* attach_scope = "attach_scope"; + +constexpr const char* device_scope = "device_scope"; } // namespace attr /*! 
\brief namespace of TVM Intrinsic functions */ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index ecb706415..3fad73e8f 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -210,6 +210,18 @@ void CodeGenAOCL::VisitStmt_(const For* op) { void CodeGenAOCL::VisitExpr_(const StreamExpr* op, std::ostream& os) { std::string vid = GetVarID(op->buffer_var.get()); + int i = 0; + for (auto key : op->annotate_keys) { + auto str = key.as(); + auto val = op->annotate_values[i].as(); + if (str->value == "name" && val != nullptr) { + vid = val->value; + decl_stream << "channel "; + PrintType(op->type, decl_stream); + decl_stream << " " << vid << ";\n"; + } + i++; + } switch (op->stream_type) { case StreamType::Channel: os << "read_channel_intel("; @@ -220,6 +232,7 @@ void CodeGenAOCL::VisitExpr_(const StreamExpr* op, std::ostream& os) { break; case StreamType::FIFO: // buffered channel + os << "fifo"; break; } } @@ -261,13 +274,9 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { decl_stream << "#pragma OPENCL EXTENSION cl_intel_channels : enable\n"; stream_pragma = true; } - decl_stream << "channel "; - std::string str = PrintExpr(op->api_types[i]); - PrintType(String2Type(str), decl_stream); - decl_stream << " " << vid << ";\n"; } else { if (i != 0) { - if (i == 1 && stream_pragma) void(0); + if (stream_vars.count(op->args[i-1])) void(0); else stream << ", "; } this->stream << "__global "; @@ -294,16 +303,13 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { PrintIndent(); stream << op->name << "("; - bool arg_flag = false; for (size_t i = 0; i < op->args.size(); i++) { std::string str = op->name + "." + PrintExpr(op->args[i]); - if (stream_exprs.count(str)) { - arg_flag = true; - } else { + if (!stream_exprs.count(str)) { if (i != 0) { - if (i == 1 && arg_flag) void(0); + std::string pre = op->name + "." 
+ PrintExpr(op->args[i-1]); + if (stream_exprs.count(pre)) void(0); else stream << ", "; - arg_flag = false; } PrintExpr(op->args[i], stream); } @@ -316,13 +322,11 @@ void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT bool arg_flag = false; for (size_t i = 0; i < op->args.size(); ++i) { std::string str = op->name + "." + PrintExpr(op->args[i]); - if (stream_exprs.count(str)) { - arg_flag = true; - } else { + if (!stream_exprs.count(str)) { if (i != 0) { - if (i == 1 && arg_flag) void(0); + std::string pre = op->name + "." + PrintExpr(op->args[i-1]); + if (stream_exprs.count(pre)) void(0); else stream << ", "; - arg_flag = false; } PrintExpr(op->args[i], stream); } @@ -333,6 +337,13 @@ void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT void CodeGenAOCL::VisitStmt_(const StreamStmt* op) { std::string vid = GetVarID(op->buffer_var.get()); PrintIndent(); + int i = 0; + for (auto key : op->annotate_keys) { + auto str = key.as(); + auto val = op->annotate_values[i].as(); + if (str->value == "name" && val != nullptr) vid = val->value; + i++; + } switch (op->stream_type) { case StreamType::Channel: stream << "write_channel_intel("; @@ -343,7 +354,7 @@ void CodeGenAOCL::VisitStmt_(const StreamStmt* op) { stream << vid << ", "; break; case StreamType::FIFO: - // buffered channel + stream << "fifo("; break; } PrintExpr(op->value, stream); diff --git a/tvm/src/pass/stream_deduce.cc b/tvm/src/pass/stream_deduce.cc new file mode 100644 index 000000000..86405f783 --- /dev/null +++ b/tvm/src/pass/stream_deduce.cc @@ -0,0 +1,118 @@ +/*! + * Copyright (c) 2019 by Contributors + * \file remove_no_op.cc + * \brief Remove no op from the stmt + */ +#include +#include +#include +#include + +namespace TVM { +namespace ir { + +// Mark the statment of each stage. 
+class StreamInferer : public IRMutator { + public: + Stmt Mutate_(const LetStmt* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + return is_no_op(op->body) ? MakeEvaluate(op->value) : stmt; + } + Stmt Mutate_(const AttrStmt* op, const Stmt& s) final { + if (op->attr_key == ir::attr::pragma_scope) { + const std::string& pname = op->value.as()->value; + if (pname == "debug_skip_region") { + return MakeEvaluate(0); + } + } + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + return is_no_op(op->body) ? MakeEvaluate(op->value) : stmt; + } + Stmt Mutate_(const IfThenElse* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + if (op->else_case.defined()) { + if (is_no_op(op->else_case)) { + if (is_no_op(op->then_case)) { + return MakeEvaluate(op->condition); + } else { + return IfThenElse::make(op->condition, op->then_case); + } + } else { + return stmt; + } + } else { + if (is_no_op(op->then_case)) { + return MakeEvaluate(op->condition); + } else { + return stmt; + } + } + } + Stmt Mutate_(const For* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + return is_no_op(op->body) ? MakeEvaluate({op->min, op->extent}) : stmt; + } + Stmt Mutate_(const Allocate* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + return is_no_op(op->body) ? MakeEvaluate(op->extents) : stmt; + } + Stmt Mutate_(const ProducerConsumer* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + return is_no_op(op->body) ? op->body : stmt; + } + Stmt Mutate_(const Realize* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + return is_no_op(op->body) ? 
op->body : stmt; + } + Stmt Mutate_(const Evaluate* op, const Stmt& s) final { + if (HasSideEffect(op->value)) return s; + return Evaluate::make(0); + } + Stmt Mutate_(const Block* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + if (is_no_op(op->first)) { + return op->rest; + } else if (is_no_op(op->rest)) { + return op->first; + } else { + return stmt; + } + } + + private: + Stmt MakeEvaluate(Expr value) { + if (HasSideEffect(value)) { + return Evaluate::make(value); + } else { + return Evaluate::make(0); + } + } + Stmt MakeEvaluate(const Array& values) { + Stmt stmt; + for (Expr e : values) { + if (HasSideEffect(e)) { + if (stmt.defined()) { + stmt = Block::make(stmt, Evaluate::make(e)); + } else { + stmt = Evaluate::make(e); + } + } + } + return stmt.defined() ? stmt : Evaluate::make(0); + } +}; + +Stmt InferStream(Stmt stmt) { + return StreamInferer().Mutate(stmt); +} + +} // namespace ir +} // namespace TVM diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index aeefddf3f..8e6e11088 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -91,8 +91,12 @@ class StreamConsumer final : public IRMutator { VarExpr stream_data; StreamConsumer( const std::string& target, - const ir::StreamType& type) - : target_(target), type_(type) {} + const ir::StreamType& type, + const bool kernel_channel, + const std::string& common_name) + : target_(target), type_(type), + kernel_channel_(kernel_channel), + common_name_(common_name) {} // Replace with StreamExpr e.g. var.read(op. index) Expr Mutate_(const Load* op, const Expr& e) { @@ -100,7 +104,12 @@ class StreamConsumer final : public IRMutator { std::string target_name = op->buffer_var.get()->name_hint; if (has_suffix(target_name, "." 
+ target_)) { stream_data = op->buffer_var; - return StreamExpr::make(op->type, op->buffer_var, type_, 10); + Array keys, values; + if (kernel_channel_) { + keys.push_back(StringImm::make("name")); + values.push_back(StringImm::make(common_name_)); + } + return StreamExpr::make(op->type, op->buffer_var, type_, 10, keys, values); } else { return Load::make(op->type, op->buffer_var, index, op->predicate); } @@ -109,6 +118,8 @@ class StreamConsumer final : public IRMutator { private: const std::string target_; const ir::StreamType type_; + const bool kernel_channel_; + const std::string common_name_; bool has_suffix(const std::string &str, const std::string &suffix) { return str.size() >= suffix.size() && str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; @@ -120,8 +131,12 @@ class StreamProducer final : public IRMutator { VarExpr stream_data; StreamProducer( const std::string& target, - const ir::StreamType& type) - : target_(target), type_(type) {} + const ir::StreamType& type, + const bool kernel_channel, + const std::string& common_name) + : target_(target), type_(type), + kernel_channel_(kernel_channel), + common_name_(common_name) {} // Replace with StreamStmt e.g. var.write(value) Stmt Mutate_(const Store* op, const Stmt& s) { @@ -130,7 +145,12 @@ class StreamProducer final : public IRMutator { std::string target_name = op->buffer_var.get()->name_hint; if (has_suffix(target_name, "." 
+ target_)) { stream_data = op->buffer_var; - return StreamStmt::make(op->buffer_var, value, type_, 10); + Array keys, values; + if (kernel_channel_) { + keys.push_back(StringImm::make("name")); + values.push_back(StringImm::make(common_name_)); + } + return StreamStmt::make(op->buffer_var, value, type_, 10, keys, values); } else { return Store::make(op->buffer_var, value, index, op->predicate); } @@ -139,6 +159,8 @@ class StreamProducer final : public IRMutator { private: const std::string target_; const ir::StreamType type_; + const bool kernel_channel_; + const std::string common_name_; bool has_suffix(const std::string &str, const std::string &suffix) { return str.size() >= suffix.size() && str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; @@ -147,22 +169,32 @@ class StreamProducer final : public IRMutator { class KernelUpdater final : public IRMutator { public: + static int channelCount; KernelUpdater( const std::string& target, const ir::StreamType& type, - const bool is_producer) - : target_(target), type_(type), is_producer_(is_producer){} + const bool is_producer, + const bool kernel_channel) + : target_(target), type_(type), + is_producer_(is_producer), + kernel_channel_(kernel_channel) { + if (kernel_channel_) common_name = getName(); + } Stmt Mutate_(const KernelDef* op, const Stmt& s) { // mutate target load Stmt stmt = op->body; Array arr = op->channels; if (is_producer_) { - StreamProducer mutator(target_, type_); + StreamProducer mutator(target_, type_, + kernel_channel_, + common_name); stmt = mutator.Mutate(stmt); arr.push_back(mutator.stream_data); } else { // replace load consumer - StreamConsumer mutator(target_, type_); + StreamConsumer mutator(target_, type_, + kernel_channel_, + common_name); stmt = mutator.Mutate(stmt); arr.push_back(mutator.stream_data); } @@ -175,8 +207,19 @@ class KernelUpdater final : public IRMutator { const std::string target_; const ir::StreamType type_; const bool is_producer_; + const bool 
kernel_channel_; + std::string common_name; + std::string getName() { + channelCount += 1; + int channel_num = channelCount; + if (channelCount % 2 == 0) channel_num = channelCount - 1; + return std::string("channel_" + std::to_string(channel_num)); + } }; +// Initialize static channel count +int KernelUpdater::channelCount = 0; + class ParentStmtCollector final : public IRMutator { public: ParentStmtCollector( @@ -244,7 +287,8 @@ void Schedule::to_stage(const Tensor& target, std::smatch match_result; std::regex_match(target_buffer->name, match_result, reg); std::string old_name = match_result.str(1); - KernelUpdater mutator(old_name, stream_type, 0); + KernelUpdater mutator(old_name, stream_type, + false, false); dest->op = ExternOpNode::make(destOp->name, destOp->tag, destOp->axis, @@ -298,7 +342,7 @@ void Schedule::stream_to(const Tensor& target, } // update original kernels KernelUpdater destMutator(target_buffer->name, - stream_type, 0); + stream_type, false, true); dest->op = ExternOpNode::make(destOp->name, destOp->tag, destOp->axis, @@ -307,7 +351,7 @@ void Schedule::stream_to(const Tensor& target, Array(), destMutator.Mutate(destOp->body)); KernelUpdater srcMutator(target_buffer->name, - stream_type, 1); + stream_type, true, true); source->op = ExternOpNode::make(srcOp->name, srcOp->tag, srcOp->axis, @@ -328,6 +372,7 @@ void Schedule::stream_to(const Tensor& target, } } +// move data to device Tensor Schedule::move_to(const Tensor& target, DeviceType device_type, StreamType stream_type, @@ -445,8 +490,8 @@ Tensor Schedule::move_to(const Tensor& target, } Stmt new_body = AttrStmt::make( VarExpr(producer_buffer.node_), - "device_context_scope", - StringImm::make(producer_buffer->name), + "device_scope", + device, op->body); s->op = ExternOpNode::make( op->name, From 23fa5996a2c6579d0933c86238b60202c48c126f Mon Sep 17 00:00:00 2001 From: Hecmay Date: Sat, 5 Oct 2019 22:47:03 -0400 Subject: [PATCH 080/103] [add] host device codegen --- 
tvm/src/codegen/codegen_c.cc | 44 +++++++++++++++++-- tvm/src/codegen/codegen_c.h | 6 +++ tvm/src/codegen/codegen_source_base.h | 4 +- tvm/src/codegen/opencl/codegen_aocl.cc | 11 ++++- tvm/src/schedule/schedule_dataflow_rewrite.cc | 12 +++-- 5 files changed, 67 insertions(+), 10 deletions(-) diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 8d40c7557..d23b95888 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -70,9 +70,22 @@ void CodeGenC::AddFunction(LoweredFunc f) { } std::string CodeGenC::Finish() { - // std::ofstream fstream; - // fstream.open("host.cpp"); - return decl_stream.str() + module_stream.str() + stream.str(); + std::ostringstream device; + device << "void top() {\n" << device_stream.str(); + if (fpga_scope_) device << stream.str(); + else host_stream << stream.str(); + if (top_data_type_.size() > 0) { + int i = 0; + for (const auto & kv : top_data_type_) { + // PrintType(kv.second, host_stream); + if (i != 0) host_stream << ", "; + host_stream << kv.first; + i++; + } + host_stream << ");\n"; + } + host_stream << "}\n"; + return decl_stream.str() + module_stream.str() + host_stream.str() + device.str(); } void CodeGenC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) @@ -320,7 +333,6 @@ void CodeGenC::PrintType(Type t, std::ostream& os) { // NOLINT(*) LOG(FATAL) << "Cannot convert type " << t << " to C type"; } - inline void PrintConst(const IntImm* op, std::ostream& os, CodeGenC* p) { // NOLINT(*) if (op->type == Int(32)) { std::ostringstream temp; @@ -818,6 +830,30 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { const Variable* v = op->node.as(); CHECK(v); volatile_buf_.insert(v); + } else if (op->attr_key == ir::attr::device_scope) { + if (op->value.as()->value == "fpga" && !fpga_scope_) { + fpga_scope_ = true; + // call top function + PrintIndent(); + stream << "top("; + host_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + } else if 
(op->value.as()->value == "cpu" && fpga_scope_) { + fpga_scope_ = false; + // add arguments after fpga block finished + int i = 0; + for (const auto & kv : top_data_type_) { + PrintType(kv.second, host_stream); + if (i != 0) stream << ","; + host_stream << " " << kv.first; + i++; + } + host_stream << ");\n"; + device_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + } } this->PrintStmt(op->body); } diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index 0c158420a..b77d1b657 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -208,6 +208,10 @@ class CodeGenC : std::unordered_map alloc_storage_scope_; /*! \brief the data type of allocated buffers */ std::unordered_map handle_data_type_; + /*! \brief the data type array for kernels */ + std::unordered_map> kernel_data_type_; + /*! \brief the data type array for top functions */ + std::unordered_map top_data_type_; std::unordered_map buf_length_map_; // save for kernel gen @@ -221,6 +225,8 @@ class CodeGenC : private: /*! \brief whether to print in SSA form */ bool print_ssa_form_{false}; + /*! \brief whether generate code for fpga */ + bool fpga_scope_{false}; /*! \brief set of volatile buf access */ std::unordered_set volatile_buf_; }; diff --git a/tvm/src/codegen/codegen_source_base.h b/tvm/src/codegen/codegen_source_base.h index 04e639b14..9bc90f33f 100644 --- a/tvm/src/codegen/codegen_source_base.h +++ b/tvm/src/codegen/codegen_source_base.h @@ -95,8 +95,10 @@ class CodeGenSourceBase { std::ostringstream stream; /*! \brief the stream for mocule */ std::ostringstream module_stream; - /*! \brief the stream for host */ + /*! \brief the stream host */ std::ostringstream host_stream; + /*! \brief the stream device */ + std::ostringstream device_stream; /*! \brief name of each variable */ std::unordered_map var_idmap_; /*! 
\brief Save states as copy */ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 3fad73e8f..fd16716be 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -264,6 +264,7 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { stream_vars.insert(op->channels[j]); stream_exprs.insert(op->channels[j].get()->name_hint); } + std::vector types; for (size_t i = 0; i < op->args.size(); ++i) { VarExpr v = op->args[i]; var_shape_map_[v.get()] = op->api_args[i]; @@ -281,10 +282,13 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { } this->stream << "__global "; std::string str = PrintExpr(op->api_types[i]); - PrintType(String2Type(str), stream); + Type type = String2Type(str); + PrintType(type, stream); + types.push_back(type); this->stream << "* restrict " << vid; } } + kernel_data_type_[op->name] = types; stream << ") {\n"; int func_scope = BeginScope(); range_ = CollectIterRange(op->body); @@ -312,6 +316,8 @@ void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { else stream << ", "; } PrintExpr(op->args[i], stream); + Type type = kernel_data_type_[op->name][i]; + top_data_type_[PrintExpr(op->args[i])] = type; } } stream << ");\n"; @@ -319,7 +325,6 @@ void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) os << op->name << "("; - bool arg_flag = false; for (size_t i = 0; i < op->args.size(); ++i) { std::string str = op->name + "." 
+ PrintExpr(op->args[i]); if (!stream_exprs.count(str)) { @@ -329,6 +334,8 @@ void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT else stream << ", "; } PrintExpr(op->args[i], stream); + Type type = kernel_data_type_[op->name][i]; + top_data_type_[PrintExpr(op->args[i])] = type; } } os << ")"; diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index 8e6e11088..5ac5b12b9 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -190,13 +190,15 @@ class KernelUpdater final : public IRMutator { kernel_channel_, common_name); stmt = mutator.Mutate(stmt); - arr.push_back(mutator.stream_data); + if (kernel_channel_) + arr.push_back(mutator.stream_data); } else { // replace load consumer StreamConsumer mutator(target_, type_, kernel_channel_, common_name); stmt = mutator.Mutate(stmt); - arr.push_back(mutator.stream_data); + if (kernel_channel_) + arr.push_back(mutator.stream_data); } // update kernel arg signature return KernelDef::make(op->args, op->api_args, @@ -362,13 +364,17 @@ void Schedule::stream_to(const Tensor& target, // remove alloc buffer of kernels for (auto s : consumers) { const ExternOpNode* op = s->op.as(); + Stmt body = AttrStmt::make(VarExpr(), + "device_scope", + StringImm::make("fpga"), + op->body); s->op = ExternOpNode::make(op->name, op->tag, op->axis, op->inputs, op->input_placeholders, Array(), - op->body); + body); } } From 2d59e40940e6100bf14dc4273b3e318104b7882c Mon Sep 17 00:00:00 2001 From: "Yi-Hsiang (Sean) Lai" Date: Tue, 8 Oct 2019 14:23:12 -0400 Subject: [PATCH 081/103] [API] Enable building a function directly from IR (#133) * add a pass for building a function directly from IR * remove redundant print statement --- python/heterocl/api.py | 21 ++++++++-- python/heterocl/tvm/build_module.py | 16 +++++-- tests/test_api.py | 65 +++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 7 
deletions(-) diff --git a/python/heterocl/api.py b/python/heterocl/api.py index 62e28227a..18c457e37 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -5,6 +5,7 @@ from .tvm import _api_internal as tvm_api from .tvm import schedule as _schedule from .tvm import make as _make +from .tvm import call_intrin from .tensor import Scalar, Tensor from .schedule import Stage, Schedule from .scheme import Scheme @@ -268,10 +269,11 @@ def lower(schedule): new_inputs.append(i.var) return _lower(schedule.sch, new_inputs, simple_mode=True) -def build(schedule, target=None, name="default_function"): +def build(schedule, target=None, name="default_function", stmt=None): """Build the executable according to the schedule and target. - The default target is `llvm` (i.e., CPU execution). + The default target is `llvm` (i.e., CPU execution). If stmt is specified, + the statements created by HeteroCL APIs will be ignored. Parameters ---------- @@ -284,6 +286,9 @@ def build(schedule, target=None, name="default_function"): name : str, optional The name of the generated function + stmt : Stmt, optional + The built statement + Returns ------- tvm.module.Module @@ -294,7 +299,17 @@ def build(schedule, target=None, name="default_function"): new_inputs.append(i.tensor.op.output(0)) else: new_inputs.append(i.var) - return _build(schedule.sch, new_inputs, target=target, name=name) + if stmt is not None: + for i in schedule.inputs: + if isinstance(i, Tensor): + shapes = [] + for s in i.shape: + shapes.append(0) + shapes.append(s) + tpl = tuple(shapes) + stmt = _make.AttrStmt([i.buf, i.tensor], "buffer_bind_scope", + call_intrin('handle', 'tvm_tuple', *tpl), stmt) + return _build(schedule.sch, new_inputs, target=target, name=name, stmt=stmt) ############################################################################## # Other useful APIs diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 413f7a1d9..c8dcc91f2 100755 --- 
a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -278,13 +278,13 @@ def get_binds(args, binds=None): raise ValueError("args must be Tensor, Buffer or Var") return binds, arg_list - def lower(sch, args, name="default_function", binds=None, simple_mode=False, - kernel_only=False): + kernel_only=False, + stmt=None): """Lowering step before build into target. Parameters @@ -319,6 +319,12 @@ def lower(sch, """ binds, arg_list = get_binds(args, binds) cfg = BuildConfig.current + if stmt is not None: + stmt = ir_pass.StorageFlatten(stmt, binds, 64) + if kernel_only: + return ir_pass.MakeKernelAPI(stmt, name, arg_list) + else: + return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func) add_lower_pass = cfg.add_lower_pass if cfg.add_lower_pass else [] lower_phase0 = [x[1] for x in add_lower_pass if x[0] == 0] lower_phase1 = [x[1] for x in add_lower_pass if x[0] == 1] @@ -422,7 +428,8 @@ def build(sch, target=None, target_host=None, name="default_function", - binds=None): + binds=None, + stmt=None): """Build a function with arguments as signiture. 
Parameters @@ -473,7 +480,8 @@ def build(sch, raise ValueError("args must be given for build from schedule") flist = lower(sch, args, name=name, - binds=binds) + binds=binds, + stmt=stmt) if isinstance(flist, container.LoweredFunc): flist = [flist] elif isinstance(sch, container.LoweredFunc): diff --git a/tests/test_api.py b/tests/test_api.py index d5eacf016..7ec1976ad 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,4 +1,5 @@ import heterocl as hcl +import heterocl.tvm as tvm import numpy as np def test_schedule_no_return(): @@ -117,3 +118,67 @@ def test_select(): np_B = hcl_B.asnumpy() assert np.allclose(np_B, np_C) + +def test_build_from_stmt(): + hcl.init(hcl.Int()) + # First, we still need to create HeteroCL inputs + A = hcl.placeholder((10,), "A") + B = hcl.placeholder((10,), "B") + X = hcl.placeholder((), "X") # a scalar input + + # Second, we create variables for loop var + # The first field is the name + # The second field is the data type + i = tvm._api_internal._Var("i", "int32") + + # Similarly, we can create a variable for intermediate tensor + C = tvm._api_internal._Var("C", "int32") + + # Third, we can create Load + # If we are accessing the HeteroCL inputs, we need to use ".buf.data" + load = tvm.make.Load("int32", A.buf.data, i) + + # Fourth, for arithmatic operation, we can add "False" to the end + # This avoids automatic casting + add = tvm.make.Add(load, 1, False) + + # Fifth, we can create Store + # In this case, we just write to the intermediate tensor + # Thus, we don't need to use ".buf.data" + store = tvm.make.Store(C, add, i) + + # Sixth, we can create the loop with our loop var + # For the details of each field, please refer to IR.h under HalideIR/src/ir + loop = tvm.make.For(i, 0, 10, 0, 0, store) + + # Finally, we need to allocate memory for our intermediate tensor + alloc = tvm.make.Allocate(C, "int32", [10], tvm.const(1, "uint1"), loop, []) + + # Similarly, we can do another loop that write stuffs to B + # Note that this i 
is a newly allocated variable though the name is the same + # We cannot reuse the same i for different loops + i = tvm._api_internal._Var("i", "int32") + load = tvm.make.Load("int32", C, i) + mul = tvm.make.Mul(load, X, False) + store = tvm.make.Store(B.buf.data, mul, i) + loop = tvm.make.For(i, 0, 10, 0, 0, store) + stmt = tvm.make.Block(alloc, loop) + + # Finally, we just need to use HeteroCL APIs to build the function + # Note that with this approach, we cannot apply any optimizations with primitives + s = hcl.create_schedule([A, B, X]) + # Just specify the stmt to be the statement we built + f = hcl.build(s, stmt=stmt) + + # A simple test + np_A = np.random.randint(10, size=10) + np_B = np.random.randint(10, size=10) + hcl_A = hcl.asarray(np_A) + hcl_B = hcl.asarray(np_B) + + f(hcl_A, hcl_B, 5) + + np_golden = 5 * (np_A + 1) + np_B = hcl_B.asnumpy() + + assert(np.array_equal(np_B, np_golden)) From a3f168a43a5ec4229504c32f3378b938df7d86bd Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 15 Oct 2019 10:14:18 -0400 Subject: [PATCH 082/103] [add] stream ir mutator --- python/heterocl/api.py | 5 +- python/heterocl/devices.py | 121 ++++++++++- python/heterocl/schedule.py | 8 +- python/heterocl/tvm/build_module.py | 49 +++-- python/heterocl/tvm/device.py | 0 python/heterocl/tvm/schedule.py | 6 +- python/heterocl/tvm/target.py | 7 - tvm/include/tvm/codegen.h | 1 + tvm/include/tvm/ir_pass.h | 8 + tvm/src/api/api_pass.cc | 1 + tvm/src/codegen/build_common.cc | 102 +++++++++ tvm/src/codegen/build_common.h | 1 + tvm/src/codegen/codegen.cc | 1 + tvm/src/codegen/codegen_c.cc | 126 ++++++++--- tvm/src/codegen/codegen_c.h | 8 +- tvm/src/codegen/codegen_source_base.cc | 57 +++-- tvm/src/codegen/codegen_source_base.h | 13 +- tvm/src/codegen/hlsc/codegen_hlsc.cc | 69 +++--- tvm/src/codegen/hlsc/codegen_vhls.cc | 23 +- tvm/src/codegen/hlsc/vhls_module.cc | 6 +- tvm/src/codegen/opencl/codegen_aocl.cc | 17 +- tvm/src/pass/split_host_device.cc | 16 ++ tvm/src/pass/stream_deduce.cc | 
118 ---------- tvm/src/pass/stream_inference.cc | 204 ++++++++++++++++++ tvm/src/schedule/schedule_dataflow_rewrite.cc | 87 ++++++-- 25 files changed, 783 insertions(+), 271 deletions(-) delete mode 100644 python/heterocl/tvm/device.py create mode 100644 tvm/src/codegen/build_common.cc delete mode 100644 tvm/src/pass/stream_deduce.cc create mode 100644 tvm/src/pass/stream_inference.cc diff --git a/python/heterocl/api.py b/python/heterocl/api.py index b8fd6bb4d..d8de796c1 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -12,7 +12,7 @@ from . import types from . import config -def init(init_dtype="int32", place="cpu_riscv"): +def init(init_dtype="int32"): """Initialize a HeteroCL environment with configurations. This API must be called each time the users write an application. @@ -52,7 +52,6 @@ def app2(A, B, C): """ # set the configurations config.init_dtype = init_dtype - config.init_device = place # initialize global variables Schedule.stage_ops = [] Schedule.last_stages = OrderedSet([]) @@ -269,7 +268,7 @@ def lower(schedule): new_inputs.append(i.var) return _lower(schedule.sch, new_inputs, simple_mode=True) -def build(schedule, target=None, name="default_function"): +def build(schedule, target=None, name="host_function"): """Build the executable according to the schedule and target. The default target is `llvm` (i.e., CPU execution). 
diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index ad200e6e6..283d48afe 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -2,6 +2,99 @@ #pylint: disable=too-few-public-methods, too-many-return-statements from .debug import DeviceError +def map_gen(platform, types, model, mode): + pass + +class platform(type): + """The platform class for compute environment setups + + serves as meta-class for attr getting + default platform: aws_f1, zynq, ppac + + Parameters + ---------- + host: str + Device of device to place data + model: str + Model of device to place date + """ + def __getattr__(cls, key): + if key == "aws_f1": + host = CPU("x86", mode=cls.mode) + device = FPGA("xilinx") + return cls(host, device) + elif key == "zynq": + host = CPU("arm", key) + device = FPGA("xilinx", key) + return cls(host, device) + elif key == "ppac": + host = CPU("riscv", key) + device = PIM("ppac") + return cls(host, device) + else: # unsupported device + raise DeviceError("not supported") + +class env(metaclass=platform): + mode = "sim" + def __init__(self, host, device): + self.host = host + self.device = device + + def __str__(self): + return str(self.host) + " : " + \ + str(self.device) + + def __repr__(self): + return str(self.host) + " : " + \ + str(self.device) + + +class Tooling(object): + """The base class for all device tooling + + each device tooling object maintains a stage dict + including mapping from stage -> impl/sim tool + options + stop impl/sim where running into end of stage list + + Parameters + ---------- + types: str + Device of device to place data + model: str + Model of device to place date + """ + def __init__(self, types, model, platform, mode): + self.types = types + self.model = model + self.platform = platform + self.mode = mode + self.mapping = { "source" : "", + "sim" : "", + "impl" : "" } + if types == "CPU": # sim = impl + self.mapping["source"] = { "lang": "opencl", + "compile" : "aocl", + "options" : "" 
} + if types == "FPGA": + self.mapping["source"] = { "lang": "hlsc", + "compile" : "vhls", + "options" : "" } + self.mapping["sim"] = {} + self.mapping["co-sim"] = {} + self.mapping["syn"] = { "compile" : "vivado_hls", + "callback": ""} + self.mapping[""] = {} + else: # implementation + pass + + def __getattr__(self, entry): + return self.mapping[entry] + + def __str__(self): + return str(self.platform) + ":" + \ + str(self.model) + "(" + \ + str(self.mode) + ")" + class Device(object): """The base class for all device types @@ -14,37 +107,51 @@ class Device(object): model: str Model of device to place date """ - def __init__(self, types="CPU", model="x86"): + def __init__(self, types, model, platform, mode): self.types = types self.model = model + self.tool = Tooling(types, model, platform, mode) class CPU(Device): """cpu device with different models""" - def __init__(self, model): + def __init__(self, model, platform="aws_f1", mode="sim"): if model not in ["riscv", "arm", "x86", "sparc", "powerpc"]: raise DeviceError(model + " not supported yet") - super(CPU, self).__init__("CPU", model) + super(CPU, self).__init__("CPU", model, + platform, mode) def __repr__(self): return "CPU (" + str(self.model) + ")" class FPGA(Device): """fpga device with different models""" - def __init__(self, model): + def __init__(self, model, platform="aws_f1", mode="sim"): if model not in ["xilinx", "intel"]: raise DeviceError(model + " not supported yet") - super(FPGA, self).__init__("FPGA", model) + super(FPGA, self).__init__("FPGA", model, + platform, mode) def __repr__(self): return "FPGA (" + str(self.model) + ")" class GPU(Device): """gpu device with different models""" - def __init__(self, model): + def __init__(self, model, platform="aws_f1", mode="sim"): if model not in ["cuda", "rocm"]: raise DeviceError(model + " not supported yet") - super(GPU, self).__init__("GPU", model) + super(GPU, self).__init__("GPU", model, + platform, mode) def __repr__(self): return "GPU (" + 
str(self.model) + ")" +class PIM(Device): + """cpu device with different models""" + def __init__(self, model, platform="ppac", mode="sim"): + if model not in ["ppac"]: + raise DeviceError(model + " not supported yet") + super(CPU, self).__init__("PIM", model, + platform, mode) + def __repr__(self): + return "PIM (" + str(self.model) + ")" + def device_to_str(dtype): """Convert a device type to string format. diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py index a47e09115..03af1cf3e 100644 --- a/python/heterocl/schedule.py +++ b/python/heterocl/schedule.py @@ -135,8 +135,8 @@ def reuse_at(self, target, parent, axis, name=None): name = target.name + ".reuse" return self.sch.reuse_at(target, parent, axis, name) - def stream_to(self, tensors, dst, src=None, - stream_type=_expr.StreamExpr.Channel, depth=10, name=None): + def to(self, tensors, dst, src=None, + stream_type=_expr.StreamExpr.Channel, depth=10, name=None): """Stream a list of Tensors to dst devices Parameters @@ -165,8 +165,8 @@ def stream_to(self, tensors, dst, src=None, target = tensor if name is None: name = target.name + ".stream" - ret = self.sch.stream_to(target, dst, src, - stream_type, depth, name) + ret = self.sch.to(target, dst, src, + stream_type, depth, name) name = None rets.append(ret) return rets diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 20eb6aecb..1ab6bb337 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -8,6 +8,7 @@ import types from ._ffi.node import NodeBase, register_node +from ._ffi.function import register_func from ._ffi.base import _RUNTIME_ONLY from . import api from . import tensor @@ -21,6 +22,7 @@ from . import ndarray from . import target as _target from . 
import make +from ..devices import env class DumpIR(object): """ @@ -338,6 +340,7 @@ def lower(sch, stmt = f(stmt) # Phase 1 stmt = ir_pass.StorageFlatten(stmt, binds, 64) + stmt = ir_pass.InferStream(stmt, 32) #stmt = ir_pass.CanonicalSimplify(stmt) #TODO: SOLVE THIS!! stmt = ir_pass.LiftAllocateAttrs(stmt) if cfg.generate_reuse_buffer: @@ -380,7 +383,7 @@ def lower(sch, else: return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func) -def build_fpga_kernel(sch, args, target_name, name="default_function"): +def build_fpga_kernel(sch, args, target, name="default_function"): """Build an FPGA kernel. Parameters @@ -409,7 +412,8 @@ def build_fpga_kernel(sch, args, target_name, name="default_function"): if args is None: raise ValueError("args must be given for build from schedule") - if target_name == "merlinc": + # generate host (device) code / function + if target == "merlinc": BuildConfig.current = build_config(generate_reuse_buffer=False) else: BuildConfig.current = build_config() @@ -417,20 +421,38 @@ def build_fpga_kernel(sch, args, target_name, name="default_function"): flist = lower(sch, args, kernel_only=True, name=name) if isinstance(flist, container.LoweredFunc): flist = [flist] - fdevice = [ir_pass.LowerIntrin(x, target_name) for x in flist] + fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] + + try: # generate and split code + host = target.host.tool.source['compile'] + builder = getattr(codegen, "build_{0}".format(host)) + host_code = builder(fdevice) + findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") + host_code = host_code[findex + 6 : rindex] + + device = target.device.tool.source['compile'] + builder = getattr(codegen, "build_{0}".format(device)) + device_code = builder(fdevice) + findex, rindex = device_code.find("{device}"), device_code.rfind("{device}") + device_code = device_code[findex + 8 : rindex] + + # test build sim + @register_func + def tvm_callback_syn_postproc(code): + return "test" + builder = 
getattr(codegen, "build_{0}".format("sim")) + f = builder(fdevice, ["sss", "ww"], ["wwq", "swsw"]) + return f - try: - builder = getattr(codegen, "build_{0}".format(target_name)) - return builder(fdevice) except AttributeError: - raise AttributeError("Cannot find the target builder %s" % target_name) + raise AttributeError("Cannot find the target builder %s" % target) return None def build(sch, args=None, target=None, target_host=None, - name="default_function", + name="host_function", binds=None): """Build a function with arguments as signiture. @@ -470,11 +492,12 @@ def build(sch, ---- See the note on :any:`tvm.target` on target string format. """ - target = _target.current_target() if target is None else target - target = _target.create(target) if target else _target.create("llvm") - - if "fpga" in target.keys: - return build_fpga_kernel(sch, args, target.target_name, name=name) + if target and isinstance(target, str): + target = _target.current_target() if target is None else target + target = _target.create(target) if target else _target.create("llvm") + else: # platform target + assert isinstance(target, env), "unsupported target type" + return build_fpga_kernel(sch, args, target, name=name) BuildConfig.current = build_config() if isinstance(sch, schedule._Schedule): diff --git a/python/heterocl/tvm/device.py b/python/heterocl/tvm/device.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 1071b1a8f..c07532efd 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -333,9 +333,9 @@ def reuse_at(self, target, parent, axis, name): def partition(self, target, partition_type, dim, factor): return _api_internal._SchedulePartition(self, target, dim, factor, partition_type) - def stream_to(self, tensor, dst, src, - types=_expr.StreamExpr.Channel, - depth=10, name=None): + def to(self, tensor, dst, src, + types=_expr.StreamExpr.Channel, + depth=10, 
name=None): """ Stream data to devices or on-chip module Parameters diff --git a/python/heterocl/tvm/target.py b/python/heterocl/tvm/target.py index 19c34934a..72ea04c09 100644 --- a/python/heterocl/tvm/target.py +++ b/python/heterocl/tvm/target.py @@ -1,10 +1,3 @@ -''' -@Description: In User Settings Edit -@Author: your name -@Date: 2019-07-25 17:49:16 -@LastEditTime: 2019-08-14 16:19:52 -@LastEditors: Please set LastEditors -''' from __future__ import absolute_import import warnings diff --git a/tvm/include/tvm/codegen.h b/tvm/include/tvm/codegen.h index 3877db941..4d6be0230 100644 --- a/tvm/include/tvm/codegen.h +++ b/tvm/include/tvm/codegen.h @@ -42,6 +42,7 @@ runtime::Module Build(const Array& funcs, * \return cstr The C string representation of the file. */ std::string PackImportsToC(const runtime::Module& m, bool system_lib); + } // namespace codegen } // namespace TVM diff --git a/tvm/include/tvm/ir_pass.h b/tvm/include/tvm/ir_pass.h index 88c29f32c..dfba91d32 100644 --- a/tvm/include/tvm/ir_pass.h +++ b/tvm/include/tvm/ir_pass.h @@ -214,6 +214,14 @@ Stmt StorageFlatten(Stmt stmt, */ Stmt RemoveNoOp(Stmt stmt); +/*! + * \brief Infer device scope. + * \param stmt The stmt to be trasnformed + * \param bus_bandwidth The bandwisth of the stream bus + * \return Transformed stmt. + */ +Stmt InferStream(Stmt stmt, int bus_bandwidth); + /*! * \brief Split statement into pipeine stages. 
* \param stmt The stmt to be splitted diff --git a/tvm/src/api/api_pass.cc b/tvm/src/api/api_pass.cc index 348b8816e..1728b0c23 100644 --- a/tvm/src/api/api_pass.cc +++ b/tvm/src/api/api_pass.cc @@ -122,6 +122,7 @@ REGISTER_PASS1(InjectPrefetch); REGISTER_PASS2(InjectDoubleBuffer); REGISTER_PASS2(LoopPartition); REGISTER_PASS1(RemoveNoOp); +REGISTER_PASS2(InferStream); REGISTER_PASS2(SplitPipeline); REGISTER_PASS2(LiftAttrScope); REGISTER_PASS1(NarrowChannelAccess); diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc new file mode 100644 index 000000000..6fe63d986 --- /dev/null +++ b/tvm/src/codegen/build_common.cc @@ -0,0 +1,102 @@ +/*! + * Copyright (c) 2019 by Contributors + * \file build_common.cc + * \brief Build unified simulation module + */ +#include +#include +#include +#include +#include "./build_common.h" + +#include "merlinc/codeanalys_merlinc.h" +#include "hlsc/codegen_vhls.h" +#include "opencl/codegen_aocl.h" + +namespace TVM { +namespace runtime { + +class SimModuleNode final : public ModuleNode { + public: + SimModuleNode(LoweredFunc func, std::string test_file) + : func_(func), test_file_(test_file) {} + + const char* type_key() const { + return "unified_sim"; + } + + // unified simulation function + PackedFunc GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) final { + return PackedFunc([this](TVMArgs args, TVMRetValue* rv){ + if (args.size() != (int)func_->args.size()) + LOG(FATAL) << "The function should take in " << func_->args.size() + << " inputs but get " << args.size(); + std::vector arg_sizes; + std::vector arg_types; + std::vector shmids; + // CollectArgInfo(args, func_, arg_sizes, arg_types); + // GenSharedMem(args, shmids, arg_sizes); + // GenHostCode(args, shmids, arg_types, func_, test_file_); + // TODO: find a better way to do the following + LOG(CLEAN) << "Compiling the generated HLS C code ..."; + system("g++ main.cpp -o out"); + LOG(CLEAN) << "Running C simulation ..."; + 
system("./out"); + LOG(CLEAN) << "Finished C simulation"; + system("rm out main.cpp"); + FreeSharedMem(args, shmids, arg_sizes); + // extract resource information + if (const auto* f = Registry::Get("tvm_callback_syn_postproc")) { + std::string code; + code = (*f)("test").operator std::string(); + LOG(CLEAN) << "extract res info"; + } + + }); + } + + private: + LoweredFunc func_; + std::string test_file_; +}; + +Module CreateSimModule( + LoweredFunc func, + std::string code) { + std::shared_ptr n = + std::make_shared(func, code); + return Module(n); +} +} // namespace runtime + +namespace codegen { +// unified simulation function for diff platforms +runtime::Module BuildSimModule(Array funcs, + Array attrs, + Array values) { + CodeAnalysMerlinC ca; + CodeGenAOCL cg_host; + CodeGenVivadoHLS cg_dev; + for (LoweredFunc f : funcs) { + // analyze AST and collect arg info + ca.AddFunction(f); + str2tupleMap map_arg_type; + map_arg_type = ca.Finish(); + // generate kernel code + cg_host.AddFunction(f, map_arg_type); + cg_dev.AddFunction(f, map_arg_type); + } + + std::string code = cg_host.Finish(); + return runtime::CreateSimModule(funcs[0], code); +} + +TVM_REGISTER_API("codegen.build_sim") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = BuildSimModule(args[0], args[1], args[2]); + }); + +} // namespace codegen +} // namespace TVM diff --git a/tvm/src/codegen/build_common.h b/tvm/src/codegen/build_common.h index ee8cbc509..f9f42d219 100644 --- a/tvm/src/codegen/build_common.h +++ b/tvm/src/codegen/build_common.h @@ -29,6 +29,7 @@ ExtractFuncInfo(const Array& funcs) { } return fmap; } + } // namespace codegen } // namespace TVM #endif // TVM_CODEGEN_BUILD_COMMON_H_ diff --git a/tvm/src/codegen/codegen.cc b/tvm/src/codegen/codegen.cc index 36d3f39e9..996d40744 100644 --- a/tvm/src/codegen/codegen.cc +++ b/tvm/src/codegen/codegen.cc @@ -89,5 +89,6 @@ std::string PackImportsToC(const runtime::Module& mod, bool system_lib) { << "#endif\n"; return os.str(); } + } // 
namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index d23b95888..efc3241be 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -3,6 +3,7 @@ * \file codegen_c.cc */ #include +#include #include #include #include @@ -71,21 +72,25 @@ void CodeGenC::AddFunction(LoweredFunc f) { std::string CodeGenC::Finish() { std::ostringstream device; - device << "void top() {\n" << device_stream.str(); + device << "void top(" << arg_stream.str() + << "){\n" << device_stream.str(); if (fpga_scope_) device << stream.str(); else host_stream << stream.str(); - if (top_data_type_.size() > 0) { - int i = 0; - for (const auto & kv : top_data_type_) { - // PrintType(kv.second, host_stream); - if (i != 0) host_stream << ", "; - host_stream << kv.first; - i++; - } - host_stream << ");\n"; - } - host_stream << "}\n"; - return decl_stream.str() + module_stream.str() + host_stream.str() + device.str(); + // finish host call stmt + // if (top_data_type_.size() > 0) { + // int i = 0; + // for (const auto & kv : top_data_type_) { + // // PrintType(kv.second, host_stream); + // if (i != 0) host_stream << ", "; + // host_stream << kv.first; + // i++; + // } + // host_stream << ");\n"; + // } + device << "}\n"; + return decl_stream.str() + "\n{device}\n" + + module_stream.str() + device.str() + "\n{device}\n" + + "\n{host}\n" + host_stream.str() + "\n{host}\n"; } void CodeGenC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) @@ -814,6 +819,40 @@ void CodeGenC::VisitStmt_(const Allocate* op) { this->PrintStmt(op->body); } +// record of vars used in next scope switch +class StreamCollector final : public IRVisitor { + public: + StreamCollector(std::vector& stream_stmt_list, + std::vector& stream_expr_list, + std::string initial_scope) + : stream_stmt_list_(stream_stmt_list), + stream_expr_list_(stream_expr_list), + scope_(initial_scope) {} + + void Visit_(const StreamExpr* op) { + if (switch_on) + 
stream_expr_list_.push_back(op); + } + + void Visit_(const StreamStmt* op) { + if (switch_on) + stream_stmt_list_.push_back(op); + } + + void Visit_(const AttrStmt* op) { + if (op->attr_key == attr::device_scope && + op->value.as()->value == scope_) + switch_on = false; + this->Visit(op->body); + } + + private: + std::vector& stream_stmt_list_; + std::vector& stream_expr_list_; + std::string scope_; + bool switch_on{true}; +}; + void CodeGenC::VisitStmt_(const AttrStmt* op) { if (op->attr_key == ir::attr::thread_extent) { IterVar iv(op->node.node_); @@ -831,25 +870,64 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { CHECK(v); volatile_buf_.insert(v); } else if (op->attr_key == ir::attr::device_scope) { + // print top( ... in host and enter fpga scope if (op->value.as()->value == "fpga" && !fpga_scope_) { fpga_scope_ = true; - // call top function PrintIndent(); + + // track the stream usage + std::vector stream_stmts; + std::vector stream_exprs; + StreamCollector collector(stream_stmts, stream_exprs, "cpu"); + collector.Visit(op->body); + + // generte function calls stream << "top("; + int index = 0; + for (auto op : stream_stmts) { + if (index !=0) stream << ", "; + std::string vid = op->buffer_var.get()->name_hint; + stream << vid; + if (vid.find("stream_in") != std::string::npos || + vid.find("stream_out") != std::string::npos) { + if (index !=0) arg_stream << ", "; + PrintType(op->buffer_var.type(), arg_stream); + arg_stream << vid; + } + index++; + } + for (auto op : stream_exprs) { + if (index !=0) stream << ", "; + std::string vid = op->buffer_var.get()->name_hint; + stream << op->buffer_var.get()->name_hint; + if (vid.find("stream_in") != std::string::npos || + vid.find("stream_out") != std::string::npos) { + if (index !=0) arg_stream << ", "; + PrintType(op->buffer_var.type(), arg_stream); + arg_stream << vid; + } + index++; + } + stream << ");\n"; + + // switch context to device scope host_stream << this->stream.str(); this->stream.str(""); 
this->stream.clear(); - } else if (op->value.as()->value == "cpu" && fpga_scope_) { + + // swtich from device to host + } else if (op->value.as()->value == "cpu" && + fpga_scope_) { fpga_scope_ = false; - // add arguments after fpga block finished - int i = 0; - for (const auto & kv : top_data_type_) { - PrintType(kv.second, host_stream); - if (i != 0) stream << ","; - host_stream << " " << kv.first; - i++; - } - host_stream << ");\n"; + // add args after fpga block exited + // int i = 0; + // for (const auto & kv : top_data_type_) { + // PrintType(kv.second, host_stream); + // if (i != 0) stream << ","; + // host_stream << " " << kv.first; + // i++; + // } + // host_stream << ");\n"; device_stream << this->stream.str(); this->stream.str(""); this->stream.clear(); diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index b77d1b657..503d6cef5 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -204,14 +204,12 @@ class CodeGenC : const std::string& target, const std::string& src, Type t) final; /*! \brief restrict keyword */ std::string restrict_keyword_{""}; + /*! \brief the func arg decl stream */ + std::ostringstream arg_stream; /*! \brief the storage scope of allocation */ std::unordered_map alloc_storage_scope_; /*! \brief the data type of allocated buffers */ std::unordered_map handle_data_type_; - /*! \brief the data type array for kernels */ - std::unordered_map> kernel_data_type_; - /*! \brief the data type array for top functions */ - std::unordered_map top_data_type_; std::unordered_map buf_length_map_; // save for kernel gen @@ -225,8 +223,6 @@ class CodeGenC : private: /*! \brief whether to print in SSA form */ bool print_ssa_form_{false}; - /*! \brief whether generate code for fpga */ - bool fpga_scope_{false}; /*! 
\brief set of volatile buf access */ std::unordered_set volatile_buf_; }; diff --git a/tvm/src/codegen/codegen_source_base.cc b/tvm/src/codegen/codegen_source_base.cc index 28c5b99f0..9fc6fc706 100644 --- a/tvm/src/codegen/codegen_source_base.cc +++ b/tvm/src/codegen/codegen_source_base.cc @@ -8,19 +8,22 @@ namespace TVM { namespace codegen { void CodeGenSourceBase::ClearFuncState() { - name_alloc_map_.clear(); + host_name_alloc_map_.clear(); + device_name_alloc_map_.clear(); ssa_assign_map_.clear(); var_idmap_.clear(); scope_mark_.clear(); } void CodeGenSourceBase::SaveFuncState() { - name_alloc_map_save.clear(); + host_name_alloc_map_save.clear(); + device_name_alloc_map_save.clear(); ssa_assign_map_save.clear(); var_idmap_save.clear(); scope_mark_save.clear(); // save state into private member - name_alloc_map_save = name_alloc_map_; + host_name_alloc_map_save = host_name_alloc_map_; + device_name_alloc_map_save = device_name_alloc_map_; ssa_assign_map_save = ssa_assign_map_; var_idmap_save = var_idmap_; scope_mark_save = scope_mark_; @@ -28,7 +31,8 @@ void CodeGenSourceBase::SaveFuncState() { void CodeGenSourceBase::RestoreFuncState() { this->ClearFuncState(); - name_alloc_map_ = name_alloc_map_save; + host_name_alloc_map_ = host_name_alloc_map_save; + device_name_alloc_map_ = device_name_alloc_map_save; ssa_assign_map_ = ssa_assign_map_save; var_idmap_ = var_idmap_save; scope_mark_ = scope_mark_save; @@ -38,24 +42,45 @@ std::string CodeGenSourceBase::GetUniqueName(std::string prefix) { for (size_t i = 0; i < prefix.size(); ++i) { if (prefix[i] == '.') prefix[i] = '_'; } - auto it = name_alloc_map_.find(prefix); - if (it != name_alloc_map_.end()) { - while (true) { - std::ostringstream os; - os << prefix << (++it->second); - std::string name = os.str(); - if (name_alloc_map_.count(name) == 0) { - prefix = name; - break; + if (fpga_scope_) { + auto it = device_name_alloc_map_.find(prefix); + if (it != device_name_alloc_map_.end()) { + while (true) { + 
std::ostringstream os; + os << prefix << (++it->second); + std::string name = os.str(); + if (device_name_alloc_map_.count(name) == 0) { + prefix = name; + break; + } } } + device_name_alloc_map_[prefix] = 0; + return prefix; + } else { + auto it = host_name_alloc_map_.find(prefix); + if (it != host_name_alloc_map_.end()) { + while (true) { + std::ostringstream os; + os << prefix << (++it->second); + std::string name = os.str(); + if (host_name_alloc_map_.count(name) == 0) { + prefix = name; + break; + } + } + } + host_name_alloc_map_[prefix] = 0; + return prefix; } - name_alloc_map_[prefix] = 0; - return prefix; } std::string CodeGenSourceBase::SSAGetID(std::string src, Type t) { - if (name_alloc_map_.count(src)) return src; + if (fpga_scope_) { + if (device_name_alloc_map_.count(src)) return src; + } else { + if (host_name_alloc_map_.count(src)) return src; + } auto it = ssa_assign_map_.find(src); if (it != ssa_assign_map_.end()) { if (scope_mark_.at(it->second.scope_id)) { diff --git a/tvm/src/codegen/codegen_source_base.h b/tvm/src/codegen/codegen_source_base.h index 9bc90f33f..befc3f8ec 100644 --- a/tvm/src/codegen/codegen_source_base.h +++ b/tvm/src/codegen/codegen_source_base.h @@ -101,21 +101,26 @@ class CodeGenSourceBase { std::ostringstream device_stream; /*! \brief name of each variable */ std::unordered_map var_idmap_; - /*! \brief Save states as copy */ + /*! \brief save states as copy */ std::unordered_map var_idmap_save; + /*! \brief whether generate code for fpga */ + bool fpga_scope_{false}; + /*! \brief name allocation map for host */ + std::unordered_map host_name_alloc_map_; + /*! \brief name allocation map for device */ + std::unordered_map device_name_alloc_map_; private: /*! \brief assignment map of ssa */ std::unordered_map ssa_assign_map_; - /*! \brief name allocation map */ - std::unordered_map name_alloc_map_; /*! \brief array to check whether we are inside certain scope */ std::vector scope_mark_; /*! 
\brief The current indentation value */ int indent_{0}; /*! \brief Save states as copy */ std::unordered_map ssa_assign_map_save; - std::unordered_map name_alloc_map_save; + std::unordered_map host_name_alloc_map_save; + std::unordered_map device_name_alloc_map_save; std::vector scope_mark_save; }; diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index 5ff7afb0d..589ea25b6 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -164,36 +164,53 @@ void CodeGenHLSC::VisitStmt_(const IfThenElse* op) { } void CodeGenHLSC::VisitStmt_(const Allocate* op) { - CHECK(!is_zero(op->condition)); - std::string vid = AllocVarID(op->buffer_var.get()); - this->PrintIndent(); - int32_t constant_size = op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - var_shape_map_[buffer] = op->extents; - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); + const Variable* v = op->buffer_var.get(); + std::string key = v->name_hint; + for (size_t i = 0; i < key.size(); ++i) + if (key[i] == '.') key[i] = '_'; + + // reuse host var & extract StreamExpr + if (!fpga_scope_ && host_name_alloc_map_.count(key)) { + this->PrintIndent(); + stream << "hls::stream<> read\n"; + this->PrintStmt(op->body); + } else { + CHECK(!is_zero(op->condition)); + std::string vid = AllocVarID(op->buffer_var.get()); + this->PrintIndent(); + int32_t constant_size = op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + var_shape_map_[buffer] = op->extents; + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); - // remove kernel alloc - if (true) { - PrintType(op->type, stream); - stream << ' '<< vid; - if (constant_size > 1) {// 
Transfer length one array to scalar - for (size_t i = 0; i < op->extents.size(); i++) { - stream << '['; - PrintExpr(op->extents[i], stream); - stream << "]"; + // initlize hls stream channel + if (vid.find("stream_in") != std::string::npos || + vid.find("stream_out") != std::string::npos) { + stream << "hls::stream<"; + PrintType(op->type, stream); + stream << "> " << vid << ";\n"; + } else { + PrintType(op->type, stream); + stream << ' '<< vid; + if (constant_size > 1) {// Transfer length one array to scalar + for (size_t i = 0; i < op->extents.size(); i++) { + stream << '['; + PrintExpr(op->extents[i], stream); + stream << "]"; + } } + stream << ";\n"; } - stream << ";\n"; - } - buf_length_map_[buffer] = constant_size; - RegisterHandleType(op->buffer_var.get(), op->type); - for (size_t i = 0; i < op->attrs.size(); i++) { - this->PrintStmt(op->attrs[i]); + buf_length_map_[buffer] = constant_size; + RegisterHandleType(op->buffer_var.get(), op->type); + for (size_t i = 0; i < op->attrs.size(); i++) { + this->PrintStmt(op->attrs[i]); + } + this->PrintStmt(op->body); } - this->PrintStmt(op->body); } } // namespace codegen diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index 4cfbc8677..37c884f15 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -24,9 +24,9 @@ namespace codegen { void CodeGenVivadoHLS::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Write header files - this->stream << "#include \n"; - this->stream << "#include \n"; - this->stream << "#include \n\n"; + this->decl_stream << "#include \n"; + this->decl_stream << "#include \n"; + this->decl_stream << "#include \n\n"; CodeGenHLSC::AddFunction(f, map_arg_type); if (soda_header_.is_open()) soda_header_.close(); @@ -144,28 +144,31 @@ void CodeGenVivadoHLS::VisitStmt_(const Partition* op) { } void CodeGenVivadoHLS::VisitExpr_(const StreamExpr* op, std::ostream& os) { - std::string vid = 
GetVarID(op->buffer_var.get()); + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); os << vid << ".read()"; } void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { - std::string vid = GetVarID(op->buffer_var.get()); + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); PrintIndent(); stream << vid; switch (op->stream_type) { case StreamType::Channel: - stream << "[channel]"; break; case StreamType::FIFO: - stream << "[fifo]"; break; case StreamType::Pipe: - stream << "[pipe]"; break; } - stream << ".write"; + stream << ".write("; PrintExpr(op->value, stream); - stream << ";\n"; + stream << ");\n"; } class AllocateCollector final : public IRVisitor { diff --git a/tvm/src/codegen/hlsc/vhls_module.cc b/tvm/src/codegen/hlsc/vhls_module.cc index fd28234db..7355c7894 100644 --- a/tvm/src/codegen/hlsc/vhls_module.cc +++ b/tvm/src/codegen/hlsc/vhls_module.cc @@ -345,9 +345,9 @@ class VivadoHLSModuleNode final : public ModuleNode { GenHostCode(args, shmids, arg_types, func_, test_file_); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated HLS C code ..."; - // system("g++ main.cpp -o out"); + system("g++ main.cpp -o out"); LOG(CLEAN) << "Running C simulation ..."; - // system("./out"); + system("./out"); LOG(CLEAN) << "Finished C simulation"; // system("rm out main.cpp"); FreeSharedMem(args, shmids, arg_sizes); @@ -370,4 +370,4 @@ Module CreateVivadoHLSModule( } } // namespace runtime -} // namespace TVM \ No newline at end of file +} // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index fd16716be..4a503d80b 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -209,7 +209,10 @@ void CodeGenAOCL::VisitStmt_(const For* op) { } void 
CodeGenAOCL::VisitExpr_(const StreamExpr* op, std::ostream& os) { - std::string vid = GetVarID(op->buffer_var.get()); + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); int i = 0; for (auto key : op->annotate_keys) { auto str = key.as(); @@ -264,7 +267,6 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { stream_vars.insert(op->channels[j]); stream_exprs.insert(op->channels[j].get()->name_hint); } - std::vector types; for (size_t i = 0; i < op->args.size(); ++i) { VarExpr v = op->args[i]; var_shape_map_[v.get()] = op->api_args[i]; @@ -284,11 +286,9 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { std::string str = PrintExpr(op->api_types[i]); Type type = String2Type(str); PrintType(type, stream); - types.push_back(type); this->stream << "* restrict " << vid; } } - kernel_data_type_[op->name] = types; stream << ") {\n"; int func_scope = BeginScope(); range_ = CollectIterRange(op->body); @@ -316,8 +316,6 @@ void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { else stream << ", "; } PrintExpr(op->args[i], stream); - Type type = kernel_data_type_[op->name][i]; - top_data_type_[PrintExpr(op->args[i])] = type; } } stream << ");\n"; @@ -334,15 +332,16 @@ void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT else stream << ", "; } PrintExpr(op->args[i], stream); - Type type = kernel_data_type_[op->name][i]; - top_data_type_[PrintExpr(op->args[i])] = type; } } os << ")"; } void CodeGenAOCL::VisitStmt_(const StreamStmt* op) { - std::string vid = GetVarID(op->buffer_var.get()); + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); PrintIndent(); int i = 0; for (auto key : op->annotate_keys) { diff --git a/tvm/src/pass/split_host_device.cc b/tvm/src/pass/split_host_device.cc index 534e0b695..fdcd0c56f 100644 --- 
a/tvm/src/pass/split_host_device.cc +++ b/tvm/src/pass/split_host_device.cc @@ -81,6 +81,14 @@ class IRUseDefAnalysis : public IRMutator { return IRMutator::Mutate_(op, s); } + Stmt Mutate_(const StreamStmt *op, const Stmt& s) final { + if (!def_count_.count(op->buffer_var.get())) { + def_count_[op->buffer_var.get()] = 0; + use_count_[op->buffer_var.get()] = 0; + } + return IRMutator::Mutate_(op, s); + } + Expr Mutate_(const Let *op, const Expr& e) final { this->HandleDef(op->var.get()); Expr body = this->Mutate(op->body); @@ -109,6 +117,14 @@ class IRUseDefAnalysis : public IRMutator { return IRMutator::Mutate_(op, e); } + Expr Mutate_(const StreamExpr *op, const Expr& e) final { + if (!def_count_.count(op->buffer_var.get())) { + def_count_[op->buffer_var.get()] = 0; + use_count_[op->buffer_var.get()] = 0; + } + return IRMutator::Mutate_(op, e); + } + Stmt Mutate_(const KernelDef *op, const Stmt& s) { for (auto arg : op->args) { this->HandleDef(arg.get()); diff --git a/tvm/src/pass/stream_deduce.cc b/tvm/src/pass/stream_deduce.cc deleted file mode 100644 index 86405f783..000000000 --- a/tvm/src/pass/stream_deduce.cc +++ /dev/null @@ -1,118 +0,0 @@ -/*! - * Copyright (c) 2019 by Contributors - * \file remove_no_op.cc - * \brief Remove no op from the stmt - */ -#include -#include -#include -#include - -namespace TVM { -namespace ir { - -// Mark the statment of each stage. -class StreamInferer : public IRMutator { - public: - Stmt Mutate_(const LetStmt* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - return is_no_op(op->body) ? MakeEvaluate(op->value) : stmt; - } - Stmt Mutate_(const AttrStmt* op, const Stmt& s) final { - if (op->attr_key == ir::attr::pragma_scope) { - const std::string& pname = op->value.as()->value; - if (pname == "debug_skip_region") { - return MakeEvaluate(0); - } - } - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - return is_no_op(op->body) ? 
MakeEvaluate(op->value) : stmt; - } - Stmt Mutate_(const IfThenElse* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - if (op->else_case.defined()) { - if (is_no_op(op->else_case)) { - if (is_no_op(op->then_case)) { - return MakeEvaluate(op->condition); - } else { - return IfThenElse::make(op->condition, op->then_case); - } - } else { - return stmt; - } - } else { - if (is_no_op(op->then_case)) { - return MakeEvaluate(op->condition); - } else { - return stmt; - } - } - } - Stmt Mutate_(const For* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - return is_no_op(op->body) ? MakeEvaluate({op->min, op->extent}) : stmt; - } - Stmt Mutate_(const Allocate* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - return is_no_op(op->body) ? MakeEvaluate(op->extents) : stmt; - } - Stmt Mutate_(const ProducerConsumer* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - return is_no_op(op->body) ? op->body : stmt; - } - Stmt Mutate_(const Realize* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - return is_no_op(op->body) ? 
op->body : stmt; - } - Stmt Mutate_(const Evaluate* op, const Stmt& s) final { - if (HasSideEffect(op->value)) return s; - return Evaluate::make(0); - } - Stmt Mutate_(const Block* op, const Stmt& s) final { - Stmt stmt = IRMutator::Mutate_(op, s); - op = stmt.as(); - if (is_no_op(op->first)) { - return op->rest; - } else if (is_no_op(op->rest)) { - return op->first; - } else { - return stmt; - } - } - - private: - Stmt MakeEvaluate(Expr value) { - if (HasSideEffect(value)) { - return Evaluate::make(value); - } else { - return Evaluate::make(0); - } - } - Stmt MakeEvaluate(const Array& values) { - Stmt stmt; - for (Expr e : values) { - if (HasSideEffect(e)) { - if (stmt.defined()) { - stmt = Block::make(stmt, Evaluate::make(e)); - } else { - stmt = Evaluate::make(e); - } - } - } - return stmt.defined() ? stmt : Evaluate::make(0); - } -}; - -Stmt InferStream(Stmt stmt) { - return StreamInferer().Mutate(stmt); -} - -} // namespace ir -} // namespace TVM diff --git a/tvm/src/pass/stream_inference.cc b/tvm/src/pass/stream_inference.cc new file mode 100644 index 000000000..9afe136d2 --- /dev/null +++ b/tvm/src/pass/stream_inference.cc @@ -0,0 +1,204 @@ +/*! 
+ * Copyright (c) 2019 by Contributors + * \file remove_no_op.cc + * \brief Remove no op from the stmt + */ +#include +#include +#include +#include + +namespace TVM { +namespace ir { + +class StreamMutator : public IRMutator { + public: + explicit StreamMutator(int bus_bandwidth) { + bus_bandwidth_ = bus_bandwidth; + } + // move device attr to allocate level + Stmt Mutate_(const AttrStmt* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + // if (op->attr_key == attr::device_scope) + // return stmt.as()->body; + return stmt; + } + + Stmt Mutate_(const For* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + auto extent = op->extent.as()->value; + auto min = op->min.as()->value; + // mutate sender: split and block inner loop + if (auto stream_op = op->body.as()) { + if (extent - min > bus_bandwidth_) { + LOG(WARNING) << "large"; + } else { + } + // mutate receiver : (StreamExpr + For(Store = GetSlice)) + } else if (auto store_op = op->body.as()) { + if (store_op->value.as() == nullptr) return stmt; + if (extent - min > bus_bandwidth_) { + LOG(WARNING) << "large"; + } else { + return stmt; + // allocate intermediate buffer + VarExpr new_var(store_op->buffer_var.get()->name_hint + "_save"); + Expr new_load = Load::make(store_op->buffer_var.type(), new_var, 0, const_true()); + Stmt new_store = Store::make(store_op->buffer_var, new_load, + store_op->index, store_op->predicate); + Stmt new_for = For::make(op->loop_var, op->min, op->extent, op->for_type, + op->device_api, new_store); + // save stream data into intermediate buffer + Stmt read_in = Store::make(new_var, store_op->value, + Expr(0), const_true()); + // allocate intermediate buffer + return Allocate::make(new_var, + store_op->value.type(), + {make_const(Int(bus_bandwidth_), 1)}, + const_true(), Block::make(read_in, new_for)); + } + } + return stmt; + } + + Stmt Mutate_(const StreamStmt* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op 
= stmt.as(); + const Variable* v = op->buffer_var.get(); + stream_type_map_[v] = op->buffer_var.type(); + return stmt; + } + + Expr Mutate_(const StreamExpr* op, const Expr& e) final { + Expr expr = IRMutator::Mutate_(op, e); + op = expr.as(); + const Variable* v = op->buffer_var.get(); + stream_type_map_[v] = op->buffer_var.type(); + return expr; + } + private: + int bus_bandwidth_; + bool is_host_{true}; + std::unordered_map stream_type_map_; +}; + +// Mark the statment scope of each stage. +class StreamInferer : public IRMutator { + public: + explicit StreamInferer(int bus_bandwidth) { + bus_bandwidth_ = bus_bandwidth; + } + + Stmt Mutate_(const Allocate* op, const Stmt& s) final { + Stmt stmt = IRMutator::Mutate_(op, s); + op = stmt.as(); + if (auto block = op->body.as()) { + if (auto producer = block->first.as()){ + if (const AttrStmt* attr_stmt = producer->body.as()) { + if (const AttrStmt* device_attr = attr_stmt->body.as()) { + if (device_attr->attr_key == attr::device_scope) { + // mutate allocate body + StreamMutator mutator(bus_bandwidth_); + // allocate stream for host + Stmt new_body = mutator.Mutate(op->body); + Stmt new_stmt = Allocate::make(op->buffer_var, + op->type, + op->extents, + op->condition, + new_body); + return AttrStmt::make(device_attr->node, + attr::device_scope, + device_attr->value, + new_stmt); + } + } + } + } + } + return stmt; + } + + // Stmt Mutate_(const ProducerConsumer* op, const Stmt& s) final { + // Stmt stmt = IRMutator::Mutate_(op, s); + // op = stmt.as(); + // return is_no_op(op->body) ? 
op->body : stmt; + // } + + // Stmt Mutate_(const Store* op, const Stmt& s) final { + // Stmt stmt = IRMutator::Mutate_(op, s); + // op = stmt.as(); + // auto it = var_remap_.find(op->buffer_var.get()); + // if (it != var_remap_.end() && + // !it->second.same_as(op->buffer_var)) { + // CHECK(it->second.as()); + // VarExpr buf_var(it->second.node_); + // if (has_stencil_) outputs_.insert(buf_var); + // return Store::make(buf_var, op->value, op->index, op->predicate); + // } else { + // return stmt; + // } + // } + + // Stmt Mutate_(const AttrStmt* op, const Stmt& s) final { + // if (op->attr_key == attr::realize_scope) { + // storage_scope_[op->node.get()] = op->value.as()->value; + // return this->Mutate(op->body); + // } else if (op->attr_key == attr::double_buffer_scope) { + // Operation func(op->node.node_); + // Stmt body = Mutate(op->body); + // for (int i = 0; i < func->num_outputs(); ++i) { + // TensorKey key{func, i}; + // auto it = buf_map_.find(key); + // CHECK(it != buf_map_.end()) + // << "Cannot find allocated buffer for " << key.f; + // body = AttrStmt::make( + // it->second.buffer->data, op->attr_key, op->value, body); + // } + // return body; + // } else if (op->attr_key == attr::thread_extent) { + // IterVar iv(op->node.node_); + // ThreadScope ts = ThreadScope::make(iv->thread_tag); + // curr_thread_scope_.push_back(ts); + // Stmt stmt = IRMutator::Mutate_(op, s); + // curr_thread_scope_.pop_back(); + // return stmt; + // } else if (op->attr_key == attr::buffer_bind_scope) { + + // Stmt Mutate_(const For* op, const Stmt& s) final { + // Stmt stmt = IRMutator::Mutate_(op, s); + // op = stmt.as(); + // return is_no_op(op->body) ? 
MakeEvaluate({op->min, op->extent}) : stmt; + // } + + private: + int bus_bandwidth_; + Stmt MakeEvaluate(Expr value) { + if (HasSideEffect(value)) { + return Evaluate::make(value); + } else { + return Evaluate::make(0); + } + } + Stmt MakeEvaluate(const Array& values) { + Stmt stmt; + for (Expr e : values) { + if (HasSideEffect(e)) { + if (stmt.defined()) { + stmt = Block::make(stmt, Evaluate::make(e)); + } else { + stmt = Evaluate::make(e); + } + } + } + return stmt.defined() ? stmt : Evaluate::make(0); + } +}; + +Stmt InferStream(Stmt stmt, + int bus_bandwidth) { + return StreamInferer(bus_bandwidth).Mutate(stmt); +} + +} // namespace ir +} // namespace TVM diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index 5ac5b12b9..9250950ac 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -412,37 +412,87 @@ Tensor Schedule::move_to(const Tensor& target, consumers.push_back(target_stage); } - // build producer stage - Array producer_inputs; - Array producer_input_placeholders; - Array producer_output_placeholders; - std::string producer_name = target_buffer->name + ".stream_out"; - Buffer producer_buffer = BufferNode::make(Var(producer_name, Handle()), + // build consumer (sender) stage + Array consumer_inputs; + Array consumer_input_placeholders; + Array consumer_output_placeholders; + std::string consumer_name = target_buffer->name + ".stream_in"; + Buffer consumer_buffer = BufferNode::make(Var(consumer_name, Handle()), target->dtype, target->shape, Array(), Expr(), - producer_name, + consumer_name, "", 0, 0); - producer_inputs.push_back(target); - producer_input_placeholders.push_back(target_buffer); - producer_output_placeholders.push_back(producer_buffer); + consumer_inputs.push_back(target); + consumer_input_placeholders.push_back(target_buffer); + consumer_output_placeholders.push_back(consumer_buffer); + + // var.write(input_placeholder) + 
std::vector csm_indices; + std::vector csm_loop_vars; + for (size_t i = 0; i < target->shape.size(); i++) { + VarExpr iter("i" + std::to_string(i)); + csm_indices.push_back(iter); + csm_loop_vars.push_back(iter); + } + Expr csm_index = getIndex(csm_indices, target->shape); + Stmt consumer_body = StreamStmt::make(VarExpr(consumer_buffer->data), + csm_index, + stream_type, + channel_depth); + for (size_t j = 0; j < target->shape.size(); j++) { + consumer_body = For::make( + VarExpr(csm_loop_vars[j]), + 0, target->shape[j], + ForType::Serial, + DeviceAPI::None, + consumer_body); + } + // create new stage and return stream tensors + auto n = std::make_shared(); + n->name = consumer_name; + n->body = consumer_body; + n->inputs = consumer_inputs; + n->input_placeholders = consumer_input_placeholders; + n->output_placeholders = consumer_output_placeholders; + Operation consumer_op(n); + Stage consumer_stage = Stage(consumer_op); + size_t consumer_pos = FindNodeRef(stages, target_stage); + stages->data.insert(stages->data.begin() + consumer_pos, consumer_stage.node_); + (*this)->stage_map.Set(consumer_op, consumer_stage); + + // build producer (receiver) stage + Array producer_inputs; + Array producer_input_placeholders; + Array producer_output_placeholders; + std::string producer_name = target_buffer->name + ".stream_out"; + // Buffer producer_buffer = BufferNode::make(Var(target_buffer->name, Handle()), + // target->dtype, + // target->shape, + // Array(), + // Expr(), + // target_buffer->name, + // "", 0, 0); + // producer_inputs.push_back(consumer); + // producer_input_placeholders.push_back(consumer_buffer); + producer_output_placeholders.push_back(target_buffer); // streaming producer tensor reading from placeholder Expr stream = StreamExpr::make(target->dtype, - VarExpr(target_buffer->data), + VarExpr(consumer_buffer->data), stream_type, channel_depth); // create for loops for tensor init std::vector indices; std::vector loop_vars; for (size_t i = 0; i < 
target->shape.size(); i++) { - VarExpr iter(producer_name + std::to_string(i)); + VarExpr iter("i" + std::to_string(i)); indices.push_back(iter); loop_vars.push_back(iter); } Expr index = getIndex(indices, target->shape); // store op initialized with Variable node - Stmt for_stmt = Store::make(VarExpr(producer_buffer->data), + Stmt for_stmt = Store::make(VarExpr(target_buffer->data), stream, index, UIntImm::make(UInt(1), 1)); for (size_t j = 0; j < target->shape.size(); j++) { @@ -465,10 +515,11 @@ Tensor Schedule::move_to(const Tensor& target, device = StringImm::make("gpu"); break; } + // attr annotates new scope Stmt body = AttrStmt::make( - VarExpr(producer_buffer.node_), + VarExpr(target_buffer.node_), "device_scope", device, for_stmt); - Tensor producer = ExternOpNode::make(producer_name, + Tensor producer = ExternOpNode::make(target_buffer->name, "", Array(), producer_inputs, @@ -488,14 +539,14 @@ Tensor Schedule::move_to(const Tensor& target, Array new_inputs; Array new_input_placeholders; const ExternOpNode* op = s->op.as(); - new_inputs.push_back(producer); - new_input_placeholders.push_back(producer_buffer); + // new_inputs.push_back(producer); + // new_input_placeholders.push_back(producer_buffer); for (size_t j = 0; j < op->inputs.size(); j++) { new_inputs.push_back(op->inputs[j]); new_input_placeholders.push_back(op->input_placeholders[j]); } Stmt new_body = AttrStmt::make( - VarExpr(producer_buffer.node_), + VarExpr(target_buffer.node_), "device_scope", device, op->body); From 5a3112e6dd90d25d2153053db0e444788316a9c4 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 15 Oct 2019 22:41:16 -0400 Subject: [PATCH 083/103] [Add] Interface prag,a for SDx sim --- python/heterocl/devices.py | 9 +- python/heterocl/tvm/build_module.py | 13 +- tvm/src/codegen/build_common.cc | 712 +++++++++++++++++++++++++++- tvm/src/codegen/codegen_c.cc | 20 +- tvm/src/codegen/codegen_c.h | 10 + 5 files changed, 743 insertions(+), 21 deletions(-) diff --git 
a/python/heterocl/devices.py b/python/heterocl/devices.py index 283d48afe..95d3cea6f 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -75,14 +75,16 @@ def __init__(self, types, model, platform, mode): self.mapping["source"] = { "lang": "opencl", "compile" : "aocl", "options" : "" } + self.mapping["sim"] = { "env" : "sdaccel", + "compile" : "xcpp" } if types == "FPGA": self.mapping["source"] = { "lang": "hlsc", "compile" : "vhls", "options" : "" } self.mapping["sim"] = {} self.mapping["co-sim"] = {} - self.mapping["syn"] = { "compile" : "vivado_hls", - "callback": ""} + self.mapping["syn"] = { "compile" : "vivado_hls", + "callback" : ""} self.mapping[""] = {} else: # implementation pass @@ -112,6 +114,9 @@ def __init__(self, types, model, platform, mode): self.model = model self.tool = Tooling(types, model, platform, mode) + def __getattr__(self, key): + return self.tool.__getattr__(key) + class CPU(Device): """cpu device with different models""" def __init__(self, model, platform="aws_f1", mode="sim"): diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 1ab6bb337..0f80d0e3f 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -424,13 +424,13 @@ def build_fpga_kernel(sch, args, target, name="default_function"): fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] try: # generate and split code - host = target.host.tool.source['compile'] + host = target.host.source['compile'] builder = getattr(codegen, "build_{0}".format(host)) host_code = builder(fdevice) findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") host_code = host_code[findex + 6 : rindex] - device = target.device.tool.source['compile'] + device = target.device.source['compile'] builder = getattr(codegen, "build_{0}".format(device)) device_code = builder(fdevice) findex, rindex = device_code.find("{device}"), device_code.rfind("{device}") @@ -440,9 +440,12 @@ def 
build_fpga_kernel(sch, args, target, name="default_function"): @register_func def tvm_callback_syn_postproc(code): return "test" - builder = getattr(codegen, "build_{0}".format("sim")) - f = builder(fdevice, ["sss", "ww"], ["wwq", "swsw"]) - return f + + if target.mode == "source": return device_code + host_code + elif target.mode == "sim": + builder = getattr(codegen, "build_{0}".format("sim")) + f = builder(fdevice, ["s"], ["wwq", "swsw"]) + return f except AttributeError: raise AttributeError("Cannot find the target builder %s" % target) diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 6fe63d986..087e2fe0f 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -9,6 +9,12 @@ #include #include "./build_common.h" +#include +#include +#include +#include +#include + #include "merlinc/codeanalys_merlinc.h" #include "hlsc/codegen_vhls.h" #include "opencl/codegen_aocl.h" @@ -16,10 +22,682 @@ namespace TVM { namespace runtime { +void PrintIndent(std::ofstream& stream, int indent) { + for (int i = 0; i < indent; i++) + stream << ' '; +} + +inline size_t GetTypeSize(TVMType t) { + size_t byte = (t.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + return byte; +} + +inline size_t GetDataSize(TVMArray* arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= arr->shape[i]; + } + size_t byte = (arr->dtype.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + size *= (byte * 8 * arr->dtype.lanes + 7) / 8; + return size; +} + +inline TVMType Type2TVMType(Type t) { + TVMType tt; + if (t.is_int()) tt.code = kDLInt; + else if (t.is_uint()) tt.code = kDLUInt; + else if (t.is_float()) tt.code = kDLFloat; + else LOG(FATAL) << "Unacceptable type: " << t; + tt.bits = static_cast(t.bits()); + tt.fracs = static_cast(t.fracs()); + return tt; +} + +inline std::string 
Type2Str(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if (t.fracs > 0) str += "ap_fixed<"; + else str += "ap_int<"; + str += std::to_string(static_cast(t.bits)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + else str += ">"; + } else if (t.code == kDLUInt) { + if (t.fracs > 0) str += "ap_ufixed<"; + else str += "ap_uint<"; + str += std::to_string(static_cast(t.bits)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2ExtStr(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if (t.fracs > 0) str += "ap_fixed<"; + else str += "ap_int<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLUInt) { + if (t.fracs > 0) str += "ap_ufixed<"; + else str += "ap_uint<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2WrapStr(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if (t.fracs > 0) { + str += "ap_fixed<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + } else { + str += "ap_int<"; + if (t.bits <= 8) str += std::to_string(static_cast(t.bits)); + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + } + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLUInt) { + if (t.fracs > 0) { + str += "ap_ufixed<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + } else { + str += "ap_uint<"; + if (t.bits 
<= 8) str += std::to_string(static_cast(t.bits)); + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + } + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2Byte(TVMType t) { + std::string str = ""; + if (t.code == kDLFloat) { + str += "float"; + } else if (t.code == kDLInt || t.code == kDLUInt) { + if (t.code == kDLUInt) str += "u"; + str += "int"; + if (t.bits <= 8) str += "8"; + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + str += "_t"; + } + return str; +} + +void CollectArgInfo(TVMArgs& args, + LoweredFunc func, + std::vector& arg_sizes, + std::vector& arg_types) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + arg_sizes.push_back(GetDataSize(arr)); + arg_types.push_back(arr->dtype); + } else { + const Variable* var = func->api_args[i].as(); + TVMType t = Type2TVMType(var->type); + arg_sizes.push_back(GetTypeSize(t)); + arg_types.push_back(t); + } + } +} + +void GenSharedMem(TVMArgs& args, + std::vector& shmids, + std::vector& arg_sizes) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + // generate shared memory key and id + // TODO: maybe get the current path?? 
+ key_t key = ftok("/", i+1); + int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); + shmids.push_back(shmid); + // copy mem from TVM args to the shared memory + void* mem = shmat(shmid, nullptr, 0); + memcpy(mem, arr->data, arg_sizes[i]); + } else { + shmids.push_back(0); + } + } +} + +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes) { + for (size_t i = 0; i < shmids.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + int shmid = shmids[i]; + void* mem = shmat(shmid, nullptr, 0); + memcpy(arr->data, mem, arg_sizes[i]); + shmdt(mem); + shmctl(shmid, IPC_RMID, nullptr); + } + } +} + +// copy values from the shared mem to local mem +void PrintCopy(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr) { + for (int i = 0; i < arr->ndim; i++) { + PrintIndent(stream, indent); + stream << "for (size_t i" << i << " = 0; "; + stream << "i" << i << " < " << arr->shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + if (i == arr->ndim-1) { + PrintIndent(stream, indent); + // stream << "arg_top_" << nth_arr; + // for (int j = 0; j < arr->ndim; j++) { + // stream << "[i" << j << "]"; + // } + + stream << "arg_top_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul2 = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul2 *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul2; + } + stream << "]"; + + + stream << " = ("; + // stream << Type2ExtStr(arr->dtype); + stream << Type2Byte(arr->dtype); + + stream << ")(arg_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul; + } + stream << "])"; + if (arr->dtype.fracs > 0) + stream << " >> " << static_cast(arr->dtype.fracs); + stream << ";\n"; + } + } + for (int i = 0; i < arr->ndim; i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } +} + +// copy values from local mem back to 
shared mem +void PrintCopyBack(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr) { + for (int i = 0; i < arr->ndim; i++) { + PrintIndent(stream, indent); + stream << "for (size_t i" << i << " = 0; "; + stream << "i" << i << " < " << arr->shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + if (i == arr->ndim-1) { + PrintIndent(stream, indent); + stream << "arg_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul; + } + stream << "] = ("; + // stream << Type2ExtStr(arr->dtype); + stream << Type2Byte(arr->dtype); + stream << ")(arg_top_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul2 = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul2 *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul2; + } + + stream << "])"; + + // for (int j = 0; j < arr->ndim; j++) { + // stream << "[i" << j << "]"; + // } + // stream << ")"; + if (arr->dtype.fracs > 0) + stream << " << " << static_cast(arr->dtype.fracs); + stream << ";\n"; + } + } + for (int i = 0; i < arr->ndim; i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } +} + +void GenKernelCode(std::string test_file) { + std::ofstream stream; + // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/knn_vhls.cpp"); + stream.open("knn_vhls_auto.cpp"); + stream << test_file; + stream.close(); +} + +// interface pragma to specify mem and ctrl interface in sdx +void GenWrapperCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func) { + std::ofstream stream; + // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/digitrec.cpp"); + int indent = 0; + stream.open("digitrec.cpp"); + stream << "#include \n"; + stream << "#include \"/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/knn_vhls.cpp\"\n"; + stream << "\n\n"; + stream << 
"extern \"C\" \n"; + stream << "{\n"; + indent += 2; + PrintIndent(stream, indent); + stream << "void DigitRec( "; + for (int i = 0;i < args.size();i++) { + if (i!=args.size() - 1) { + stream << Type2WrapStr(arg_types[i]); + stream << "*"; + stream << " source_wrapper_" << i; + stream << ", "; + } else { + stream << Type2WrapStr(arg_types[i]); + stream << "*"; + stream << " source_wrapper_" << i; + stream << " ) {\n"; + } + } + stream << "\n\n"; + PrintIndent(stream, indent); + for (int i = 0;i < args.size();i++) { + stream << "#pragma HLS INTERFACE m_axi port= "; + stream << "source_wrapper_" << i; + stream << " offset=slave bundle=gmem\n"; + } + for (int i = 0;i < args.size();i++) { + stream << "#pragma HLS INTERFACE s_axilite port= "; + stream << "source_wrapper_" << i; + stream << " bundle=control\n"; + } + PrintIndent(stream, indent); + stream << "#pragma HLS INTERFACE s_axilite port=return bundle=control\n"; + stream << "\n\n"; + for (int i = 1;i < args.size();i++) { + PrintIndent(stream, indent); + stream << Type2WrapStr(arg_types[i]); + stream << " source_wrapper_temp_" << i; + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + stream << "[" << arr->shape[j] << "]"; + } + stream << ";\n"; + } + + for (int i = 1;i < args.size();i++) { + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + PrintIndent(stream, indent); + stream << "for ( int i" << j << " = 0; "; + stream << "i" << j << " < " << arr->shape[j] << "; "; + stream << "i" << j << "++) {\n"; + indent += 2; + if (j == arr->ndim - 1) { + PrintIndent(stream, indent); + stream << "source_wrapper_temp_" << i; + for (int k = 0;k < arr->ndim;k++) { + stream << "[i" << k << "]"; + } + stream << " = "; + stream << "source_wrapper_" << j; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int k = arr->ndim-2; k >= 0;k--) { + mul *= arr->shape[k+1]; + stream << "+ i" << k << "*" << mul; + } + stream << "];\n"; + } + } + for (int j = 0;j < arr->ndim;j++) { + indent -= 2; + 
PrintIndent(stream, indent); + stream << "}\n"; + } + } + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "default_function( "; + for (int i = 0;i < args.size();i++) { + if (i == 0) { + stream << "source_wrapper_" << i; + stream << "[0], "; + } else if (i !=0 && i!=args.size() - 1){ + stream << "source_wrapper_temp_" << i; + stream << ", "; + } else { + stream << "source_wrapper_temp_" << i; + stream << ");\n"; + } + + } + stream << "\n\n"; + + int index = args.size() - 1; + TVMArray* arr = args[index]; + for (int i = 0;i < arr->ndim;i++) { + PrintIndent(stream, indent); + stream << "for ( int i" << i << " = 0; "; + stream << "i" << i << " < " << arr->shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + + if (i == arr->ndim - 1) { + PrintIndent(stream, indent); + stream << "source_wrapper_" << index; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int j = arr->ndim-2; j >= 0;j--) { + mul *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul; + } + stream << " ] = "; + + stream << "source_wrapper_temp_" << index; + for (int j = 0;j < arr->ndim;j++) { + stream << "[i" << j << "]"; + } + stream <<";\n"; + } + } + for (int i = 0;i < arr->ndim;i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } + stream << "}\n"; + indent -= 2; + stream << "}\n"; + stream.close(); +} + +// generate opencl kernel and mem obj +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func, + std::string test_file) { + int indent = 0; + std::ofstream stream; + stream.open("digit_recognition.cpp"); + // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/host/digit_recognition.cpp"); + stream << "#include \n"; + stream << "#include \n"; + stream << "\n\n"; + stream << "// standard C/C++ headers\n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include 
\n"; + stream << "\n\n"; + stream << "// opencl harness headers\n"; + stream << "#include \"CLWorld.h\"\n"; + stream << "#include \"CLKernel.h\"\n"; + stream << "#include \"CLMemObj.h\"\n"; + stream << "// harness namespace\n"; + stream << "using namespace rosetta;\n"; + stream << "\n\n"; + stream << "//other headers\n"; + stream << "#include \"utils.h\"\n"; + stream << "#include \"typedefs.h\"\n"; + stream << "int main(int argc, char ** argv) {\n"; + indent += 2; + + int cnt = 0; // label the constant value + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared memory + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << "* "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << "*)"; + stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + PrintIndent(stream, indent); + + stream << Type2Byte(arg_types[i]) << " "; + // stream << Type2Str(arg_types[i]) << " "; + stream << "arg_top_" << i; + TVMArray* arr = args[i]; + + stream << "["; + for (int j = 0; j < arr->ndim; j++) { + //stream << "[" << arr->shape[j] << "]"; + if (j == arr->ndim-1) { + stream << arr->shape[j]; + } else { + stream << arr->shape[j]; + stream << " * "; + } + } + stream << "];\n"; + // copy from shared mem + PrintCopy(arr, stream, indent, i); + + } else { + // directly assign the value to the variable + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << ")"; + if (args[i].type_code() == kDLInt || + args[i].type_code() == kDLUInt) { + stream << int64_t(args[i]); + } + stream << ";\n"; + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "arg_top_" << i; + stream << " = ("; + stream << Type2Byte(arg_types[i]); + + stream << ")(arg_" << i << ")"; + if (arg_types[i].fracs > 0) + stream << " >> " << static_cast(arg_types[i].fracs); + stream << ";\n"; + + 
PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "fool_" << cnt << "[1] = { arg_top_" << i << " };\n"; + cnt += 1; + } + stream << "\n\n"; + } + + // generate host side (before) on arg_top_k + + PrintIndent(stream,indent); + stream << "printf(\"Digit Recognition Application\\n\");\n"; + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// parse command line arguments for opencl version\n"; + PrintIndent(stream, indent); + stream << "std::string kernelFile(\"\");\n"; + PrintIndent(stream, indent); + stream << "parse_sdaccel_command_line_args(argc, argv, kernelFile);\n"; + stream << "\n\n"; + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// create OpenCL world\n"; + PrintIndent(stream, indent); + stream << "CLWorld digit_rec_world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR);\n"; + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// add the bitstream file\n"; + PrintIndent(stream, indent); + stream << "digit_rec_world.addProgram(kernelFile);\n"; + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// create kernels\n"; + PrintIndent(stream, indent); + stream << "CLKernel DigitRec(digit_rec_world.getContext(), digit_rec_world.getProgram(), \"DigitRec\", digit_rec_world.getDevice());\n"; + stream << "\n\n"; + + PrintIndent(stream, indent); + stream << "// create mem objects\n"; + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + if (cnt!=0) { + stream << "CLMemObj source_" << i; + stream << "((void*)fool_" << cnt - 1; + stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; + stream << "1, "; + stream << "CL_MEM_READ_WRITE);\n"; + cnt--; + continue; + } + stream << "CLMemObj source_" << i; + stream << "((void*)arg_top_" << i; + stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; + // stream << ", sizeof(" << Type2ExtStr(arg_types[i]) << "), "; + + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + if (j==0) { + stream << 
arr->shape[j] << " "; + } else { + stream << "* " << arr->shape[j]; + } + } + stream << ", "; + stream << "CL_MEM_READ_WRITE);\n"; + } + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// add them to the world\n"; + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + stream << "digit_rec_world.addMemObj(source_" << i; + stream << ");\n"; + } + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << " // set work size\n"; + PrintIndent(stream, indent); + stream << "int global_size[3] = {1, 1, 1};\n"; + PrintIndent(stream, indent); + stream << "int local_size[3] = {1, 1, 1};\n"; + PrintIndent(stream, indent); + stream << "DigitRec.set_global(global_size);\n"; + PrintIndent(stream, indent); + stream << "DigitRec.set_local(local_size);\n"; + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// add them to the world\n"; + PrintIndent(stream, indent); + stream << "digit_rec_world.addKernel(DigitRec);\n"; + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// set kernel arguments\n"; + // TODO + // PrintIndent(stream, indent); + // stream << "digit_rec_world.setConstKernelArg(0, 0, arg_top_0);\n"; + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + stream << "digit_rec_world.setMemKernelArg(0, "<< i << ", " << i; + stream << ");\n"; + } + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// run\n"; + PrintIndent(stream, indent); + stream << "digit_rec_world.runKernels();\n"; + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// read the data back\n"; + PrintIndent(stream, indent); + stream << "digit_rec_world.readMemObj(2);\n"; + + // generate host side (post) + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// compute after kernel function\n"; + stream << test_file; + + // copy to shared mem + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + PrintCopyBack(arr, stream, indent, i); 
+ // PrintCopyBack2(arr, stream, indent, i); + PrintIndent(stream, indent); + stream << "shmdt("; + stream << "arg_" << i << ");\n"; + } + } + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "}\n"; + stream.close(); + +} + class SimModuleNode final : public ModuleNode { public: - SimModuleNode(LoweredFunc func, std::string test_file) - : func_(func), test_file_(test_file) {} + SimModuleNode(LoweredFunc func, + std::string host_code, + std::string dev_code) + : func_(func), host_(host_code), dev_(dev_code) { + } const char* type_key() const { return "unified_sim"; @@ -36,13 +714,20 @@ class SimModuleNode final : public ModuleNode { std::vector arg_sizes; std::vector arg_types; std::vector shmids; - // CollectArgInfo(args, func_, arg_sizes, arg_types); - // GenSharedMem(args, shmids, arg_sizes); - // GenHostCode(args, shmids, arg_types, func_, test_file_); + + // generate interface wrapper for kernel args + CollectArgInfo(args, func_, arg_sizes, arg_types); + GenSharedMem(args, shmids, arg_sizes); + GenWrapperCode(args, shmids, arg_types, func_); + // host code invoking extern c wrapped hlsc kernel + GenKernelCode(dev_); + GenHostCode(args, shmids, arg_types, func_, host_); + // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated HLS C code ..."; system("g++ main.cpp -o out"); - LOG(CLEAN) << "Running C simulation ..."; + LOG(CLEAN) << "Running SW simulation ..."; + system("source ./run_sw.sh"); system("./out"); LOG(CLEAN) << "Finished C simulation"; system("rm out main.cpp"); @@ -53,20 +738,21 @@ class SimModuleNode final : public ModuleNode { code = (*f)("test").operator std::string(); LOG(CLEAN) << "extract res info"; } - }); } private: LoweredFunc func_; - std::string test_file_; + std::string host_; + std::string dev_; }; Module CreateSimModule( LoweredFunc func, - std::string code) { + std::string host_code, + std::string dev_code) { std::shared_ptr n = - std::make_shared(func, code); + std::make_shared(func, 
host_code, dev_code); return Module(n); } } // namespace runtime @@ -88,9 +774,9 @@ runtime::Module BuildSimModule(Array funcs, cg_host.AddFunction(f, map_arg_type); cg_dev.AddFunction(f, map_arg_type); } - - std::string code = cg_host.Finish(); - return runtime::CreateSimModule(funcs[0], code); + return runtime::CreateSimModule(funcs[0], + cg_host.GetHost(), + cg_dev.GetDevice()); } TVM_REGISTER_API("codegen.build_sim") diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index efc3241be..dd2754142 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -70,6 +70,24 @@ void CodeGenC::AddFunction(LoweredFunc f) { this->stream << "}\n\n"; } +std::string CodeGenC::GetHost() { + if (!fpga_scope_) + host_stream << stream.str(); + std::string postproc = host_stream.str(); + postproc.erase(postproc.rfind("}") - 1, + postproc.length() - 1); + postproc.erase(0, postproc.find("\n") + 1); + return postproc + "\n\n"; +} + +std::string CodeGenC::GetDevice() { + std::ostringstream device; + device << "void top(" << arg_stream.str() + << "){\n" << device_stream.str(); + if (fpga_scope_) device << stream.str(); + return decl_stream.str() + device.str(); +} + std::string CodeGenC::Finish() { std::ostringstream device; device << "void top(" << arg_stream.str() @@ -904,7 +922,7 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { vid.find("stream_out") != std::string::npos) { if (index !=0) arg_stream << ", "; PrintType(op->buffer_var.type(), arg_stream); - arg_stream << vid; + arg_stream << " " << vid; } index++; } diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index 503d6cef5..1d7466af3 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -54,6 +54,16 @@ class CodeGenC : * \return The code. */ std::string Finish(); + /*! + * \brief Finalize the compilation and return the code. + * \return The host code. + */ + std::string GetHost(); + /*! 
+ * \brief Finalize the compilation and return the code. + * \return The device code. + */ + std::string GetDevice(); /*! * \brief Print the Stmt n to CodeGenC->stream * \param n The statement to be printed. From 52ffe8027be1f691bed1a8170f8e9e1247add159 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 21 Oct 2019 22:47:46 -0400 Subject: [PATCH 084/103] [add] host xcel codegen --- python/heterocl/devices.py | 7 +- python/heterocl/tvm/build_module.py | 22 +- samples/digitrec/digitrec_vhls.py | 169 +++- samples/digitrec/host.cpp | 152 ++++ samples/digitrec/interface.cpp | 47 + samples/digitrec/kernel.cpp | 80 +- tvm/src/codegen/build_common.cc | 801 +++++++++++++++--- tvm/src/codegen/codegen_c.cc | 90 +- tvm/src/codegen/hlsc/codegen_hlsc.cc | 75 +- tvm/src/codegen/hlsc/codegen_vhls.cc | 2 + tvm/src/codegen/hlsc/codegen_vhls.h | 2 +- tvm/src/codegen/opencl/codegen_aocl.cc | 7 +- tvm/src/codegen/opencl/codegen_opencl.cc | 2 + tvm/src/pass/stream_inference.cc | 141 +++ tvm/src/schedule/schedule_dataflow_rewrite.cc | 182 ++-- tvm/src/schedule/schedule_ops.cc | 2 +- 16 files changed, 1417 insertions(+), 364 deletions(-) create mode 100644 samples/digitrec/host.cpp create mode 100644 samples/digitrec/interface.cpp diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 95d3cea6f..19d1bb1a2 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -38,15 +38,16 @@ class env(metaclass=platform): mode = "sim" def __init__(self, host, device): self.host = host - self.device = device + self.xcel = device + self.tool = "" def __str__(self): return str(self.host) + " : " + \ - str(self.device) + str(self.xcel) def __repr__(self): return str(self.host) + " : " + \ - str(self.device) + str(self.xcel) class Tooling(object): diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 0f80d0e3f..59dd84ee3 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -424,17 +424,17 @@ 
def build_fpga_kernel(sch, args, target, name="default_function"): fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] try: # generate and split code - host = target.host.source['compile'] - builder = getattr(codegen, "build_{0}".format(host)) - host_code = builder(fdevice) - findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") - host_code = host_code[findex + 6 : rindex] - - device = target.device.source['compile'] - builder = getattr(codegen, "build_{0}".format(device)) - device_code = builder(fdevice) - findex, rindex = device_code.find("{device}"), device_code.rfind("{device}") - device_code = device_code[findex + 8 : rindex] + # host = target.host.source['compile'] + # builder = getattr(codegen, "build_{0}".format(host)) + # host_code = builder(fdevice) + # findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") + # host_code = host_code[findex + 6 : rindex] + + # device = "aocl" # target.device.source['compile'] + # builder = getattr(codegen, "build_{0}".format(device)) + # device_code = builder(fdevice) + # findex, rindex = device_code.find("{device}"), device_code.rfind("{device}") + # device_code = device_code[findex + 8 : rindex] # test build sim @register_func diff --git a/samples/digitrec/digitrec_vhls.py b/samples/digitrec/digitrec_vhls.py index 8ba4aa7b5..4797ece5f 100644 --- a/samples/digitrec/digitrec_vhls.py +++ b/samples/digitrec/digitrec_vhls.py @@ -1,25 +1,172 @@ -from digitrec_main import * +import heterocl as hcl +import time +import numpy as np +import math +from digitrec_data import read_digitrec_data -f = top('vhls_csim') +N = 8 * 8 +max_bit = int(math.ceil(math.log(N, 2))) +data_size = (10, 1800) -train_images, _, test_images, test_labels = read_digitrec_data() +dtype_image = hcl.UInt(N) +dtype_knnmat = hcl.UInt(max_bit) -correct = 0.0 +def knn(test_image, train_images): + + # Imperative programming and bit operations (§2) + def popcount(num): + out = hcl.local(0, "out") + with hcl.for_(0, 
train_images.type.bits) as i: + # Bit selection operation + out.v += num[i] + return out.v + + # This function update the candidates, i.e., `knn_mat`. Here we mutate + # through the shape of tensor `dist`. For each `dist` value, if it is + # smaller than the maximum candidate, we replace it. + def update_knn(dist, knn_mat, i, j): + max_id = hcl.local(0, "max_id") + with hcl.for_(0, 3) as k: + with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id.v]): + max_id.v = k + with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]): + knn_mat[i][max_id.v] = dist[i][j] + + # This function sorts the 10 x 3 matrix. Sorting each row elements from + # small to large distance, and find digit index where the distance is the smallest + # then returns the digit having the highest scores + def sort_knn(knn_mat, i, j): + val = hcl.local(0, "val") + with hcl.if_( j == 1 ): + with hcl.if_( knn_mat[i][1] > knn_mat[i][2] ): + val.v = knn_mat[i][1] + knn_mat[i][1] = knn_mat[i][2] + knn_mat[i][2] = val.v + with hcl.else_(): + with hcl.if_( knn_mat[i][0] > knn_mat[i][1] ): + val.v = knn_mat[i][0] + knn_mat[i][0] = knn_mat[i][1] + knn_mat[i][1] = val.v + + def knn_vote(knn_mat, j): + id0 = hcl.local(0, "id0") + id1 = hcl.local(0, "id1") + id2 = hcl.local(0, "id2") + count = hcl.local(0, "count") + with hcl.for_(0, 10) as n: + with hcl.if_(knn_mat[n][0] < knn_mat[id0.v][0]): + id0.v = n + with hcl.for_(0, 10) as m: + with hcl.if_(knn_mat[m][0] < knn_mat[id1.v][0]): + id1.v = m + with hcl.for_(0, 10) as k: + with hcl.if_(knn_mat[k][0] < knn_mat[id2.v][0]): + id2.v = k + with hcl.if_(j == id0.v): + count.v += 1 + with hcl.elif_(j == id1.v): + count.v += 1 + with hcl.elif_(j == id2.v): + count.v += 1 + with hcl.else_(): + count.v += 0 + return count.v + + # Main algorithm (§3) + # Fist step: XOR (§3.1) + diff = hcl.compute(train_images.shape, + lambda x, y: train_images[x][y] ^ test_image, + "diff") + + # Second step: popcount (§3.2) + dist = hcl.compute(diff.shape, + lambda x, y: popcount(diff[x][y]), + "dist") 
+ + # Third step: initialize the candidates (§3.3) + knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") + + + # Fourth step: update the candidates (§3.4) + hcl.mutate(dist.shape, + lambda x, y: update_knn(dist, knn_mat, x, y), + "knn_update") + + # Fifth step: voting candidates (§3.5) + hcl.mutate((10, 3), lambda x, y: sort_knn(knn_mat, x, y), "sort") + + # Sixth step: compute the score baord ranking + knn_new = hcl.compute(knn_mat.shape, lambda x, y: knn_mat[x][y], "new") + knn_pred = hcl.compute((10,), lambda x: knn_vote(knn_mat, x), "vote") + # computed data + return knn_pred + +# Inputs/Outputs definition (§4) +# Scalars (§4.1) +test_image = hcl.placeholder((), "test_image", dtype_image) +# Tensors (§4.2) +train_images = hcl.placeholder(data_size, "train_images", dtype_image) + +# Data type customization (§5.1) +scheme = hcl.create_scheme([test_image, train_images], knn) +scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat) + +# Compute customization (§5.2) +s = hcl.create_schedule_from_scheme(scheme) + +diff = knn.diff +dist = knn.dist +vote = knn.new +knn_update = knn.knn_update + +# s.stream_to(test_image, hcl.FPGA("intel")) +s.to(train_images, hcl.FPGA("intel")) +s.to(vote, hcl.CPU("x86")) + +# Merge loop nests +s[diff].compute_at(s[dist], dist.axis[1]) +s[dist].compute_at(s[knn_update], knn_update.axis[1]) + +# Reorder loop to expose more parallelism +s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) + +# Parallel outer loop and pipeline inner loop +s[knn_update].parallel(knn_update.axis[1]) +s[knn_update].pipeline(knn_update.axis[0]) + +# At the end, we build the whole offloaded function. 
+print(hcl.lower(s)) +target = hcl.env.aws_f1 +# target.tool.mode = "sim/impl" +# hcl.sim / sw +# hcl.impl # refer stage -> tool opt cli +# target.tool[''] +# target.host["lang" "compiler"] +# targte.host +# target.xcel # +f = hcl.build(s, target) + +# print(f) +# import sys; sys.exit(1) + +train_images, _, test_images, test_labels = read_digitrec_data() +correct = 0.0 total_time = 0 -for i in range(0, 180): +for i in range(0, 1): hcl_train_images = hcl.asarray(train_images, dtype_image) - hcl_knn_mat = hcl.asarray(np.zeros((10, 3)), dtype_knnmat) + hcl_knn_pred = hcl.asarray(np.zeros((10,)), dtype_knnmat) start = time.time() - f(test_images[i], hcl_train_images, hcl_knn_mat) + f(test_images[i], hcl_train_images, hcl_knn_pred) total_time = total_time + (time.time() - start) - knn_mat = hcl_knn_mat.asnumpy() + knn_mat = hcl_knn_pred.asnumpy() + print(knn_mat) - if knn_vote(knn_mat) == test_labels[i]: + if knn_mat == test_labels[i]: correct += 1 -print("Average kernel time (s): {:.2f}".format(total_time/180)) -print("Accuracy (%): {:.2f}".format(100*correct/180)) +print("Average kernel time (s): {:.2f}".format(total_time/1)) +print("Accuracy (%): {:.2f}".format(100*correct/1)) diff --git a/samples/digitrec/host.cpp b/samples/digitrec/host.cpp new file mode 100644 index 000000000..f8db0d699 --- /dev/null +++ b/samples/digitrec/host.cpp @@ -0,0 +1,152 @@ +#include +#include + + +// standard C/C++ headers +#include +#include +#include +#include +#include +#include + + +// opencl harness headers +#include "CLWorld.h" +#include "CLKernel.h" +#include "CLMemObj.h" +// harness namespace +using namespace rosetta; + + +//other headers +#include "utils.h" +#include "typedefs.h" +int main(int argc, char ** argv) { + uint64_t arg_0 = (uint64_t)207249344512; + uint64_t arg_top_0[1] = { arg_0 }; + + + uint64_t* arg_1 = (uint64_t*)shmat(90701824, nullptr, 0); + uint64_t arg_top_1[10 * 1800]; + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 1800; i1++) { + 
arg_top_1[i1 + i0*1800] = (uint64_t)(arg_1[i1 + i0*1800]); + } + } + + + uint8_t* arg_2 = (uint8_t*)shmat(90734593, nullptr, 0); + uint8_t arg_top_2[10]; + for (size_t i0 = 0; i0 < 10; i0++) { + arg_top_2[i0] = (uint8_t)(arg_2[i0]); + } + + + printf("Digit Recognition Application\n"); + + // compute bofore kernel function + + // parse command line arguments for opencl version + std::string kernelFile(""); + parse_sdaccel_command_line_args(argc, argv, kernelFile); + + + // create OpenCL world + CLWorld digit_rec_world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR); + + + // add the bitstream file + digit_rec_world.addProgram(kernelFile); + + + // create kernels + CLKernel App(digit_rec_world.getContext(), digit_rec_world.getProgram(), "App", digit_rec_world.getDevice()); + + + // create mem objects + CLMemObj source_0((void*)arg_top_0, sizeof(uint64_t), 1, CL_MEM_READ_WRITE); + CLMemObj source_1((void*)arg_top_1, sizeof(uint64_t), 10 * 1800, CL_MEM_READ_WRITE); + CLMemObj source_2((void*)arg_top_2, sizeof(uint8_t), 10 , CL_MEM_READ_WRITE); + + + // add them to the world + digit_rec_world.addMemObj(source_0); + digit_rec_world.addMemObj(source_1); + digit_rec_world.addMemObj(source_2); + + + // set work size + int global_size[3] = {1, 1, 1}; + int local_size[3] = {1, 1, 1}; + App.set_global(global_size); + App.set_local(local_size); + + + // add them to the world + digit_rec_world.addKernel(App); + + + // set kernel arguments + digit_rec_world.setMemKernelArg(0, 0, 0); + digit_rec_world.setMemKernelArg(0, 1, 1); + digit_rec_world.setMemKernelArg(0, 2, 2); + + // run + digit_rec_world.runKernels(); + + // read the data back + digit_rec_world.readMemObj(2); + + // compute after kernel function + for (int x = 0; x < 10; ++x) { + int id0; + id0 = 0; + int id1; + id1 = 0; + int id2; + id2 = 0; + int count; + count = 0; + for (int i = 0; i < 10; ++i) { + if (knn_mat[(i * 3)] < knn_mat[(id0 * 3)]) { + id0 = i; + } + } + for (int i1 = 0; i1 < 10; ++i1) { + if 
(knn_mat[(i1 * 3)] < knn_mat[(id1 * 3)]) { + id1 = i1; + } + } + for (int i2 = 0; i2 < 10; ++i2) { + if (knn_mat[(i2 * 3)] < knn_mat[(id2 * 3)]) { + id2 = i2; + } + } + if (x == id0) { + count = (count + 1); + } else { + if (x == id1) { + count = (count + 1); + } else { + if (x == id2) { + count = (count + 1); + } + } + } + arg_top_2[x] = count; + } + + for (size_t i0 = 0; i0 < 10; i0++) { + for (size_t i1 = 0; i1 < 1800; i1++) { + arg_1[i1 + i0*1800] = (uint64_t)(arg_top_1[i1 + i0*1800]); + } + } + shmdt(arg_1); + for (size_t i0 = 0; i0 < 10; i0++) { + arg_2[i0] = (uint8_t)(arg_top_2[i0]); + } + shmdt(arg_2); + + + } diff --git a/samples/digitrec/interface.cpp b/samples/digitrec/interface.cpp new file mode 100644 index 000000000..788ad355f --- /dev/null +++ b/samples/digitrec/interface.cpp @@ -0,0 +1,47 @@ +#include +#include "kernel.cpp" + + +extern "C" +{ + void App( ap_uint<64>* source_wrapper_0, ap_uint<64>* source_wrapper_1, ap_uint<6>* source_wrapper_2, ap_uint<6>* source_wrapper_3 ) { + #pragma HLS INTERFACE m_axi port=source_wrapper_0 offset=slave bundle=gmem + #pragma HLS INTERFACE axis port=source_wrapper_1 offset=slave bundle=gmem + #pragma HLS INTERFACE m_axi port=source_wrapper_2 offset=slave bundle=gmem + #pragma HLS INTERFACE axis port=source_wrapper_3 offset=slave bundle=gmem + #pragma HLS INTERFACE s_axilite port=source_wrapper_0 bundle=control + #pragma HLS INTERFACE axis port=source_wrapper_1 bundle=control + #pragma HLS INTERFACE s_axilite port=source_wrapper_2 bundle=control + #pragma HLS INTERFACE axis port=source_wrapper_3 bundle=control + #pragma HLS INTERFACE s_axilite port=return bundle=control + + ap_uint<64> source_wrapper_temp_0[1]; + ap_uint<64> source_wrapper_temp_1[10][1800]; + ap_int<32> source_wrapper_temp_2[10]; + ap_uint<6> source_wrapper_temp_3[10][3]; + for (int i0 = 0; i0 < 10; i0++) { + for (int i1 = 0; i1 < 1800; i1++) { + source_wrapper_temp_1[i0][i1] = source_wrapper_1[i1+ i0*1800]; + } + } + for (int i0 = 0; i0 < 10; 
i0++) { + source_wrapper_temp_2[i0] = source_wrapper_0[i0]; + } + for (int i0 = 0; i0 < 10; i0++) { + for (int i1 = 0; i1 < 3; i1++) { + source_wrapper_temp_3[i0][i1] = source_wrapper_1[i1+ i0*3]; + } + } + + top( source_wrapper_temp_0, source_wrapper_temp_1, source_wrapper_temp_2, source_wrapper_temp_3); + + for (int i0 = 0; i0 < 10; i0++) { + for (int i1 = 0; i1 < 3; i1++) { + source_wrapper_3[i1 + i0*3 ] = source_wrapper_temp_3[i0][i1]; + } + } + for (int i0 = 0; i0 < 10; i0++) { + source_wrapper_2[i0 ] = source_wrapper_temp_2[i0]; + } +} +} diff --git a/samples/digitrec/kernel.cpp b/samples/digitrec/kernel.cpp index 21b550c8b..68893cb7f 100644 --- a/samples/digitrec/kernel.cpp +++ b/samples/digitrec/kernel.cpp @@ -1,38 +1,64 @@ -#include +#include +#include #include -#include -#pragma ACCEL kernel -void default_function(unsigned long test_image, unsigned long* train_images, unsigned char* knn_mat) { - for (int x = 0; x < 10; ++x) { - for (int y = 0; y < 3; ++y) { - knn_mat[(y + (x * 3))] = (unsigned char)50; + +void top(ap_uint<64>* arg_top_0, ap_uint<64>* train_images_stream_recv, ap_int<32>* arg_top_2, ap_uint<6>* knn_mat){ + ap_uint<6> knn_mat[10][3]; + for (ap_int<32> x = 0; x < 10; ++x) { + for (ap_int<32> y = 0; y < 3; ++y) { + knn_mat[x][y] = (ap_uint<6>)50; } } - unsigned long knn_update; -#pragma ACCEL parallel - for (int y1 = 0; y1 < 1800; ++y1) { -#pragma ACCEL pipeline - for (int x1 = 0; x1 < 10; ++x1) { - unsigned char dist; - unsigned long diff; - diff = (train_images[(y1 + (x1 * 1800))] ^ test_image); - unsigned char out; - out = (unsigned char)0; - for (int i = 0; i < 49; ++i) { - out = ((unsigned char)(((unsigned long)out) + ((unsigned long)((diff & (1L << i)) >> i)))); + ap_int<32> knn_update; + for (ap_int<32> y1 = 0; y1 < 1800; ++y1) { + for (ap_int<32> x1 = 0; x1 < 10; ++x1) { + #pragma HLS pipeline + ap_uint<6> dist; + ap_int<32> diff; + diff = ((ap_int<32>)(train_images_stream_recv[x1][y1] ^ arg_top_0)); + ap_uint<6> out; + out = 
(ap_uint<6>)0; + for (ap_int<32> i = 0; i < 64; ++i) { + out = ((ap_uint<6>)(((ap_int<34>)out) + ((ap_int<34>)diff[i]))); } dist = out; - unsigned long max_id; - max_id = (unsigned long)0; - for (int i1 = 0; i1 < 3; ++i1) { - if (knn_mat[(((long)max_id) + ((long)(x1 * 3)))] < knn_mat[(i1 + (x1 * 3))]) { - max_id = ((unsigned long)i1); + ap_int<32> max_id; + max_id = 0; + for (ap_int<32> i1 = 0; i1 < 3; ++i1) { + if (knn_mat[((max_id / 3) + x1)][(max_id % 3)] < knn_mat[x1][i1]) { + max_id = i1; } } - if (dist < knn_mat[(((long)max_id) + ((long)(x1 * 3)))]) { - knn_mat[(((long)max_id) + ((long)(x1 * 3)))] = dist; + if (dist < knn_mat[((max_id / 3) + x1)][(max_id % 3)]) { + knn_mat[((max_id / 3) + x1)][(max_id % 3)] = dist; } } } -} + ap_int<32> sort; + for (ap_int<32> x2 = 0; x2 < 10; ++x2) { + for (ap_int<32> y2 = 0; y2 < 3; ++y2) { + ap_int<32> val; + val = 0; + if (y2 == 1) { + if (knn_mat[x2][2] < knn_mat[x2][1]) { + val = ((ap_int<32>)knn_mat[x2][1]); + knn_mat[x2][1] = knn_mat[x2][2]; + knn_mat[x2][2] = ((ap_uint<6>)val); + } + } else { + if (knn_mat[x2][1] < knn_mat[x2][0]) { + val = ((ap_int<32>)knn_mat[x2][0]); + knn_mat[x2][0] = knn_mat[x2][1]; + knn_mat[x2][1] = ((ap_uint<6>)val); + } + } + } + } + ap_int<32> new[10][3]; + for (ap_int<32> x3 = 0; x3 < 10; ++x3) { + for (ap_int<32> y3 = 0; y3 < 3; ++y3) { + new[x3][y3] = ((ap_int<32>)knn_mat[x3][y3]); + } + } + } diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 087e2fe0f..9b4de219b 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -4,9 +4,11 @@ * \brief Build unified simulation module */ #include +#include #include #include #include +#include #include "./build_common.h" #include @@ -63,6 +65,19 @@ inline TVMType Type2TVMType(Type t) { return tt; } +inline std::string PrintHalideType(Type t) { + std::string str = ""; + if (t.is_uint() || t.is_int() || t.is_fixed() || t.is_ufixed()) { + if (t.is_uint()) str += "ap_uint<" + 
std::to_string(t.bits()) + ">"; + else if (t.is_int()) str += "ap_int<" + std::to_string(t.bits()) + ">"; + else if (t.is_ufixed()) str += "ap_ufixed<" + std::to_string(t.bits()) + ", " + std::to_string(t.bits() - t.fracs()) + ">"; + else str += "ap_fixed<" + std::to_string(t.bits()) + ", " + std::to_string(t.bits() - t.fracs()) + ">"; + } else { + LOG(FATAL) << "Cannot convert type " << t << " to C type"; + } + return str; +} + inline std::string Type2Str(TVMType t) { std::string str = ""; if (t.code == kDLInt) { @@ -314,7 +329,7 @@ void PrintCopyBack(TVMArray* arr, void GenKernelCode(std::string test_file) { std::ofstream stream; // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/knn_vhls.cpp"); - stream.open("knn_vhls_auto.cpp"); + stream.open("kernel.cpp"); stream << test_file; stream.close(); } @@ -323,98 +338,111 @@ void GenKernelCode(std::string test_file) { void GenWrapperCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, + const std::vector>>& arg_stream_types, LoweredFunc func) { std::ofstream stream; // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/digitrec.cpp"); int indent = 0; - stream.open("digitrec.cpp"); + stream.open("interface.cpp"); stream << "#include \n"; - stream << "#include \"/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/knn_vhls.cpp\"\n"; + stream << "#include \"kernel.cpp\"\n"; stream << "\n\n"; stream << "extern \"C\" \n"; stream << "{\n"; indent += 2; PrintIndent(stream, indent); - stream << "void DigitRec( "; - for (int i = 0;i < args.size();i++) { - if (i!=args.size() - 1) { - stream << Type2WrapStr(arg_types[i]); - stream << "*"; - stream << " source_wrapper_" << i; - stream << ", "; - } else { - stream << Type2WrapStr(arg_types[i]); - stream << "*"; - stream << " source_wrapper_" << i; - stream << " ) {\n"; - } + + // wrapper func interface + stream << "void App( "; + size_t ex_arg_count = 0; + ex_arg_count = 
arg_stream_types.size() - arg_types.size(); + for (size_t i = 0; i < arg_types.size(); i++) { + if (i != 0) stream << ", "; + stream << Type2WrapStr(arg_types[i]); + stream << "*"; + stream << " source_wrapper_" << i; } - stream << "\n\n"; - PrintIndent(stream, indent); - for (int i = 0;i < args.size();i++) { - stream << "#pragma HLS INTERFACE m_axi port= "; + for (size_t k = 0; k < ex_arg_count; k++) { + if (k != ex_arg_count) stream << ", "; + stream << PrintHalideType(std::get<1>(arg_stream_types[k + arg_types.size()])); + stream << "*"; + stream << " source_wrapper_" << k + arg_types.size(); + } + stream << " ) {\n"; + + // memeory and control pragma + for (size_t i = 0; i < arg_stream_types.size(); i++) { + std::string interface; + if (std::get<0>(arg_stream_types[i])) interface = " axis "; + else interface = " m_axi "; + PrintIndent(stream, indent); + stream << "#pragma HLS INTERFACE" + interface + "port="; stream << "source_wrapper_" << i; stream << " offset=slave bundle=gmem\n"; } - for (int i = 0;i < args.size();i++) { - stream << "#pragma HLS INTERFACE s_axilite port= "; + for (size_t i = 0; i < arg_stream_types.size(); i++) { + std::string interface; + if (std::get<0>(arg_stream_types[i])) interface = " axis "; + else interface = " s_axilite "; + PrintIndent(stream, indent); + stream << "#pragma HLS INTERFACE" + interface + "port="; stream << "source_wrapper_" << i; stream << " bundle=control\n"; } PrintIndent(stream, indent); stream << "#pragma HLS INTERFACE s_axilite port=return bundle=control\n"; - stream << "\n\n"; - for (int i = 1;i < args.size();i++) { + stream << "\n"; + + // intermediate vars init alloc + for (size_t i = 0; i < arg_stream_types.size(); i++) { PrintIndent(stream, indent); - stream << Type2WrapStr(arg_types[i]); + stream << PrintHalideType(std::get<1>(arg_stream_types[i])); stream << " source_wrapper_temp_" << i; - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { - stream << "[" << arr->shape[j] << "]"; - } + auto 
shape = std::get<2>(arg_stream_types[i]); + for (size_t j = 0; j < shape.size(); j++) + stream << "[" << shape[j] << "]"; + if (shape.size() == 0) stream << "[1]"; stream << ";\n"; } - for (int i = 1;i < args.size();i++) { - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { + for (size_t i = 0; i < arg_stream_types.size(); i++) { + auto shape = std::get<2>(arg_stream_types[i]); + for (size_t j = 0; j < shape.size(); j++) { PrintIndent(stream, indent); - stream << "for ( int i" << j << " = 0; "; - stream << "i" << j << " < " << arr->shape[j] << "; "; + stream << "for (int i" << j << " = 0; "; + stream << "i" << j << " < " << shape[j] << "; "; stream << "i" << j << "++) {\n"; indent += 2; - if (j == arr->ndim - 1) { + if (j == shape.size() - 1) { PrintIndent(stream, indent); stream << "source_wrapper_temp_" << i; - for (int k = 0;k < arr->ndim;k++) { + for (size_t k = 0; k < shape.size(); k++) { stream << "[i" << k << "]"; } stream << " = "; stream << "source_wrapper_" << j; - stream << "[i" << arr->ndim-1; + stream << "[i" << shape.size() - 1; int mul = 1; - for (int k = arr->ndim-2; k >= 0;k--) { - mul *= arr->shape[k+1]; - stream << "+ i" << k << "*" << mul; + for (size_t k = shape.size() - 1; k > 0; k--) { + mul *= shape[k]; + stream << "+ i" << k - 1 << "*" << mul; } stream << "];\n"; } } - for (int j = 0;j < arr->ndim;j++) { + for (size_t j = 0; j < shape.size(); j++) { indent -= 2; PrintIndent(stream, indent); stream << "}\n"; } } - stream << "\n\n"; + // print top func + stream << "\n"; PrintIndent(stream, indent); - stream << "default_function( "; - for (int i = 0;i < args.size();i++) { - if (i == 0) { - stream << "source_wrapper_" << i; - stream << "[0], "; - } else if (i !=0 && i!=args.size() - 1){ + stream << "top( "; + for (size_t i = 0;i < arg_stream_types.size(); i++) { + if (i != arg_stream_types.size() - 1){ stream << "source_wrapper_temp_" << i; stream << ", "; } else { @@ -423,40 +451,43 @@ void GenWrapperCode(TVMArgs& args, } } - 
stream << "\n\n"; + stream << "\n"; - int index = args.size() - 1; - TVMArray* arr = args[index]; - for (int i = 0;i < arr->ndim;i++) { - PrintIndent(stream, indent); - stream << "for ( int i" << i << " = 0; "; - stream << "i" << i << " < " << arr->shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - - if (i == arr->ndim - 1) { + // read back return val + for (int k = arg_stream_types.size() - 1; + k > args.size() - 2; k--) { + auto shape = std::get<2>(arg_stream_types[k]); + for (size_t i = 0; i < shape.size(); i++) { PrintIndent(stream, indent); - stream << "source_wrapper_" << index; - stream << "[i" << arr->ndim-1; - int mul = 1; - for (int j = arr->ndim-2; j >= 0;j--) { - mul *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul; - } - stream << " ] = "; - - stream << "source_wrapper_temp_" << index; - for (int j = 0;j < arr->ndim;j++) { - stream << "[i" << j << "]"; + stream << "for (int i" << i << " = 0; "; + stream << "i" << i << " < " << shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + + if (i == shape.size() - 1) { + PrintIndent(stream, indent); + stream << "source_wrapper_" << k; + stream << "[i" << shape.size() - 1; + int mul = 1; + for (size_t j = shape.size() - 1; j > 0; j--) { + mul *= shape[j]; + stream << " + i" << j - 1 << "*" << mul; + } + stream << " ] = "; + + stream << "source_wrapper_temp_" << k; + for (size_t j = 0; j < shape.size(); j++) { + stream << "[i" << j << "]"; + } + stream <<";\n"; } - stream <<";\n"; } - } - for (int i = 0;i < arr->ndim;i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; + for (size_t i = 0;i < shape.size(); i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; } + } stream << "}\n"; indent -= 2; stream << "}\n"; @@ -468,10 +499,11 @@ void GenHostCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, LoweredFunc func, - std::string test_file) { + std::string pre_kernel, + std::string post_kernel) { int indent = 0; 
std::ofstream stream; - stream.open("digit_recognition.cpp"); + stream.open("host.cpp"); // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/host/digit_recognition.cpp"); stream << "#include \n"; stream << "#include \n"; @@ -541,36 +573,37 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << Type2Byte(arg_types[i]) << " "; stream << "arg_top_" << i; - stream << " = ("; - stream << Type2Byte(arg_types[i]); + stream << "[1] = { "; - stream << ")(arg_" << i << ")"; + stream << "arg_" << i << " }"; if (arg_types[i].fracs > 0) stream << " >> " << static_cast(arg_types[i].fracs); stream << ";\n"; - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "fool_" << cnt << "[1] = { arg_top_" << i << " };\n"; + // PrintIndent(stream, indent); + // stream << Type2Byte(arg_types[i]) << " "; + // stream << "fool_" << cnt << "[1] = { arg_top_" << i << " };\n"; cnt += 1; } stream << "\n\n"; } // generate host side (before) on arg_top_k - PrintIndent(stream,indent); stream << "printf(\"Digit Recognition Application\\n\");\n"; + stream << "\n"; + PrintIndent(stream, indent); + stream << "// compute bofore kernel function"; + // stream being axis interface host, channel for kernel + stream << pre_kernel; - stream << "\n\n"; + stream << "\n"; PrintIndent(stream, indent); stream << "// parse command line arguments for opencl version\n"; PrintIndent(stream, indent); stream << "std::string kernelFile(\"\");\n"; PrintIndent(stream, indent); stream << "parse_sdaccel_command_line_args(argc, argv, kernelFile);\n"; - stream << "\n\n"; - stream << "\n\n"; PrintIndent(stream, indent); stream << "// create OpenCL world\n"; @@ -585,34 +618,38 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "// create kernels\n"; PrintIndent(stream, indent); - stream << "CLKernel DigitRec(digit_rec_world.getContext(), digit_rec_world.getProgram(), \"DigitRec\", digit_rec_world.getDevice());\n"; + stream << 
"CLKernel App(digit_rec_world.getContext(), digit_rec_world.getProgram(), \"App\", digit_rec_world.getDevice());\n"; stream << "\n\n"; PrintIndent(stream, indent); stream << "// create mem objects\n"; for (int i = 0;i < args.size();i++) { PrintIndent(stream, indent); - if (cnt!=0) { - stream << "CLMemObj source_" << i; - stream << "((void*)fool_" << cnt - 1; - stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; - stream << "1, "; - stream << "CL_MEM_READ_WRITE);\n"; - cnt--; - continue; - } + // if (cnt!=0) { + // stream << "CLMemObj source_" << i; + // stream << "((void*)fool_" << cnt - 1; + // stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; + // stream << "1, "; + // stream << "CL_MEM_READ_WRITE);\n"; + // cnt--; + // continue; + // } stream << "CLMemObj source_" << i; stream << "((void*)arg_top_" << i; stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; // stream << ", sizeof(" << Type2ExtStr(arg_types[i]) << "), "; - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { - if (j==0) { - stream << arr->shape[j] << " "; - } else { - stream << "* " << arr->shape[j]; + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + if (j==0) { + stream << arr->shape[j] << " "; + } else { + stream << "* " << arr->shape[j]; + } } + } else { + stream << "1"; } stream << ", "; stream << "CL_MEM_READ_WRITE);\n"; @@ -635,18 +672,17 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "int local_size[3] = {1, 1, 1};\n"; PrintIndent(stream, indent); - stream << "DigitRec.set_global(global_size);\n"; + stream << "App.set_global(global_size);\n"; PrintIndent(stream, indent); - stream << "DigitRec.set_local(local_size);\n"; + stream << "App.set_local(local_size);\n"; stream << "\n\n"; PrintIndent(stream, indent); stream << "// add them to the world\n"; PrintIndent(stream, indent); - stream << "digit_rec_world.addKernel(DigitRec);\n"; + stream << 
"digit_rec_world.addKernel(App);\n"; stream << "\n\n"; PrintIndent(stream, indent); stream << "// set kernel arguments\n"; - // TODO // PrintIndent(stream, indent); // stream << "digit_rec_world.setConstKernelArg(0, 0, arg_top_0);\n"; for (int i = 0;i < args.size();i++) { @@ -655,22 +691,22 @@ void GenHostCode(TVMArgs& args, stream << ");\n"; } - stream << "\n\n"; + stream << "\n"; PrintIndent(stream, indent); stream << "// run\n"; PrintIndent(stream, indent); - stream << "digit_rec_world.runKernels();\n"; - stream << "\n\n"; + stream << "digit_rec_world.runKernels();\n\n"; PrintIndent(stream, indent); stream << "// read the data back\n"; PrintIndent(stream, indent); stream << "digit_rec_world.readMemObj(2);\n"; - // generate host side (post) - stream << "\n\n"; + // generate host (post-kernel) + stream << "\n"; PrintIndent(stream, indent); stream << "// compute after kernel function\n"; - stream << test_file; + // stream being axis interface host, channel for kernel + stream << post_kernel; // copy to shared mem for (int i = 0; i < args.size(); i++) { @@ -694,9 +730,15 @@ void GenHostCode(TVMArgs& args, class SimModuleNode final : public ModuleNode { public: SimModuleNode(LoweredFunc func, - std::string host_code, + std::string pre_host_code, + std::string post_host_code, + std::vector>> arg_stream_types, std::string dev_code) - : func_(func), host_(host_code), dev_(dev_code) { + : func_(func), + pre_host_(pre_host_code), + post_host_(post_host_code), + arg_stream_types_(arg_stream_types), + dev_(dev_code) { } const char* type_key() const { @@ -711,17 +753,17 @@ class SimModuleNode final : public ModuleNode { if (args.size() != (int)func_->args.size()) LOG(FATAL) << "The function should take in " << func_->args.size() << " inputs but get " << args.size(); + std::vector shmids; std::vector arg_sizes; std::vector arg_types; - std::vector shmids; - // generate interface wrapper for kernel args CollectArgInfo(args, func_, arg_sizes, arg_types); GenSharedMem(args, 
shmids, arg_sizes); - GenWrapperCode(args, shmids, arg_types, func_); + // generate interface wrapper for kernel args + GenWrapperCode(args, shmids, arg_types, arg_stream_types_, func_); // host code invoking extern c wrapped hlsc kernel GenKernelCode(dev_); - GenHostCode(args, shmids, arg_types, func_, host_); + GenHostCode(args, shmids, arg_types, func_, pre_host_, post_host_); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated HLS C code ..."; @@ -743,39 +785,540 @@ class SimModuleNode final : public ModuleNode { private: LoweredFunc func_; - std::string host_; + std::string pre_host_; + std::string post_host_; + std::vector>> arg_stream_types_; std::string dev_; }; +using var2nameType = std::unordered_map>>; + Module CreateSimModule( LoweredFunc func, - std::string host_code, + std::string pre_host_code, + std::string post_host_code, + std::vector& arg_vars, + std::unordered_map& stream_table, + var2nameType& arg_top_vars, std::string dev_code) { + // process info: shape type and stream + std::vector>> arg_type; + for (size_t i = 0 ; i < arg_vars.size(); i++) { + auto v = arg_vars[i]; + auto nameType = arg_top_vars[v]; + bool is_stream; + if (stream_table[v]) + is_stream = true; + else is_stream = false; + auto item = std::make_tuple(is_stream, std::get<1>(nameType), + std::get<2>(nameType)); + arg_type.push_back(item); + } std::shared_ptr n = - std::make_shared(func, host_code, dev_code); + std::make_shared(func, pre_host_code, post_host_code, + arg_type, dev_code); return Module(n); } } // namespace runtime namespace codegen { +using var2nameType = std::unordered_map>>; + +// collect type info for vars +class TypeCollector final : public IRVisitor { + public: + var2nameType& top_args_; + TypeCollector(var2nameType& top_args) + : top_args_(top_args) {} + void Visit_(const Allocate *op) { + auto v = op->buffer_var.get(); + + // record type and shape + if (top_args_.count(v)) { + std::vector shape; + for (size_t i = 0; i < 
op->extents.size(); i++) + shape.push_back(op->extents[i].as()->value); + top_args_[v] = std::make_tuple( + std::get<0>(top_args_[v]), + op->type, shape); + } + IRVisitor::Visit_(op); + } +}; + +// record of vars for top func signature +// vars include passed-in and not registered vars on host +class StreamCollector final : public IRVisitor { + public: + StreamCollector(std::vector& stream_stmt_list, + std::vector& stream_expr_list, + std::vector& arg_vars, + std::unordered_map& stream_table, + std::string initial_scope) + : stream_stmt_list_(stream_stmt_list), + stream_expr_list_(stream_expr_list), + arg_vars_(arg_vars), + stream_table_(stream_table), + scope_(initial_scope) {} + + // record alloc on host + void Visit_(const Allocate *op) { + if (!switch_on) + this->HandleDef(op->buffer_var.get()); + IRVisitor::Visit_(op); + } + + void Visit_(const Load *op) { + if (!switch_on) { + this->HandleUse(op->buffer_var); + } + IRVisitor::Visit_(op); + } + + // update placeholder status + void Visit_(const Store* op) { + if (switch_on) { + if (auto val = op->value.as()) { + const Variable* v = val->buffer_var.get(); + for (size_t i = 0; i < arg_vars_.size(); i++) { + std::string name = arg_vars_[i]->name_hint; + if (v->name_hint.find(name) != std::string::npos) { + // record in VisitStmt StreamStmt + // LOG(WARNING) << op->buffer_var << ":" << v->name_hint; + } + } + } + } else { // count use on host + this->HandleUse(op->buffer_var); + } + IRVisitor::Visit_(op); + } + + void Visit_(const StreamStmt* op) { + if (switch_on) { // in xcel scope + const Variable* v = op->buffer_var.get(); + // LOG(WARNING) << v->name_hint; + } + IRVisitor::Visit_(op); + } + + void Visit_(const AttrStmt* op) { + if (op->attr_key == attr::device_scope) { + if (op->value.as()->value != scope_) + switch_on = true; + else switch_on = false; + } + IRVisitor::Visit_(op); + } + + // additional data saved into stream table (for streamed + // data we keep the new id for arg_stream in var_idmap, + // 
and non-streamed using the repalced arg_top_k name) + void HandleDef(const Variable* v) { + CHECK(!host_def_count_.count(v)) + << "variable " << v->name_hint + << " has already been defined, the Stmt is not SSA"; + CHECK(!host_use_count_.count(v)) + << "variable " << v->name_hint + << " has been used before definition!"; + host_use_count_[v] = 0; + host_def_count_[v] = 1; + } + + void HandleUse(const Expr& v) { + CHECK(v.as()); + Var var(v.node_); + auto it = host_use_count_.find(var.get()); + if (it != host_use_count_.end()) { + if (it->second >= 0) { + ++it->second; + } + } else { + if (!stream_table_.count(var.get())) { + host_undefined_.push_back(var); + host_use_count_[var.get()] = -1; + } + } + } + + bool host_scope_{false}; + Array host_undefined_; + std::unordered_map host_use_count_; + std::unordered_map host_def_count_; + + private: + std::vector& stream_stmt_list_; + std::vector& stream_expr_list_; + std::vector& arg_vars_; + std::unordered_map& stream_table_; + std::string scope_; + bool switch_on{true}; +}; + +// codegen for accelerators +class CodeGenXcel : public CodeGenVivadoHLS { + public: + int arg_top_count{0}; + std::string pre_kernel; + std::string post_kernel; + // map for generating wrapper + var2nameType arg_top_vars; + std::vector arg_vars; + std::unordered_map stream_table; + str2tupleMap map_arg_type_; + LoweredFunc f_; + + void AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { + map_arg_type_ = map_arg_type; f_ = f; + CodeGenVivadoHLS::AddFunction(f, map_arg_type); + }; + + void VisitStmt_(const AttrStmt* op) { + if (op->attr_key == ir::attr::device_scope) { + // print top( ... 
in host and enter fpga scope + if (op->value.as()->value == "fpga" && !fpga_scope_) { + fpga_scope_ = true; + PrintIndent(); + + // track the stream usage + std::vector stream_stmts; + std::vector stream_exprs; + StreamCollector collector(stream_stmts, stream_exprs, + arg_vars, stream_table, "cpu"); + collector.Visit(op->body); + + // update data type and name + for (auto k : collector.host_undefined_) { + auto v = k.get(); + arg_vars.push_back(v); + stream_table[v] = true; + auto tuple = arg_top_vars[v]; + arg_top_vars[v] = std::make_tuple(v->name_hint, + std::get<1>(tuple), + std::get<2>(tuple)); + } + TypeCollector visitor(arg_top_vars); + visitor.Visit(op->body); + + // generte function calls + stream << "top("; + int index = 0; + for (size_t i = 0; i < arg_vars.size(); i++) { + auto v = arg_vars[i]; + std::string arg_name; + if (stream_table[v]) + arg_name = std::get<0>(arg_top_vars[v]); + else arg_name = GetVarID(v); + if (index !=0) stream << ", "; + stream << arg_name; + // print kernel func signature + if (index !=0) arg_stream << ", "; + PrintType(std::get<1>(arg_top_vars[v]), arg_stream); + arg_stream << "* " << arg_name; + index++; + } + stream << ");\n"; + + // switch context to device scope + host_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + + // swtich from device to host + } else if (op->value.as()->value == "cpu" && + fpga_scope_) { + fpga_scope_ = false; + device_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + } + } + CodeGenC::VisitStmt_(op); + } + void VisitStmt_(const Store* op) { + std::string vid = GetVarID(op->buffer_var.get()); + if (vid.find("stream_") == std::string::npos) + CodeGenVivadoHLS::VisitStmt_(op); + }; + + void VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos 
&& + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + // modify var idmap for passed in args + } else if (value.find("data") != std::string::npos || + value.substr(0, 3) == "arg") { + auto v = op->var.get(); + auto tuple = arg_top_vars[v]; + arg_vars.push_back(v); + stream_table[v] = false; + var_idmap_[v] = "arg_top_" + std::to_string(arg_top_count); + std::string api_name = "arg" + std::to_string(arg_top_count); + auto arg = map_arg_type_[api_name]; + // PrintType(std::get<1>(arg), arg_stream); + std::vector shape; + if (auto buf = f_->api_args[arg_top_count].as()) + for (size_t i = 0; i < buf->shape.size(); i++) + shape.push_back(buf->shape[i].as()->value); + arg_top_vars[v] = std::make_tuple(vid, std::get<1>(arg), shape); + arg_top_count += 1; + } + PrintStmt(op->body); + }; + + void VisitStmt_(const StreamStmt* op) { + //TODO: fix this + // std::string vid = GetVarID(op->buffer_var.get()); + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); + PrintIndent(); + auto load_op = op->value.as(); + auto v = load_op->buffer_var.as(); + // placeholder args using recv name + if (stream_table.count(v)) { + auto tuple = arg_top_vars[v]; + vid.replace(vid.find("stream_send"), 12, "stream_recv"); + arg_top_vars[v] = std::make_tuple(vid, std::get<1>(tuple), + std::get<2>(tuple)); + stream_table[v] = true; + } // else: streamed externop defined in analysis + // PrintExpr(op->value, stream); + // stream << vid << ".write()\n"; + }; +}; + +// replace host-device interface args with pragma +class CodeGenHost : public CodeGenAOCL { + public: + int arg_top_count{0}; + std::string pre_kernel; + std::string post_kernel; + // map for generating wrapper + std::vector arg_vars; + std::unordered_map stream_table; + var2nameType arg_top_vars; + + void VisitStmt_(const AttrStmt* op) { + if 
(op->attr_key == ir::attr::device_scope) { + // print top( ... in host and enter fpga scope + if (op->value.as()->value == "fpga" && !fpga_scope_) { + fpga_scope_ = true; + PrintIndent(); + + // track the stream usage + std::vector stream_stmts; + std::vector stream_exprs; + var2nameType unreg_vars; + StreamCollector collector(stream_stmts, stream_exprs, + arg_vars, stream_table, "cpu"); + collector.Visit(op->body); + // update data type and name + for (size_t k = 0; k < arg_vars.size(); k ++) + arg_top_vars[arg_vars[k]]; + for (auto k : collector.host_undefined_) + arg_top_vars[k.get()]; + TypeCollector visitor(arg_top_vars); + visitor.Visit(op->body); + + // generte function calls + stream << "top(oo"; + // int index = 0; + // for (auto op : stream_stmts) { + // if (index !=0) stream << ", "; + // std::string vid; + // if (!var_idmap_.count(op->buffer_var.get())) + // vid = AllocVarID(op->buffer_var.get()); + // else vid = GetVarID(op->buffer_var.get()); + // stream << vid; + // if (vid.find("stream_send") != std::string::npos || + // vid.find("stream_recv") != std::string::npos) { + // if (index !=0) arg_stream << ", "; + // PrintType(op->buffer_var.type(), arg_stream); + // arg_stream << " " << vid; + // } + // index++; + // } + // for (auto op : stream_exprs) { + // if (index !=0) stream << ", "; + // std::string vid; + // if (!var_idmap_.count(op->buffer_var.get())) + // vid = AllocVarID(op->buffer_var.get()); + // else vid = GetVarID(op->buffer_var.get()); + // stream << vid; + // // stream << op->buffer_var.get()->name_hint; + // if (vid.find("stream_send") != std::string::npos || + // vid.find("stream_recv") != std::string::npos) { + // if (index !=0) arg_stream << ", "; + // PrintType(op->buffer_var.type(), arg_stream); + // arg_stream << " " << vid; + // } + // index++; + // } + stream << ");\n"; + + // switch context to device scope + host_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + + // swtich from device to host + } 
else if (op->value.as()->value == "cpu" && + fpga_scope_) { + fpga_scope_ = false; + device_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + } + } + CodeGenC::VisitStmt_(op); + } + + void VisitStmt_(const Allocate* op) { + std::string vid = AllocVarID(op->buffer_var.get()); + if (vid.find("stream_") != std::string::npos) { + // do not print alloc stream + this->PrintStmt(op->body); + } else { + CHECK(!is_zero(op->condition)); + this->PrintIndent(); + int32_t constant_size = op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + var_shape_map_[buffer] = op->extents; + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); + + // initlize hls stream channel + if (vid.find("stream_in") != std::string::npos || + vid.find("stream_out") != std::string::npos) { + stream << "hls::stream<"; + PrintType(op->type, stream); + stream << "> " << vid << ";\n"; + } else { + PrintType(op->type, stream); + stream << ' '<< vid; + if (constant_size > 1) {// Transfer length one array to scalar + for (size_t i = 0; i < op->extents.size(); i++) { + stream << '['; + PrintExpr(op->extents[i], stream); + stream << "]"; + } + } + stream << ";\n"; + } + buf_length_map_[buffer] = constant_size; + RegisterHandleType(op->buffer_var.get(), op->type); + for (size_t i = 0; i < op->attrs.size(); i++) { + this->PrintStmt(op->attrs[i]); + } + this->PrintStmt(op->body); + } + }; + + void VisitExpr_(const StreamExpr* op, std::ostream& os) { + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); + // os << vid << ".read()"; + }; + + void VisitStmt_(const Store* op) { + std::string vid = GetVarID(op->buffer_var.get()); + if (vid.find("stream_") == std::string::npos) + CodeGenC::VisitStmt_(op); + }; + + void VisitStmt_(const 
StreamStmt* op) { + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); + PrintIndent(); + auto load_op = op->value.as(); + auto v = load_op->buffer_var.as(); + // placeholder args using recv name + if (stream_table.count(v)) { + auto tuple = arg_top_vars[v]; + arg_top_vars[v] = std::make_tuple(vid, std::get<1>(tuple), + std::get<2>(tuple)); + stream_table[v] = true; + } // else: streamed externop defined in analysis + // PrintExpr(op->value, stream); + // stream << vid << ".write()\n"; + }; + + void VisitStmt_(const LetStmt* op) { + std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); + PrintType(op->var.type(), this->stream); + this->stream << ' ' + << vid + << " = " << value << ";\n"; + // locate arg data and update arg_top_vars + } else if (value.find("data") != std::string::npos || + value.substr(0, 3) == "arg") { + auto v = op->var.get(); + auto tuple = arg_top_vars[v]; + arg_vars.push_back(v); + stream_table[v] = false; + var_idmap_[v] = "arg_top_" + std::to_string(arg_top_count); + arg_top_vars[v] = std::make_tuple(vid, std::get<1>(tuple), + std::get<2>(tuple)); + arg_top_count += 1; + } + PrintStmt(op->body); + }; + + // Split host into pre/post kernel + void SplitHost() { + std::string code = this->GetHost(); + size_t pos = code.find("top("); + pre_kernel = code.substr(0, pos -1); + post_kernel = code.substr(code.find('\n', pos) + 1); + } +}; + // unified simulation function for diff platforms runtime::Module BuildSimModule(Array funcs, Array attrs, Array values) { CodeAnalysMerlinC ca; - CodeGenAOCL cg_host; - CodeGenVivadoHLS cg_dev; + CodeGenHost cg_host; + CodeGenXcel cg_dev; for (LoweredFunc f : funcs) { - // analyze AST and collect arg 
info ca.AddFunction(f); str2tupleMap map_arg_type; map_arg_type = ca.Finish(); - // generate kernel code cg_host.AddFunction(f, map_arg_type); cg_dev.AddFunction(f, map_arg_type); } + cg_host.SplitHost(); return runtime::CreateSimModule(funcs[0], - cg_host.GetHost(), + cg_host.pre_kernel, + cg_host.post_kernel, + cg_dev.arg_vars, + cg_dev.stream_table, + cg_dev.arg_top_vars, cg_dev.GetDevice()); } diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index dd2754142..0b11fecf0 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -76,7 +76,7 @@ std::string CodeGenC::GetHost() { std::string postproc = host_stream.str(); postproc.erase(postproc.rfind("}") - 1, postproc.length() - 1); - postproc.erase(0, postproc.find("\n") + 1); + postproc.erase(0, postproc.find("{") + 1); return postproc + "\n\n"; } @@ -85,7 +85,7 @@ std::string CodeGenC::GetDevice() { device << "void top(" << arg_stream.str() << "){\n" << device_stream.str(); if (fpga_scope_) device << stream.str(); - return decl_stream.str() + device.str(); + return decl_stream.str() + device.str() + "}\n\n"; } std::string CodeGenC::Finish() { @@ -660,7 +660,7 @@ void CodeGenC::VisitStmt_(const Store* op) { Type t = op->value.type(); if (t.lanes() == 1) { std::string value = this->PrintExpr(op->value); - std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); + std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index); this->PrintIndent(); stream << ref << " = " << value << ";\n"; } else { @@ -810,7 +810,10 @@ void CodeGenC::VisitStmt_(const LetStmt* op) { void CodeGenC::VisitStmt_(const Allocate* op) { CHECK(!is_zero(op->condition)); - std::string vid = AllocVarID(op->buffer_var.get()); + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); if (op->new_expr.defined()) { // Prefer global static allocation for the program 
CHECK_EQ(op->free_function, "nop"); @@ -837,7 +840,8 @@ void CodeGenC::VisitStmt_(const Allocate* op) { this->PrintStmt(op->body); } -// record of vars used in next scope switch +// record vars transferred between xcel and host +// collect info of needed args & streamed args (types) class StreamCollector final : public IRVisitor { public: StreamCollector(std::vector& stream_stmt_list, @@ -848,8 +852,9 @@ class StreamCollector final : public IRVisitor { scope_(initial_scope) {} void Visit_(const StreamExpr* op) { - if (switch_on) + if (switch_on) { stream_expr_list_.push_back(op); + } } void Visit_(const StreamStmt* op) { @@ -858,9 +863,11 @@ class StreamCollector final : public IRVisitor { } void Visit_(const AttrStmt* op) { - if (op->attr_key == attr::device_scope && - op->value.as()->value == scope_) - switch_on = false; + if (op->attr_key == attr::device_scope) { + if (op->value.as()->value != scope_) + switch_on = true; + else switch_on = false; + } this->Visit(op->body); } @@ -868,7 +875,7 @@ class StreamCollector final : public IRVisitor { std::vector& stream_stmt_list_; std::vector& stream_expr_list_; std::string scope_; - bool switch_on{true}; + bool switch_on{false}; }; void CodeGenC::VisitStmt_(const AttrStmt* op) { @@ -887,69 +894,6 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { const Variable* v = op->node.as(); CHECK(v); volatile_buf_.insert(v); - } else if (op->attr_key == ir::attr::device_scope) { - // print top( ... 
in host and enter fpga scope - if (op->value.as()->value == "fpga" && !fpga_scope_) { - fpga_scope_ = true; - PrintIndent(); - - // track the stream usage - std::vector stream_stmts; - std::vector stream_exprs; - StreamCollector collector(stream_stmts, stream_exprs, "cpu"); - collector.Visit(op->body); - - // generte function calls - stream << "top("; - int index = 0; - for (auto op : stream_stmts) { - if (index !=0) stream << ", "; - std::string vid = op->buffer_var.get()->name_hint; - stream << vid; - if (vid.find("stream_in") != std::string::npos || - vid.find("stream_out") != std::string::npos) { - if (index !=0) arg_stream << ", "; - PrintType(op->buffer_var.type(), arg_stream); - arg_stream << vid; - } - index++; - } - for (auto op : stream_exprs) { - if (index !=0) stream << ", "; - std::string vid = op->buffer_var.get()->name_hint; - stream << op->buffer_var.get()->name_hint; - if (vid.find("stream_in") != std::string::npos || - vid.find("stream_out") != std::string::npos) { - if (index !=0) arg_stream << ", "; - PrintType(op->buffer_var.type(), arg_stream); - arg_stream << " " << vid; - } - index++; - } - stream << ");\n"; - - // switch context to device scope - host_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // swtich from device to host - } else if (op->value.as()->value == "cpu" && - fpga_scope_) { - fpga_scope_ = false; - // add args after fpga block exited - // int i = 0; - // for (const auto & kv : top_data_type_) { - // PrintType(kv.second, host_stream); - // if (i != 0) stream << ","; - // host_stream << " " << kv.first; - // i++; - // } - // host_stream << ");\n"; - device_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - } } this->PrintStmt(op->body); } diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index 589ea25b6..7007f7e1c 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -164,53 +164,44 
@@ void CodeGenHLSC::VisitStmt_(const IfThenElse* op) { } void CodeGenHLSC::VisitStmt_(const Allocate* op) { - const Variable* v = op->buffer_var.get(); - std::string key = v->name_hint; - for (size_t i = 0; i < key.size(); ++i) - if (key[i] == '.') key[i] = '_'; + CHECK(!is_zero(op->condition)); + std::string vid; + if (!var_idmap_.count(op->buffer_var.get())) + vid = AllocVarID(op->buffer_var.get()); + else vid = GetVarID(op->buffer_var.get()); + this->PrintIndent(); + int32_t constant_size = op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + var_shape_map_[buffer] = op->extents; + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); - // reuse host var & extract StreamExpr - if (!fpga_scope_ && host_name_alloc_map_.count(key)) { - this->PrintIndent(); - stream << "hls::stream<> read\n"; - this->PrintStmt(op->body); + // initlize hls stream channel + if (vid.find("stream_") != std::string::npos) { + void(0); + // stream << "hls::stream<"; + // PrintType(op->type, stream); + // stream << "> " << vid << ";\n"; } else { - CHECK(!is_zero(op->condition)); - std::string vid = AllocVarID(op->buffer_var.get()); - this->PrintIndent(); - int32_t constant_size = op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - var_shape_map_[buffer] = op->extents; - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); - - // initlize hls stream channel - if (vid.find("stream_in") != std::string::npos || - vid.find("stream_out") != std::string::npos) { - stream << "hls::stream<"; - PrintType(op->type, stream); - stream << "> " << vid << ";\n"; - } else { - PrintType(op->type, stream); - stream << ' '<< vid; - if (constant_size > 1) {// Transfer length one array to scalar - for 
(size_t i = 0; i < op->extents.size(); i++) { - stream << '['; - PrintExpr(op->extents[i], stream); - stream << "]"; - } + PrintType(op->type, stream); + stream << ' '<< vid; + if (constant_size > 1) {// Transfer length one array to scalar + for (size_t i = 0; i < op->extents.size(); i++) { + stream << '['; + PrintExpr(op->extents[i], stream); + stream << "]"; } - stream << ";\n"; } - buf_length_map_[buffer] = constant_size; - RegisterHandleType(op->buffer_var.get(), op->type); - for (size_t i = 0; i < op->attrs.size(); i++) { - this->PrintStmt(op->attrs[i]); - } - this->PrintStmt(op->body); + stream << ";\n"; + } + buf_length_map_[buffer] = constant_size; + RegisterHandleType(op->buffer_var.get(), op->type); + for (size_t i = 0; i < op->attrs.size(); i++) { + this->PrintStmt(op->attrs[i]); } + this->PrintStmt(op->body); } } // namespace codegen diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index 37c884f15..9e447488a 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -148,6 +148,7 @@ void CodeGenVivadoHLS::VisitExpr_(const StreamExpr* op, std::ostream& os) { if (!var_idmap_.count(op->buffer_var.get())) vid = AllocVarID(op->buffer_var.get()); else vid = GetVarID(op->buffer_var.get()); + // std::string vid = GetVarID(op->buffer_var.get()); os << vid << ".read()"; } @@ -156,6 +157,7 @@ void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { if (!var_idmap_.count(op->buffer_var.get())) vid = AllocVarID(op->buffer_var.get()); else vid = GetVarID(op->buffer_var.get()); + // std::string vid = GetVarID(op->buffer_var.get()); PrintIndent(); stream << vid; switch (op->stream_type) { diff --git a/tvm/src/codegen/hlsc/codegen_vhls.h b/tvm/src/codegen/hlsc/codegen_vhls.h index a2dd5fa0e..b6d8dbd39 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.h +++ b/tvm/src/codegen/hlsc/codegen_vhls.h @@ -16,7 +16,7 @@ namespace TVM { namespace codegen { -class CodeGenVivadoHLS final : public CodeGenHLSC 
{ +class CodeGenVivadoHLS : public CodeGenHLSC { public: void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); void PrintType(Type t, std::ostream& os) override; diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 4a503d80b..6fd8887fb 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -59,8 +59,13 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, // Write arguments for (size_t i = 0; i < f->args.size(); ++i) { + // alloc or get var name Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); + std::string vid; + if (!var_idmap_.count(v.get())) + vid = AllocVarID(v.get()); + else vid = GetVarID(v.get()); + if (i != 0) this->stream << ", "; if (map_arg_type.find(vid) == map_arg_type.end()) { LOG(WARNING) << vid << " type not found\n"; diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index ad6fd9556..1ca8f6058 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -160,6 +160,8 @@ void CodeGenOpenCL::VisitStmt_(const LetStmt* op) { this->stream << ' ' << vid << " = " << value << ";\n"; + } else if (value.find(".data)") !=0) { + var_idmap_[op->var.get()] = "arg_top_" + vid; } PrintStmt(op->body); } diff --git a/tvm/src/pass/stream_inference.cc b/tvm/src/pass/stream_inference.cc index 9afe136d2..ec18b1871 100644 --- a/tvm/src/pass/stream_inference.cc +++ b/tvm/src/pass/stream_inference.cc @@ -11,6 +11,147 @@ namespace TVM { namespace ir { +// use/def analysis to capture host xcel deps +class StreamUseDefAnalysis : public IRMutator { + public: + Stmt Mutate_(const AttrStmt *op, const Stmt& s) final { + if (op->attr_key == attr::device_scope) { + if (op->value.as()->value == "fpga") + host_scope_ = false; + return IRMutator::Mutate_(op, s); + } else { + return IRMutator::Mutate_(op, s); + } + } + + Stmt Mutate_(const LetStmt *op, const Stmt& s) final { + 
this->HandleDef(op->var.get()); + Stmt body = this->Mutate(op->body); + Expr value = this->Mutate(op->value); + if (body.same_as(op->body) && + value.same_as(op->value)) { + return s; + } else { + return LetStmt::make(op->var, value, body); + } + } + + Stmt Mutate_(const For *op, const Stmt& s) final { + this->HandleDef(op->loop_var.get()); + return IRMutator::Mutate_(op, s); + } + + Stmt Mutate_(const Allocate *op, const Stmt& s) final { + this->HandleDef(op->buffer_var.get()); + return IRMutator::Mutate_(op, s); + } + + Stmt Mutate_(const Store *op, const Stmt& s) final { + this->HandleUse(op->buffer_var); + return IRMutator::Mutate_(op, s); + } + + Stmt Mutate_(const StreamStmt *op, const Stmt& s) final { + this->HandleUse(op->buffer_var); + return IRMutator::Mutate_(op, s); + } + + Expr Mutate_(const Let *op, const Expr& e) final { + this->HandleDef(op->var.get()); + Expr body = this->Mutate(op->body); + Expr value = this->Mutate(op->value); + if (body.same_as(op->body) && + value.same_as(op->value)) { + return e; + } else { + return Let::make(op->var, value, body); + } + } + + Expr Mutate_(const Variable *op, const Expr& e) final { + this->HandleUse(e); + return IRMutator::Mutate_(op, e); + } + + Expr Mutate_(const Load *op, const Expr& e) final { + this->HandleUse(op->buffer_var); + return IRMutator::Mutate_(op, e); + } + + Expr Mutate_(const StreamExpr *op, const Expr& e) final { + this->HandleUse(op->buffer_var); + return IRMutator::Mutate_(op, e); + } + + Stmt Mutate_(const KernelDef *op, const Stmt& s) { + for (auto arg : op->args) { + this->HandleDef(arg.get()); + } + Stmt body = this->Mutate(op->body); + for (auto arg : op->args) { + xcel_def_count_[arg.get()] = 0; + } + return s; + } + + void HandleDef(const Variable* v) { + if (host_scope_) { + CHECK(!host_def_count_.count(v)) + << "variable " << v->name_hint + << " has already been defined, the Stmt is not SSA"; + CHECK(!host_use_count_.count(v)) + << "variable " << v->name_hint + << " has been used 
before definition!"; + host_use_count_[v] = 0; + host_def_count_[v] = 1; + } else { + CHECK(!xcel_def_count_.count(v)) + << "variable " << v->name_hint + << " has already been defined, the Stmt is not SSA"; + CHECK(!xcel_use_count_.count(v)) + << "variable " << v->name_hint + << " has been used before definition!"; + xcel_use_count_[v] = 0; + xcel_def_count_[v] = 1; + } + } + + void HandleUse(const Expr& v) { + CHECK(v.as()); + Var var(v.node_); + if (host_scope_) { + auto it = host_use_count_.find(var.get()); + if (it != host_use_count_.end()) { + if (it->second >= 0) { + ++it->second; + } + } else { + host_undefined_.push_back(var); + host_use_count_[var.get()] = -1; + } + } else { + auto it = xcel_use_count_.find(var.get()); + if (it != xcel_use_count_.end()) { + if (it->second >= 0) { + ++it->second; + } + } else { + xcel_undefined_.push_back(var); + xcel_use_count_[var.get()] = -1; + } + } + } + + bool host_scope_{true}; + Array host_undefined_; + Array xcel_undefined_; + std::unordered_map host_use_count_; + std::unordered_map host_def_count_; + std::unordered_map xcel_use_count_; + std::unordered_map xcel_def_count_; +}; + + class StreamMutator : public IRMutator { public: explicit StreamMutator(int bus_bandwidth) { diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index 9250950ac..5d2ee7cb1 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -28,12 +28,33 @@ size_t FindNodeRef(ArrayNode* array_node, const T& v) { return array_node->data.size(); } +// The replacer of cache. 
+class LoadReplacer : public ir::IRMutator { + public: + explicit LoadReplacer( + const std::unordered_map& vsub) + : vsub_(vsub) {} + + Expr Mutate_(const Load* op, const Expr& e) { + const Variable* var = op->buffer_var.as(); + auto it = vsub_.find(var); + if (it != vsub_.end()) + return Load::make(op->type, it->second, + op->index, op->predicate); + return e; + } + + private: + const std::unordered_map& vsub_; +}; + // The replacer of cache. class VarReplacer : public ir::IRMutator { public: explicit VarReplacer( const std::unordered_map& vsub) : vsub_(vsub) {} + Expr Mutate_(const Variable* op, const Expr& e) { auto it = vsub_.find(op); if (it != vsub_.end()) return it->second; @@ -387,6 +408,7 @@ Tensor Schedule::move_to(const Tensor& target, Stage target_stage = (*this)[target]; std::vector consumers; size_t num_stage = (*this)->stages.size(); + size_t min_pos = num_stage; ArrayNode* stages = (*this)->stages.CopyOnWrite(); Buffer target_buffer; @@ -394,6 +416,7 @@ Tensor Schedule::move_to(const Tensor& target, const PlaceholderOpNode* op = target_stage->op.as(); bool is_placeholder = op ? 
true : false; if (is_placeholder) { + min_pos = 0; for (size_t i = 0; i < num_stage; i++) { Stage s = (*this)->stages[i]; if (const ExternOpNode* op = s->op.as()) { @@ -406,17 +429,29 @@ Tensor Schedule::move_to(const Tensor& target, } } } - } else { // only consumed by self stage + } else { // move back the data after extern compute + min_pos = FindNodeRef(stages, target_stage) + 1; const ExternOpNode* op = target_stage->op.as(); target_buffer = op->output_placeholders[0]; - consumers.push_back(target_stage); + for (size_t i = 0; i < num_stage; i++) { + Stage s = (*this)->stages[i]; + if (const ExternOpNode* op = s->op.as()) { + for (size_t j = 0; j < op->inputs.size(); j++) { + if (op->output_placeholders[0] == op->input_placeholders[j]) { + consumers.push_back(s); + break; + } + } + } + } } - // build consumer (sender) stage + // build consumer (sender) stage which consumes data from original source + // and write into the streaming channel Array consumer_inputs; Array consumer_input_placeholders; Array consumer_output_placeholders; - std::string consumer_name = target_buffer->name + ".stream_in"; + std::string consumer_name = target_buffer->name + ".stream_send"; Buffer consumer_buffer = BufferNode::make(Var(consumer_name, Handle()), target->dtype, target->shape, @@ -428,58 +463,70 @@ Tensor Schedule::move_to(const Tensor& target, consumer_input_placeholders.push_back(target_buffer); consumer_output_placeholders.push_back(consumer_buffer); - // var.write(input_placeholder) - std::vector csm_indices; - std::vector csm_loop_vars; - for (size_t i = 0; i < target->shape.size(); i++) { - VarExpr iter("i" + std::to_string(i)); - csm_indices.push_back(iter); - csm_loop_vars.push_back(iter); - } - Expr csm_index = getIndex(csm_indices, target->shape); - Stmt consumer_body = StreamStmt::make(VarExpr(consumer_buffer->data), - csm_index, + // stream statement + // std::vector csm_indices; + // std::vector csm_loop_vars; + // for (size_t i = 0; i < target->shape.size(); 
i++) { + // VarExpr iter("i" + std::to_string(i)); + // csm_indices.push_back(iter); + // csm_loop_vars.push_back(iter); + // } + Expr csm_index = Expr(0); //getIndex(csm_indices, target->shape); + Expr load_expr = Load::make(target->dtype, + target_buffer->data, + csm_index, + UIntImm::make(UInt(1), 1)); + Stmt consumer_body = StreamStmt::make(consumer_buffer->data, + load_expr, stream_type, channel_depth); - for (size_t j = 0; j < target->shape.size(); j++) { - consumer_body = For::make( - VarExpr(csm_loop_vars[j]), - 0, target->shape[j], - ForType::Serial, - DeviceAPI::None, - consumer_body); - } + // for (size_t j = 0; j < target->shape.size(); j++) { + // consumer_body = For::make( + // VarExpr(csm_loop_vars[j]), + // 0, target->shape[j], + // ForType::Serial, + // DeviceAPI::None, + // consumer_body); + // } + // create new stage and return stream tensors - auto n = std::make_shared(); - n->name = consumer_name; - n->body = consumer_body; - n->inputs = consumer_inputs; - n->input_placeholders = consumer_input_placeholders; - n->output_placeholders = consumer_output_placeholders; - Operation consumer_op(n); + // auto n = std::make_shared(); + // n->name = consumer_name; + // n->body = consumer_body; + // n->inputs = consumer_inputs; + // n->input_placeholders = consumer_input_placeholders; + // n->output_placeholders = consumer_output_placeholders; + // Operation consumer_op(n); + Operation consumer_op = ExternOpNode::make(consumer_name, + "", + Array(), + consumer_inputs, + consumer_input_placeholders, + consumer_output_placeholders, + consumer_body); Stage consumer_stage = Stage(consumer_op); - size_t consumer_pos = FindNodeRef(stages, target_stage); - stages->data.insert(stages->data.begin() + consumer_pos, consumer_stage.node_); + stages->data.insert(stages->data.begin() + min_pos, consumer_stage.node_); (*this)->stage_map.Set(consumer_op, consumer_stage); - // build producer (receiver) stage + // build producer (receiver) stage which takes in data from 
streaming + // channel and provide data to orginal consumers Array producer_inputs; Array producer_input_placeholders; Array producer_output_placeholders; - std::string producer_name = target_buffer->name + ".stream_out"; - // Buffer producer_buffer = BufferNode::make(Var(target_buffer->name, Handle()), - // target->dtype, - // target->shape, - // Array(), - // Expr(), - // target_buffer->name, - // "", 0, 0); - // producer_inputs.push_back(consumer); + std::string producer_name = target_buffer->name + ".stream_recv"; + Buffer producer_buffer = BufferNode::make(Var(producer_name, Handle()), + target->dtype, + target->shape, + Array(), + Expr(), + producer_name, + "", 0, 0); + // producer_inputs.push_back(consumer_op.output(0)); // producer_input_placeholders.push_back(consumer_buffer); - producer_output_placeholders.push_back(target_buffer); + producer_output_placeholders.push_back(producer_buffer); // streaming producer tensor reading from placeholder Expr stream = StreamExpr::make(target->dtype, - VarExpr(consumer_buffer->data), + consumer_buffer->data, stream_type, channel_depth); // create for loops for tensor init @@ -490,19 +537,19 @@ Tensor Schedule::move_to(const Tensor& target, indices.push_back(iter); loop_vars.push_back(iter); } - Expr index = getIndex(indices, target->shape); - // store op initialized with Variable node - Stmt for_stmt = Store::make(VarExpr(target_buffer->data), + Expr index = Expr(0); //getIndex(indices, target->shape); + // store op initialized with variable node + Stmt for_stmt = Store::make(producer_buffer->data, stream, index, UIntImm::make(UInt(1), 1)); - for (size_t j = 0; j < target->shape.size(); j++) { - for_stmt = For::make( - VarExpr(loop_vars[j]), - 0, target->shape[j], - ForType::Serial, - DeviceAPI::None, - for_stmt); - } + // for (size_t j = 0; j < target->shape.size(); j++) { + // for_stmt = For::make( + // VarExpr(loop_vars[j]), + // 0, target->shape[j], + // ForType::Serial, + // DeviceAPI::None, + // for_stmt); + // 
} Expr device; switch (device_type) { case DeviceType::CPU: @@ -517,9 +564,9 @@ Tensor Schedule::move_to(const Tensor& target, } // attr annotates new scope Stmt body = AttrStmt::make( - VarExpr(target_buffer.node_), + target_buffer->data, "device_scope", device, for_stmt); - Tensor producer = ExternOpNode::make(target_buffer->name, + Tensor producer = ExternOpNode::make(producer_buffer->name, "", Array(), producer_inputs, @@ -529,24 +576,29 @@ Tensor Schedule::move_to(const Tensor& target, // create new stage and return stream tensors Stage producer_stage = Stage(producer->op); - size_t pos = FindNodeRef(stages, target_stage); - stages->data.insert(stages->data.begin() + pos, producer_stage.node_); + size_t pos = FindNodeRef(stages, consumer_stage); + stages->data.insert(stages->data.begin() + pos + 1, producer_stage.node_); (*this)->stage_map.Set(producer->op, producer_stage); // update consumer stages with new tensor and buffer + std::unordered_map vsub; + vsub[target_buffer->data.as()] = producer_buffer->data; for (size_t i = 0; i < consumers.size(); i++) { Stage s = consumers[i]; Array new_inputs; Array new_input_placeholders; const ExternOpNode* op = s->op.as(); - // new_inputs.push_back(producer); - // new_input_placeholders.push_back(producer_buffer); + new_inputs.push_back(producer); + new_input_placeholders.push_back(producer_buffer); for (size_t j = 0; j < op->inputs.size(); j++) { - new_inputs.push_back(op->inputs[j]); - new_input_placeholders.push_back(op->input_placeholders[j]); + if (target != op->inputs[j]) { + new_inputs.push_back(op->inputs[j]); + new_input_placeholders.push_back(op->input_placeholders[j]); + } } + Stmt body = LoadReplacer(vsub).Mutate(op->body); Stmt new_body = AttrStmt::make( - VarExpr(target_buffer.node_), + target_buffer->data, "device_scope", device, op->body); @@ -557,7 +609,7 @@ Tensor Schedule::move_to(const Tensor& target, new_inputs, new_input_placeholders, op->output_placeholders, - op->body); + body); } return 
producer; } diff --git a/tvm/src/schedule/schedule_ops.cc b/tvm/src/schedule/schedule_ops.cc index b4f8e7468..8156844f5 100644 --- a/tvm/src/schedule/schedule_ops.cc +++ b/tvm/src/schedule/schedule_ops.cc @@ -349,7 +349,7 @@ Stmt ScheduleOps( << "call schedule.normalize before scheduleops"; CHECK(s->op.defined()); // no need to specify place holder op. - if (s->op.as()) continue; + if (auto op = s->op.as()) continue; // Remove grouping sugar, get the real attach spec. Stage attach_spec = s.GetAttachSpec(); From c5907cff3fa35463e69530bd1ae1e17549757b27 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 22 Oct 2019 13:52:07 -0400 Subject: [PATCH 085/103] [update] build interface --- python/heterocl/devices.py | 116 +++++++++++++++------------- python/heterocl/tvm/build_module.py | 24 +++--- samples/digitrec/digitrec_vhls.py | 21 ++--- tvm/src/codegen/build_common.cc | 101 ++++++++++++++++-------- 4 files changed, 149 insertions(+), 113 deletions(-) diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 19d1bb1a2..50d5244ab 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -2,9 +2,6 @@ #pylint: disable=too-few-public-methods, too-many-return-statements from .debug import DeviceError -def map_gen(platform, types, model, mode): - pass - class platform(type): """The platform class for compute environment setups @@ -20,26 +17,39 @@ class platform(type): """ def __getattr__(cls, key): if key == "aws_f1": - host = CPU("x86", mode=cls.mode) - device = FPGA("xilinx") - return cls(host, device) + host = CPU("x86", compiler="aocl", lang="opencl") + xcel = FPGA("xilinx", compiler="vhls", lang="hlsc") elif key == "zynq": - host = CPU("arm", key) - device = FPGA("xilinx", key) - return cls(host, device) + host = CPU("arm") + xcel = FPGA("xilinx") elif key == "ppac": - host = CPU("riscv", key) - device = PIM("ppac") - return cls(host, device) + host = CPU("riscv") + xcel = PIM("ppac") else: # unsupported device - raise DeviceError("not 
supported") + raise DeviceError("not supported") + tool = Tooling(key, host, xcel) + return cls(host, xcel, tool) class env(metaclass=platform): mode = "sim" - def __init__(self, host, device): + def __init__(self, host, xcel, tool): self.host = host - self.xcel = device - self.tool = "" + self.xcel = xcel + self.tool = tool + + def __getattr__(self, key): + return self.tool.__getattr__(key) + + def __call__(self, host=None, xcel=None, tool=None): + if host: + assert isinstance(host, Device) + self.host = host + if xcel: + assert isinstance(xcel, Device) + self.xcel = xcel + if tool: + assert isinstance(tool, Tooling) + self.tool = tool def __str__(self): return str(self.host) + " : " + \ @@ -49,6 +59,17 @@ def __repr__(self): return str(self.host) + " : " + \ str(self.xcel) +class device(type): + def __getattr__(cls, key): + if key == "host": + return CPU("x86") + elif key == "xcel": + return FPGA("xilinx") + else: # unsupported device + raise DeviceError("not supported") + +class dev(metaclass=device): + pass class Tooling(object): """The base class for all device tooling @@ -64,31 +85,17 @@ class Tooling(object): model: str Model of device to place date """ - def __init__(self, types, model, platform, mode): - self.types = types - self.model = model + def __init__(self, platform, host, xcel): self.platform = platform - self.mode = mode - self.mapping = { "source" : "", - "sim" : "", - "impl" : "" } - if types == "CPU": # sim = impl - self.mapping["source"] = { "lang": "opencl", - "compile" : "aocl", - "options" : "" } - self.mapping["sim"] = { "env" : "sdaccel", - "compile" : "xcpp" } - if types == "FPGA": - self.mapping["source"] = { "lang": "hlsc", - "compile" : "vhls", - "options" : "" } - self.mapping["sim"] = {} - self.mapping["co-sim"] = {} - self.mapping["syn"] = { "compile" : "vivado_hls", - "callback" : ""} - self.mapping[""] = {} - else: # implementation - pass + self.mode = "sim" + self.host = host + self.xcel = xcel + self.mapping = {} + 
self.mapping["sim"] = { "type" : "csim", + "emulator" : "vivado_hls", + "options" : ""} + self.mapping["impl"] = { "compile" : "quartus", + "callback" : ""} def __getattr__(self, entry): return self.mapping[entry] @@ -110,51 +117,50 @@ class Device(object): model: str Model of device to place date """ - def __init__(self, types, model, platform, mode): + def __init__(self, types, model, **kwargs): self.types = types self.model = model - self.tool = Tooling(types, model, platform, mode) + self.impls = {"lang": "", + "compiler" : ""} + for key, value in kwargs.items(): + self.impls[key] = value def __getattr__(self, key): - return self.tool.__getattr__(key) + return self.impls[key] class CPU(Device): """cpu device with different models""" - def __init__(self, model, platform="aws_f1", mode="sim"): + def __init__(self, model, **kwargs): if model not in ["riscv", "arm", "x86", "sparc", "powerpc"]: raise DeviceError(model + " not supported yet") - super(CPU, self).__init__("CPU", model, - platform, mode) + super(CPU, self).__init__("CPU", model, **kwargs) def __repr__(self): return "CPU (" + str(self.model) + ")" class FPGA(Device): """fpga device with different models""" - def __init__(self, model, platform="aws_f1", mode="sim"): + def __init__(self, model, **kwargs): if model not in ["xilinx", "intel"]: raise DeviceError(model + " not supported yet") - super(FPGA, self).__init__("FPGA", model, - platform, mode) + super(FPGA, self).__init__("FPGA", model, **kwargs) def __repr__(self): return "FPGA (" + str(self.model) + ")" class GPU(Device): """gpu device with different models""" - def __init__(self, model, platform="aws_f1", mode="sim"): + def __init__(self, model, **kwargs): if model not in ["cuda", "rocm"]: raise DeviceError(model + " not supported yet") - super(GPU, self).__init__("GPU", model, - platform, mode) + super(GPU, self).__init__("GPU", model, **kwargs) def __repr__(self): return "GPU (" + str(self.model) + ")" class PIM(Device): """cpu device with 
different models""" - def __init__(self, model, platform="ppac", mode="sim"): + def __init__(self, model, **kwargs): if model not in ["ppac"]: raise DeviceError(model + " not supported yet") - super(CPU, self).__init__("PIM", model, - platform, mode) + super(CPU, self).__init__("PIM", model, **kwargs) def __repr__(self): return "PIM (" + str(self.model) + ")" diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 59dd84ee3..ca8097ce0 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -424,24 +424,24 @@ def build_fpga_kernel(sch, args, target, name="default_function"): fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] try: # generate and split code - # host = target.host.source['compile'] - # builder = getattr(codegen, "build_{0}".format(host)) - # host_code = builder(fdevice) - # findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") - # host_code = host_code[findex + 6 : rindex] - - # device = "aocl" # target.device.source['compile'] - # builder = getattr(codegen, "build_{0}".format(device)) - # device_code = builder(fdevice) - # findex, rindex = device_code.find("{device}"), device_code.rfind("{device}") - # device_code = device_code[findex + 8 : rindex] + host = target.host.compiler + builder = getattr(codegen, "build_{0}".format(host)) + host_code = builder(fdevice) + findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") + host_code = host_code[findex + 6 : rindex] + + xcel = target.xcel.compiler + builder = getattr(codegen, "build_{0}".format(xcel)) + xcel_code = builder(fdevice) + findex, rindex = xcel_code.find("{device}"), xcel_code.rfind("{device}") + xcel_code = xcel_code[findex + 8 : rindex] # test build sim @register_func def tvm_callback_syn_postproc(code): return "test" - if target.mode == "source": return device_code + host_code + if target.mode == "source": return xcel_code + host_code elif target.mode == "sim": builder = 
getattr(codegen, "build_{0}".format("sim")) f = builder(fdevice, ["s"], ["wwq", "swsw"]) diff --git a/samples/digitrec/digitrec_vhls.py b/samples/digitrec/digitrec_vhls.py index 4797ece5f..8bd144968 100644 --- a/samples/digitrec/digitrec_vhls.py +++ b/samples/digitrec/digitrec_vhls.py @@ -121,8 +121,8 @@ def knn_vote(knn_mat, j): knn_update = knn.knn_update # s.stream_to(test_image, hcl.FPGA("intel")) -s.to(train_images, hcl.FPGA("intel")) -s.to(vote, hcl.CPU("x86")) +s.to(train_images, hcl.dev.xcel) +s.to(vote, hcl.dev.host) # Merge loop nests s[diff].compute_at(s[dist], dist.axis[1]) @@ -136,20 +136,15 @@ def knn_vote(knn_mat, j): s[knn_update].pipeline(knn_update.axis[0]) # At the end, we build the whole offloaded function. -print(hcl.lower(s)) +# print(hcl.lower(s)) target = hcl.env.aws_f1 -# target.tool.mode = "sim/impl" -# hcl.sim / sw -# hcl.impl # refer stage -> tool opt cli -# target.tool[''] -# target.host["lang" "compiler"] -# targte.host -# target.xcel # +target.tool.mode = "sim" +target.tool.sim["type"] = "cosim" +target.tool.sim["emulator"] = "vivado_hls" +# target.host.lang = "opencl" +# target.xcel.lang = "hlsc" f = hcl.build(s, target) -# print(f) -# import sys; sys.exit(1) - train_images, _, test_images, test_labels = read_digitrec_data() correct = 0.0 total_time = 0 diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 9b4de219b..7f78b9c02 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -500,14 +500,15 @@ void GenHostCode(TVMArgs& args, const std::vector& arg_types, LoweredFunc func, std::string pre_kernel, - std::string post_kernel) { + std::string post_kernel, + std::vector>>& arg_stream_types) { int indent = 0; std::ofstream stream; stream.open("host.cpp"); // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/host/digit_recognition.cpp"); stream << "#include \n"; stream << "#include \n"; - stream << "\n\n"; + stream << "\n"; stream << "// standard C/C++ 
headers\n"; stream << "#include \n"; stream << "#include \n"; @@ -515,14 +516,14 @@ void GenHostCode(TVMArgs& args, stream << "#include \n"; stream << "#include \n"; stream << "#include \n"; - stream << "\n\n"; + stream << "\n"; stream << "// opencl harness headers\n"; stream << "#include \"CLWorld.h\"\n"; stream << "#include \"CLKernel.h\"\n"; stream << "#include \"CLMemObj.h\"\n"; stream << "// harness namespace\n"; stream << "using namespace rosetta;\n"; - stream << "\n\n"; + stream << "\n"; stream << "//other headers\n"; stream << "#include \"utils.h\"\n"; stream << "#include \"typedefs.h\"\n"; @@ -585,12 +586,29 @@ void GenHostCode(TVMArgs& args, // stream << "fool_" << cnt << "[1] = { arg_top_" << i << " };\n"; cnt += 1; } - stream << "\n\n"; + stream << "\n"; + } + // allocate mem for stream vars + for (size_t k = args.size(); k < arg_stream_types.size(); k++) { + auto type = std::get<1>(arg_stream_types[k]); + auto shape = std::get<2>(arg_stream_types[k]); + PrintIndent(stream, indent); + stream << Type2Byte(Type2TVMType(type)) << " " << "knn_mat["; + if (shape.size() > 0) { + for (size_t i = 0; i < shape.size(); i++) { + if (i != shape.size() - 1) + stream << shape[i] << " * "; + else stream << shape[i]; + } + } else { + stream << "1"; + } + stream << "];\n"; } // generate host side (before) on arg_top_k PrintIndent(stream,indent); - stream << "printf(\"Digit Recognition Application\\n\");\n"; + stream << "printf(\"Host Side Application\\n\");\n"; stream << "\n"; PrintIndent(stream, indent); stream << "// compute bofore kernel function"; @@ -604,12 +622,12 @@ void GenHostCode(TVMArgs& args, stream << "std::string kernelFile(\"\");\n"; PrintIndent(stream, indent); stream << "parse_sdaccel_command_line_args(argc, argv, kernelFile);\n"; - stream << "\n\n"; + stream << "\n"; PrintIndent(stream, indent); stream << "// create OpenCL world\n"; PrintIndent(stream, indent); stream << "CLWorld digit_rec_world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR);\n"; 
- stream << "\n\n"; + stream << "\n"; PrintIndent(stream, indent); stream << "// add the bitstream file\n"; PrintIndent(stream, indent); @@ -654,11 +672,33 @@ void GenHostCode(TVMArgs& args, stream << ", "; stream << "CL_MEM_READ_WRITE);\n"; } + // addiion streamed data + for (size_t k = args.size(); k < arg_stream_types.size(); k++) { + auto type = std::get<1>(arg_stream_types[k]); + auto shape = std::get<2>(arg_stream_types[k]); + PrintIndent(stream, indent); + stream << "CLMemObj source_" << k; + stream << "((void*)knn_mat"; + stream << ", sizeof(" << Type2Byte(Type2TVMType(type)) << "), "; + if (shape.size() > 0) { + for (size_t j = 0; j < shape.size(); j++) { + if (j == 0) { + stream << shape[j] << " "; + } else { + stream << "* " << shape[j]; + } + } + } else { + stream << "1"; + } + stream << ", "; + stream << "CL_MEM_READ_WRITE);\n"; + } - stream << "\n\n"; + stream << "\n"; PrintIndent(stream, indent); stream << "// add them to the world\n"; - for (int i = 0;i < args.size();i++) { + for (size_t i = 0;i < arg_stream_types.size();i++) { PrintIndent(stream, indent); stream << "digit_rec_world.addMemObj(source_" << i; stream << ");\n"; @@ -668,24 +708,30 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << " // set work size\n"; PrintIndent(stream, indent); - stream << "int global_size[3] = {1, 1, 1};\n"; + int size = arg_stream_types.size(); + std::string arr = "[" + std::to_string(size) + "] = {"; + for (int i = 0; i < size; i++) { + if (i != size -1) arr += "1, "; + else arr += "1};\n"; + } + stream << "int global_size" + arr; PrintIndent(stream, indent); - stream << "int local_size[3] = {1, 1, 1};\n"; + stream << "int local_size" + arr; PrintIndent(stream, indent); stream << "App.set_global(global_size);\n"; PrintIndent(stream, indent); stream << "App.set_local(local_size);\n"; - stream << "\n\n"; + stream << "\n"; PrintIndent(stream, indent); stream << "// add them to the world\n"; PrintIndent(stream, indent); stream << 
"digit_rec_world.addKernel(App);\n"; - stream << "\n\n"; + stream << "\n"; PrintIndent(stream, indent); stream << "// set kernel arguments\n"; // PrintIndent(stream, indent); // stream << "digit_rec_world.setConstKernelArg(0, 0, arg_top_0);\n"; - for (int i = 0;i < args.size();i++) { + for (size_t i = 0;i < arg_stream_types.size();i++) { PrintIndent(stream, indent); stream << "digit_rec_world.setMemKernelArg(0, "<< i << ", " << i; stream << ");\n"; @@ -762,8 +808,9 @@ class SimModuleNode final : public ModuleNode { // generate interface wrapper for kernel args GenWrapperCode(args, shmids, arg_types, arg_stream_types_, func_); // host code invoking extern c wrapped hlsc kernel + GenHostCode(args, shmids, arg_types, func_, + pre_host_, post_host_, arg_stream_types_); GenKernelCode(dev_); - GenHostCode(args, shmids, arg_types, func_, pre_host_, post_host_); // TODO: find a better way to do the following LOG(CLEAN) << "Compiling the generated HLS C code ..."; @@ -852,14 +899,10 @@ class TypeCollector final : public IRVisitor { // vars include passed-in and not registered vars on host class StreamCollector final : public IRVisitor { public: - StreamCollector(std::vector& stream_stmt_list, - std::vector& stream_expr_list, - std::vector& arg_vars, + StreamCollector(std::vector& arg_vars, std::unordered_map& stream_table, std::string initial_scope) - : stream_stmt_list_(stream_stmt_list), - stream_expr_list_(stream_expr_list), - arg_vars_(arg_vars), + : arg_vars_(arg_vars), stream_table_(stream_table), scope_(initial_scope) {} @@ -949,8 +992,6 @@ class StreamCollector final : public IRVisitor { std::unordered_map host_def_count_; private: - std::vector& stream_stmt_list_; - std::vector& stream_expr_list_; std::vector& arg_vars_; std::unordered_map& stream_table_; std::string scope_; @@ -984,10 +1025,7 @@ class CodeGenXcel : public CodeGenVivadoHLS { PrintIndent(); // track the stream usage - std::vector stream_stmts; - std::vector stream_exprs; - StreamCollector 
collector(stream_stmts, stream_exprs, - arg_vars, stream_table, "cpu"); + StreamCollector collector(arg_vars, stream_table, "cpu"); collector.Visit(op->body); // update data type and name @@ -1119,11 +1157,8 @@ class CodeGenHost : public CodeGenAOCL { PrintIndent(); // track the stream usage - std::vector stream_stmts; - std::vector stream_exprs; var2nameType unreg_vars; - StreamCollector collector(stream_stmts, stream_exprs, - arg_vars, stream_table, "cpu"); + StreamCollector collector(arg_vars, stream_table, "cpu"); collector.Visit(op->body); // update data type and name for (size_t k = 0; k < arg_vars.size(); k ++) @@ -1134,7 +1169,7 @@ class CodeGenHost : public CodeGenAOCL { visitor.Visit(op->body); // generte function calls - stream << "top(oo"; + stream << "top("; // int index = 0; // for (auto op : stream_stmts) { // if (index !=0) stream << ", "; From 99fe2b7f85c7b7284f3e7061bdc86d4738470008 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Thu, 31 Oct 2019 12:58:33 -0400 Subject: [PATCH 086/103] [update] new build interface --- python/heterocl/devices.py | 233 +++++++++------- python/heterocl/tools.py | 107 +++++++ python/heterocl/tvm/build_module.py | 19 +- python/heterocl/tvm/schedule.py | 2 +- samples/digitrec/digitrec_vhls.py | 26 +- tvm/src/codegen/build_common.cc | 162 ++++++++++- tvm/src/template/design/CLKernel.cpp | 67 +++++ tvm/src/template/design/CLKernel.h | 96 +++++++ tvm/src/template/design/CLMemObj.cpp | 57 ++++ tvm/src/template/design/CLMemObj.h | 57 ++++ tvm/src/template/design/CLWorld.cpp | 401 +++++++++++++++++++++++++++ tvm/src/template/design/CLWorld.h | 129 +++++++++ tvm/src/template/design/Makefile | 33 +++ tvm/src/template/design/harness.mk | 196 +++++++++++++ tvm/src/template/design/run.tcl | 14 + tvm/src/template/design/run_hw.sh | 28 ++ tvm/src/template/design/run_sw.sh | 51 ++++ tvm/src/template/design/utils.cpp | 46 +++ tvm/src/template/design/utils.h | 19 ++ 19 files changed, 1605 insertions(+), 138 deletions(-) create mode 100644 
python/heterocl/tools.py create mode 100644 tvm/src/template/design/CLKernel.cpp create mode 100644 tvm/src/template/design/CLKernel.h create mode 100644 tvm/src/template/design/CLMemObj.cpp create mode 100644 tvm/src/template/design/CLMemObj.h create mode 100644 tvm/src/template/design/CLWorld.cpp create mode 100644 tvm/src/template/design/CLWorld.h create mode 100644 tvm/src/template/design/Makefile create mode 100644 tvm/src/template/design/harness.mk create mode 100644 tvm/src/template/design/run.tcl create mode 100755 tvm/src/template/design/run_hw.sh create mode 100755 tvm/src/template/design/run_sw.sh create mode 100644 tvm/src/template/design/utils.cpp create mode 100644 tvm/src/template/design/utils.h diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 50d5244ab..127dc9e81 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -1,82 +1,21 @@ """Define HeteroCL device types""" #pylint: disable=too-few-public-methods, too-many-return-statements from .debug import DeviceError +from .tools import option_table, model_table -class platform(type): - """The platform class for compute environment setups - - serves as meta-class for attr getting - default platform: aws_f1, zynq, ppac - - Parameters - ---------- - host: str - Device of device to place data - model: str - Model of device to place date - """ +class tooling(type): def __getattr__(cls, key): - if key == "aws_f1": - host = CPU("x86", compiler="aocl", lang="opencl") - xcel = FPGA("xilinx", compiler="vhls", lang="hlsc") - elif key == "zynq": - host = CPU("arm") - xcel = FPGA("xilinx") - elif key == "ppac": - host = CPU("riscv") - xcel = PIM("ppac") - else: # unsupported device - raise DeviceError("not supported") - tool = Tooling(key, host, xcel) - return cls(host, xcel, tool) - -class env(metaclass=platform): - mode = "sim" - def __init__(self, host, xcel, tool): - self.host = host - self.xcel = xcel - self.tool = tool - - def __getattr__(self, key): - return 
self.tool.__getattr__(key) - - def __call__(self, host=None, xcel=None, tool=None): - if host: - assert isinstance(host, Device) - self.host = host - if xcel: - assert isinstance(xcel, Device) - self.xcel = xcel - if tool: - assert isinstance(tool, Tooling) - self.tool = tool - - def __str__(self): - return str(self.host) + " : " + \ - str(self.xcel) - - def __repr__(self): - return str(self.host) + " : " + \ - str(self.xcel) - -class device(type): - def __getattr__(cls, key): - if key == "host": - return CPU("x86") - elif key == "xcel": - return FPGA("xilinx") + if key in option_table: + return cls(key, *option_table[key]) else: # unsupported device raise DeviceError("not supported") -class dev(metaclass=device): - pass - -class Tooling(object): +class tool(metaclass=tooling): """The base class for all device tooling - each device tooling object maintains a stage dict - including mapping from stage -> impl/sim tool + options - stop impl/sim where running into end of stage list + mode (sim/impl) is decided by tool configuration + e.g. 
run sw emulation by passing gcc / vivado_hls arg + and actual impl by passing sdaccel / aocl arg Parameters ---------- @@ -85,25 +24,34 @@ class Tooling(object): model: str Model of device to place date """ - def __init__(self, platform, host, xcel): - self.platform = platform - self.mode = "sim" - self.host = host - self.xcel = xcel - self.mapping = {} - self.mapping["sim"] = { "type" : "csim", - "emulator" : "vivado_hls", - "options" : ""} - self.mapping["impl"] = { "compile" : "quartus", - "callback" : ""} + def __init__(self, name, mode, kwargs): + self.name = name + self.mode = mode + self.options = kwargs def __getattr__(self, entry): return self.mapping[entry] + def __call__(self, mode, setting): + self.mode = mode + self.options = setting + return self + def __str__(self): - return str(self.platform) + ":" + \ - str(self.model) + "(" + \ - str(self.mode) + ")" + return str(self.name) + "-" + \ + str(self.mode) + ":\n" + \ + str(self.options) + + def __repr__(self): + return str(self.name) + "-" + \ + str(self.mode) + ":\n" + \ + str(self.options) + +tool_table = { + "aws_f1" : tool("sdaccel", *option_table["sdaccel"]), + "zc706" : tool("vivado_hls", *option_table["vivado_hls"]), + "stratix10_sx": tool("aocl", *option_table["aocl"]) +} class Device(object): """The base class for all device types @@ -117,53 +65,132 @@ class Device(object): model: str Model of device to place date """ - def __init__(self, types, model, **kwargs): + def __init__(self, types, vendor, + model, **kwargs): + self.vendor = vendor self.types = types self.model = model - self.impls = {"lang": "", - "compiler" : ""} + self.impls = {"lang": ""} for key, value in kwargs.items(): self.impls[key] = value def __getattr__(self, key): return self.impls[key] + def set_lang(self, lang): + assert lang in \ + ["opencl", "hlsc", "c", "opengl", "merlinc", "cuda", "metal"], \ + "unsupported lang sepc" + lang + self.impls["lang"] = lang + return self + class CPU(Device): """cpu device with different 
models""" - def __init__(self, model, **kwargs): - if model not in ["riscv", "arm", "x86", "sparc", "powerpc"]: - raise DeviceError(model + " not supported yet") - super(CPU, self).__init__("CPU", model, **kwargs) + def __init__(self, vendor, model, **kwargs): + if vendor not in ["riscv", "arm", "intel", "sparc", "powerpc"]: + raise DeviceError(vendor + " not supported yet") + assert "cpu_" + model in model_table[vendor], \ + model + " not supported yet" + super(CPU, self).__init__("CPU", vendor, model, **kwargs) def __repr__(self): - return "CPU (" + str(self.model) + ")" + return "cpu-" + self.vendor + "-" + str(self.model) + \ + ":" + self.impls["lang"] class FPGA(Device): """fpga device with different models""" - def __init__(self, model, **kwargs): - if model not in ["xilinx", "intel"]: - raise DeviceError(model + " not supported yet") - super(FPGA, self).__init__("FPGA", model, **kwargs) + def __init__(self, vendor, model, **kwargs): + if vendor not in ["xilinx", "intel"]: + raise DeviceError(vendor + " not supported yet") + assert "fpga_" + model in model_table[vendor], \ + model + " not supported yet" + super(FPGA, self).__init__("FPGA", vendor, model, **kwargs) def __repr__(self): - return "FPGA (" + str(self.model) + ")" + return "fpga-" + self.vendor + "-" + str(self.model) + \ + ":" + self.impls["lang"] class GPU(Device): """gpu device with different models""" - def __init__(self, model, **kwargs): - if model not in ["cuda", "rocm"]: - raise DeviceError(model + " not supported yet") - super(GPU, self).__init__("GPU", model, **kwargs) + def __init__(self, vendor, model, **kwargs): + if vendor not in ["nvidia", "amd"]: + raise DeviceError(vendor + " not supported yet") + assert "gpu_" + model in model_table[vendor], \ + model + " not supported yet" + super(GPU, self).__init__("GPU", vendor, model, **kwargs) def __repr__(self): - return "GPU (" + str(self.model) + ")" + return "gpu-" + self.vendor + "-" + str(self.model) + \ + ":" + self.impls["lang"] 
class PIM(Device): """cpu device with different models""" - def __init__(self, model, **kwargs): + def __init__(self, vendor, model, **kwargs): if model not in ["ppac"]: raise DeviceError(model + " not supported yet") - super(CPU, self).__init__("PIM", model, **kwargs) + super(PIM, self).__init__("PIM", vendor, model, **kwargs) def __repr__(self): return "PIM (" + str(self.model) + ")" +dev_table = { + "aws_f1" : [CPU("intel", "e5"), FPGA("xilinx", "xcvu19p")], + "zc706" : [CPU("arm", "a9"), FPGA("xilinx", "xc7z045")], + "stratix10_sx": [CPU("arm", "a53"), FPGA("intel", "stratix10_gx")] +} + +class env(type): + """The platform class for compute environment setups + + serves as meta-class for attr getting + default platform: aws_f1, zynq, ppac + + Parameters + ---------- + host: str + Device of device to place data + model: str + Model of device to place date + """ + def __getattr__(cls, key): + if key == "aws_f1": + devs = dev_table[key] + host = devs[0].set_lang("opencl") + xcel = devs[1].set_lang("hlsc") + elif key == "zynq": + host = CPU("arm") + xcel = FPGA("xilinx") + elif key == "ppac": + host = CPU("riscv") + xcel = PIM("ppac") + else: # unsupported device + raise DeviceError("not supported") + tool = tool_table[key] + return cls(key, devs, host, xcel, tool) + +class platform(metaclass=env): + def __init__(self, name, devs, host, xcel, tool): + self.name = name + self.devs = devs + self.host = host + self.xcel = xcel + self.tool = tool + + def __getattr__(self, key): + return self.tool.__getattr__(key) + + def __call__(self, tooling=None): + if tooling: # check and update + assert isinstance(tooling, tool) + self.tool = tooling + return self + + def __str__(self): + return str(self.name) + "(" + \ + str(self.host) + " : " + \ + str(self.xcel) + ")" + + def __repr__(self): + return str(self.name) + "(" + \ + str(self.host) + " : " + \ + str(self.xcel) + ")" + def device_to_str(dtype): """Convert a device type to string format. 
diff --git a/python/heterocl/tools.py b/python/heterocl/tools.py new file mode 100644 index 000000000..38d5e79d3 --- /dev/null +++ b/python/heterocl/tools.py @@ -0,0 +1,107 @@ +"""Define HeteroCL default tool settings""" +#pylint: disable=too-few-public-methods, too-many-return-statements + +model_table = { + "xilinx" : ["fpga_xc7z045", "fpga_xcvu19p"], + "intel" : ["cpu_e5", "cpu_i7", "fpga_stratix10_gx", + "fpga_stratix10_dx", "fpga_stratix10_mx"], + "arm" : ["cpu_a7", "cpu_a9", "cpu_a53"], + "riscv" : ["riscv"] +} + +option_table = { + "sdaccel" : ("sw_emu", {"version" : "2017.1", + "clock" : 1}), + "vivado_hls" : ("csim", {"version" : "2017.1"}), + + # refer to xilinx2016_1/ug904-vivado-implementation.pdf + "vivado" : ("pnr", + {"version" : "2017.1", + "logic" : ["Default", "Explore", "ExploreSequentialArea", "AddRemap", "ExploreArea"], + "placement" : ["Default", "Explore", "ExtraNetDelay_high", "ExtraNetDelay_medium", "ExtraNetDelay_low", "ExtraPostPlacementOpt", "WLDrivenBlockPlacement", "LateBlockPlacement", "AltSpreadLogic_low", "AltSpreadLogic_medium", "AltSpreadLogic_high"], + "routing" : ["Default", "Explore", "HigherDelayCost"], + "fanout_opt" : ["on", "off"], + "placement_opt" : ["on", "off"], + "critical_cell_opt" : ["on", "off"], + "critical_pin_opt" : ["on", "off"], + "retime" : ["on", "off"], + "rewire" : ["on", "off"], + }), + + "quartus" : ("pnr", + {"version" : "17.1", + "auto_dsp_recognition" : ['On', 'Off'], + "disable_register_merging_across_hierarchies": ['On', 'Off', 'Auto'], + "mux_restructure" : ['On', 'Off', 'Auto'], + "optimization_technique" : ['Area', 'Speed', 'Balanced'], + "synthesis_effort" : ['Auto', 'Fast'], + "synth_timing_driven_synthesis" : ['On', 'Off'], + "fitter_aggressive_routability_optimization" : ['Always', 'Automatically', 'Never'], + "fitter_effort" : ['Standard Fit', 'Auto Fit'], + "remove_duplicate_registers" : ['On', 'Off'], + "physical_synthesis" : ['On', 'Off'], + "adv_netlist_opt_synth_wysiwyg_remap" : ['On', 
'Off'], + "allow_any_ram_size_for_recognition" : ['On', 'Off'], + "allow_any_rom_size_for_recognition" : ['On', 'Off'], + "allow_any_shift_register_size_for_recognition" : ['On', 'Off'], + "allow_power_up_dont_care" : ['On', 'Off'], + "allow_shift_register_merging_across_hierarchies" : ["Always", "Auto", "Off"], + "allow_synch_ctrl_usage" : ['On', 'Off'], + "auto_carry_chains" : ['On', 'Off'], + "auto_clock_enable_recognition" : ['On', 'Off'], + "auto_dsp_recognition" : ['On', 'Off'], + "auto_enable_smart_compile" : ['On', 'Off'], + "auto_open_drain_pins" : ['On', 'Off'], + "auto_ram_recognition" : ['On', 'Off'], + "auto_resource_sharing" : ['On', 'Off'], + "auto_rom_recognition" : ['On', 'Off'], + "auto_shift_register_recognition" : ["Always", "Auto", "Off"], + "disable_register_merging_across_hierarchies" : ["Auto", "On", "Off"], + "enable_state_machine_inference" : ['On', 'Off'], + "force_synch_clear" : ['On', 'Off'], + "ignore_carry_buffers" : ['On', 'Off'], + "ignore_cascade_buffers" : ['On', 'Off'], + "ignore_max_fanout_assignments" : ['On', 'Off'], + "infer_rams_from_raw_logic" : ['On', 'Off'], + "mux_restructure" : ["Auto", "On", "Off"], + "optimization_technique" : ["Area", "Balanced", "Speed"], + "optimize_power_during_synthesis" : ["Extra effort", "Normal compilation", "Off"], + "remove_duplicate_registers" : ['On', 'Off'], + "shift_register_recognition_aclr_signal" : ['On', 'Off'], + "state_machine_processing" : + ["Auto", "Gray", "Johnson, Minimal Bits", "One-Hot", "Sequential", "User-Encoded"], + "strict_ram_recognition" : ['On', 'Off'], + "synthesis_effort" : ["Auto", "Fast"], + "synthesis_keep_synch_clear_preset_behavior_in_unmapper" : ['On', 'Off'], + "synth_resource_aware_inference_for_block_ram" : ['On', 'Off'], + "synth_timing_driven_synthesis" : ['On', 'Off'], + "alm_register_packing_effort" : ["High", "Low", "Medium"], + "auto_delay_chains" : ['On', 'Off'], + "auto_delay_chains_for_high_fanout_input_pins" : ["On", "Off"], + 
"eco_optimize_timing" : ["On", "Off"], + "final_placement_optimization" : ["Always", "Automatically", "Never"], + "fitter_aggressive_routability_optimization" : ["Always", "Automatically", "Never"], + "fitter_effort" : ["Standard Fit", "Auto Fit"], + "optimize_for_metastability" : ["On", "Off"], + "optimize_hold_timing" : ["All Paths", "IO Paths and Minimum TPD Paths", "Off"], + "optimize_ioc_register_placement_for_timing" : + ["Normal", "Off", "Pack All IO Registers"], + "optimize_multi_corner_timing" : ['On', 'Off'], + "optimize_power_during_fitting" : ["Extra effort", "Normal compilation", "Off"], + "physical_synthesis" : ['On', 'Off'], + "placement_effort_multiplier" : [0.2, 0.5, 1.0, 2.0, 3.0, 4.0], + "programmable_power_technology_setting" : ["Automatic", "Force All Tiles with Failing Timing Paths to High Speed", "Force All Used Tiles to High Speed", "Minimize Power Only"], + "qii_auto_packed_registers" : ["Auto", "Minimize Area", "Minimize Area with Chains", "Normal", "Off", "Sparse", "Sparse Auto"], + "router_clocking_topology_analysis" : ['On', 'Off'], + "router_lcell_insertion_and_logic_duplication" : ["Auto", "On", "Off"], + "router_register_duplication" : ["Auto", "On", "Off"], + "router_timing_optimization_level" : ["MINIMUM", "Normal", "MAXIMUM"], + "seed" : (1, 5), + "tdc_aggressive_hold_closure_effort" : ['On', 'Off'], + "allow_register_retiming" : ['On', 'Off']}), + + "aocl" : ("emu", {"version" : "17.0", + "clokc" : 1.5, + }) +} + diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index ca8097ce0..7d07e13b5 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -22,7 +22,7 @@ from . import ndarray from . import target as _target from . 
import make -from ..devices import env +from ..devices import platform class DumpIR(object): """ @@ -424,25 +424,30 @@ def build_fpga_kernel(sch, args, target, name="default_function"): fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] try: # generate and split code - host = target.host.compiler + if "sdaccel" in str(target.tool): + host = target.host.lang.replace("opencl", "aocl") + xcel = target.xcel.lang.replace("hlsc", "vhls") builder = getattr(codegen, "build_{0}".format(host)) host_code = builder(fdevice) findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") host_code = host_code[findex + 6 : rindex] - xcel = target.xcel.compiler builder = getattr(codegen, "build_{0}".format(xcel)) xcel_code = builder(fdevice) findex, rindex = xcel_code.find("{device}"), xcel_code.rfind("{device}") xcel_code = xcel_code[findex + 8 : rindex] - + # test build sim @register_func def tvm_callback_syn_postproc(code): return "test" - if target.mode == "source": return xcel_code + host_code - elif target.mode == "sim": + @register_func + def get_util_path(path): + return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/design/" + + if target.tool.mode == "source": return xcel_code + host_code + elif "emu" in str(target.tool.mode): builder = getattr(codegen, "build_{0}".format("sim")) f = builder(fdevice, ["s"], ["wwq", "swsw"]) return f @@ -499,7 +504,7 @@ def build(sch, target = _target.current_target() if target is None else target target = _target.create(target) if target else _target.create("llvm") else: # platform target - assert isinstance(target, env), "unsupported target type" + assert isinstance(target, platform), "unsupported target type" return build_fpga_kernel(sch, args, target, name=name) BuildConfig.current = build_config() diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index c07532efd..17b793189 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -354,7 +354,7 @@ def 
to(self, tensor, dst, src, """ # create producer and consumer for stream if isinstance(dst, Device): - dst = 1 if 'FPGA' in str(dst) else 0 + dst = 1 if 'fpga' in str(dst) else 0 return _api_internal._ScheduleMove(self, tensor, dst, types, depth, name) else: # connect kernel diff --git a/samples/digitrec/digitrec_vhls.py b/samples/digitrec/digitrec_vhls.py index 8bd144968..d87e5dedd 100644 --- a/samples/digitrec/digitrec_vhls.py +++ b/samples/digitrec/digitrec_vhls.py @@ -11,8 +11,15 @@ dtype_image = hcl.UInt(N) dtype_knnmat = hcl.UInt(max_bit) -def knn(test_image, train_images): +# set up the platform and tool +setting = { + "version" : "2019.1", + "clock" : 10 +} +tool = hcl.tool.vivado("csim", setting) +target = hcl.platform.aws_f1 +def knn(test_image, train_images): # Imperative programming and bit operations (§2) def popcount(num): out = hcl.local(0, "out") @@ -96,7 +103,7 @@ def knn_vote(knn_mat, j): hcl.mutate((10, 3), lambda x, y: sort_knn(knn_mat, x, y), "sort") # Sixth step: compute the score baord ranking - knn_new = hcl.compute(knn_mat.shape, lambda x, y: knn_mat[x][y], "new") + knn_new = hcl.compute(knn_mat.shape, lambda x, y: knn_mat[x][y], "copy") knn_pred = hcl.compute((10,), lambda x: knn_vote(knn_mat, x), "vote") # computed data @@ -117,12 +124,12 @@ def knn_vote(knn_mat, j): diff = knn.diff dist = knn.dist -vote = knn.new +vote = knn.copy knn_update = knn.knn_update # s.stream_to(test_image, hcl.FPGA("intel")) -s.to(train_images, hcl.dev.xcel) -s.to(vote, hcl.dev.host) +s.to(train_images, target.xcel) +s.to(vote, target.host) # Merge loop nests s[diff].compute_at(s[dist], dist.axis[1]) @@ -137,12 +144,6 @@ def knn_vote(knn_mat, j): # At the end, we build the whole offloaded function. 
# print(hcl.lower(s)) -target = hcl.env.aws_f1 -target.tool.mode = "sim" -target.tool.sim["type"] = "cosim" -target.tool.sim["emulator"] = "vivado_hls" -# target.host.lang = "opencl" -# target.xcel.lang = "hlsc" f = hcl.build(s, target) train_images, _, test_images, test_labels = read_digitrec_data() @@ -158,9 +159,8 @@ def knn_vote(knn_mat, j): total_time = total_time + (time.time() - start) knn_mat = hcl_knn_pred.asnumpy() - print(knn_mat) - if knn_mat == test_labels[i]: + if np.argmax(knn_mat) == test_labels[i]: correct += 1 print("Average kernel time (s): {:.2f}".format(total_time/1)) diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 7f78b9c02..62c12602b 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -329,7 +329,7 @@ void PrintCopyBack(TVMArray* arr, void GenKernelCode(std::string test_file) { std::ofstream stream; // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/knn_vhls.cpp"); - stream.open("kernel.cpp"); + stream.open("__tmp__/kernel.cpp"); stream << test_file; stream.close(); } @@ -343,9 +343,10 @@ void GenWrapperCode(TVMArgs& args, std::ofstream stream; // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/digitrec.cpp"); int indent = 0; - stream.open("interface.cpp"); + std::string path(getenv("PWD")); + stream.open("__tmp__/interface.cpp"); stream << "#include \n"; - stream << "#include \"kernel.cpp\"\n"; + stream << "#include \"" + path + "/__tmp__/kernel.cpp\"\n"; stream << "\n\n"; stream << "extern \"C\" \n"; stream << "{\n"; @@ -373,7 +374,7 @@ void GenWrapperCode(TVMArgs& args, // memeory and control pragma for (size_t i = 0; i < arg_stream_types.size(); i++) { std::string interface; - if (std::get<0>(arg_stream_types[i])) interface = " axis "; + if (std::get<0>(arg_stream_types[i])) interface = " m_axi "; else interface = " m_axi "; PrintIndent(stream, indent); stream << "#pragma HLS INTERFACE" + interface + 
"port="; @@ -382,7 +383,7 @@ void GenWrapperCode(TVMArgs& args, } for (size_t i = 0; i < arg_stream_types.size(); i++) { std::string interface; - if (std::get<0>(arg_stream_types[i])) interface = " axis "; + if (std::get<0>(arg_stream_types[i])) interface = " s_axilite "; else interface = " s_axilite "; PrintIndent(stream, indent); stream << "#pragma HLS INTERFACE" + interface + "port="; @@ -504,7 +505,7 @@ void GenHostCode(TVMArgs& args, std::vector>>& arg_stream_types) { int indent = 0; std::ofstream stream; - stream.open("host.cpp"); + stream.open("__tmp__/host.cpp"); // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/host/digit_recognition.cpp"); stream << "#include \n"; stream << "#include \n"; @@ -526,7 +527,7 @@ void GenHostCode(TVMArgs& args, stream << "\n"; stream << "//other headers\n"; stream << "#include \"utils.h\"\n"; - stream << "#include \"typedefs.h\"\n"; + // stream << "#include \"typedefs.h\"\n"; stream << "int main(int argc, char ** argv) {\n"; indent += 2; @@ -805,21 +806,23 @@ class SimModuleNode final : public ModuleNode { CollectArgInfo(args, func_, arg_sizes, arg_types); GenSharedMem(args, shmids, arg_sizes); + + LOG(CLEAN) << "Generating harness files ..."; + system("rm -rf __tmp__; mkdir __tmp__"); // generate interface wrapper for kernel args GenWrapperCode(args, shmids, arg_types, arg_stream_types_, func_); // host code invoking extern c wrapped hlsc kernel GenHostCode(args, shmids, arg_types, func_, pre_host_, post_host_, arg_stream_types_); GenKernelCode(dev_); + std::string path; + if (const auto* f = Registry::Get("get_util_path")) + path = (*f)("aws_f1").operator std::string(); + system(("cp " + path + "/* __tmp__/").c_str()); - // TODO: find a better way to do the following - LOG(CLEAN) << "Compiling the generated HLS C code ..."; - system("g++ main.cpp -o out"); LOG(CLEAN) << "Running SW simulation ..."; - system("source ./run_sw.sh"); - system("./out"); + system("cd __tmp__; source ./run_sw.sh"); 
LOG(CLEAN) << "Finished C simulation"; - system("rm out main.cpp"); FreeSharedMem(args, shmids, arg_sizes); // extract resource information if (const auto* f = Registry::Get("tvm_callback_syn_postproc")) { @@ -1055,7 +1058,10 @@ class CodeGenXcel : public CodeGenVivadoHLS { // print kernel func signature if (index !=0) arg_stream << ", "; PrintType(std::get<1>(arg_top_vars[v]), arg_stream); - arg_stream << "* " << arg_name; + auto shape = std::get<2>(arg_top_vars[v]); + arg_stream << " " << arg_name; + for (size_t k = 0; k < shape.size(); k++) + arg_stream << "[" << shape[k] << "]"; index++; } stream << ");\n"; @@ -1136,6 +1142,41 @@ class CodeGenXcel : public CodeGenVivadoHLS { // PrintExpr(op->value, stream); // stream << vid << ".write()\n"; }; + + void VisitStmt_(const Allocate* op) { + std::string vid = AllocVarID(op->buffer_var.get()); + CHECK(!is_zero(op->condition)); + int32_t constant_size = op->constant_allocation_size(); + CHECK_GT(constant_size, 0) + << "Can only handle constant size stack allocation for now"; + const Variable* buffer = op->buffer_var.as(); + var_shape_map_[buffer] = op->extents; + std::string scope = alloc_storage_scope_.at(buffer); + PrintStorageScope(scope, stream); + + // initlize hls stream channel + if (arg_top_vars.count(buffer) || + vid.find("stream_") != std::string::npos) { + } else { + this->PrintIndent(); + PrintType(op->type, stream); + stream << ' '<< vid; + if (constant_size > 1) {// Transfer length one array to scalar + for (size_t i = 0; i < op->extents.size(); i++) { + stream << '['; + PrintExpr(op->extents[i], stream); + stream << "]"; + } + } + stream << ";\n"; + } + buf_length_map_[buffer] = constant_size; + RegisterHandleType(op->buffer_var.get(), op->type); + for (size_t i = 0; i < op->attrs.size(); i++) { + this->PrintStmt(op->attrs[i]); + } + this->PrintStmt(op->body); + }; }; // replace host-device interface args with pragma @@ -1149,6 +1190,99 @@ class CodeGenHost : public CodeGenAOCL { std::unordered_map 
stream_table; var2nameType arg_top_vars; + void PrintType(Type t, std::ostream &os) { + int lanes = t.lanes(); + + if(t.is_handle()) + { + os << "void*";return; + } + if(t==Bool()) + { + os <<"bool"; return; + } + CHECK_EQ(lanes,1) + << "do not yet support vector types"; + + bool fail = false; + if(t.is_float()) + { + switch(t.bits()) + { + case 16: + os<<"half"; + // enable_fp16_ = true; + break; + case 32: + os<<"float"; + break; + case 64: + os<< "double"; + // enable_fp64_ = true; + break; + default: + fail = true; + break; + } + if(!fail && lanes ==1)return; + if(!fail&&(lanes >= 2 && lanes <=16)) + { + os<=2 && lanes <= 16)) + { + os< 64) { + os << "uint" << "64" << "_t"; return; + } else { + std::string str; + if (t.bits() <= 8) str = "8"; + else if (t.bits() <= 16) str = "16"; + else if (t.bits() <= 32) str = "32"; + else str = "64"; + os<< "uint"<< str <<"_t"; return; + } + } + if(t.is_int()) + { + if (t.bits() > 64) { + os << "int" << "64" << "_t"; return; + } else { + std::string str; + if (t.bits() <= 8) str = "8"; + else if (t.bits() <= 16) str = "16"; + else if (t.bits() <= 32) str = "32"; + else str = "64"; + os << "int" << str << "_t"; return; + } + } + } + } + + LOG(FATAL) << "Cannot convert type"<attr_key == ir::attr::device_scope) { // print top( ... 
in host and enter fpga scope diff --git a/tvm/src/template/design/CLKernel.cpp b/tvm/src/template/design/CLKernel.cpp new file mode 100644 index 000000000..84cf29465 --- /dev/null +++ b/tvm/src/template/design/CLKernel.cpp @@ -0,0 +1,67 @@ +/*===============================================================*/ +/* */ +/* CLKernel.cpp */ +/* */ +/* Defines the object class for an OpenCL kernel */ +/* */ +/*===============================================================*/ + +#include "CLKernel.h" +#include + +namespace rosetta +{ + // initialize the kernel from binary file + CLKernel::CLKernel(cl_context context, cl_program program, std::string kernel_name, cl_device_id device_id) + { + printf("Creating kernel %s ... ", kernel_name.c_str()); + + int err; + + // set the name and device ID + this->device_id = device_id; + this->kernel_name = kernel_name; + + // Create the compute kernel in the program we wish to run + kernel = clCreateKernel(program, kernel_name.c_str(), &err); + if (!kernel || err != CL_SUCCESS) + { + printf("Error: Failed to create compute kernel!\n"); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + + printf("Done!\n"); + } + + void CLKernel::set_global(int global_work_size[3]) + { + printf("Set global work size of kernel %s to [%d, %d, %d]\n", kernel_name.c_str(), + global_work_size[0], global_work_size[1], global_work_size[2]); + + for (int i = 0; i < 3; i ++ ) + this->global_size[i] = global_work_size[i]; + } + + void CLKernel::set_local(int local_work_size[3]) + { + printf("Set local work size of kernel %s to [%d, %d, %d]\n", kernel_name.c_str(), + local_work_size[0], local_work_size[1], local_work_size[2]); + + for (int i = 0; i < 3; i ++ ) + this->local_size[i] = local_work_size[i]; + } + + std::string CLKernel::get_name() + { + return this->kernel_name; + } + + void CLKernel::releaseKernel() + { + printf("Release kernel %s ... 
", kernel_name.c_str()); + // release kernel + clReleaseKernel(kernel); + printf("Done!\n"); + } +} diff --git a/tvm/src/template/design/CLKernel.h b/tvm/src/template/design/CLKernel.h new file mode 100644 index 000000000..2933913c8 --- /dev/null +++ b/tvm/src/template/design/CLKernel.h @@ -0,0 +1,96 @@ +/*===============================================================*/ +/* */ +/* CLKernel.h */ +/* */ +/* Defines the object class for an OpenCL kernel */ +/* */ +/*===============================================================*/ + + +#ifndef __CLKernel__Harness__ +#define __CLKernel__Harness__ + +// standard headers +#include +#include +#include +// opencl header +#include +// CLMemObj is a member of this class +#include "CLMemObj.h" + +namespace rosetta +{ + + // wrapper class around an OpenCL kernel + class CLKernel + { + + friend class CLWorld; + + public: + + // constructor + // compiles the kernel + CLKernel(cl_context context, cl_program program, std::string kernel_name, cl_device_id device_id); + + // set global/local work group size + void set_global(int global_work_size[3]); + void set_local(int local_work_size[3]); + + // get kernel name + std::string get_name(); + + protected: + + // set cl_mem argument + int set_mem_arg(int id, cl_mem mem_obj) + { + int err; + err = clSetKernelArg(this->kernel, id, sizeof(mem_obj), &mem_obj); + if (err != CL_SUCCESS) + { + printf("Error: Failed to set kernel argument %d for kernel %s!\n", id, (this->kernel_name).c_str()); + printf("Error Code %d\n", err); + return EXIT_FAILURE; + } + + return err; + } + + // set memory arguments for this kernel + template + int set_const_arg(int id, T& mem_obj) + { + int err; + // printf("%d\n", mem_obj); + err = clSetKernelArg(this->kernel, id, sizeof(mem_obj), &mem_obj); + printf("****************\n"); + printf("%d\n", err); + if (err != CL_SUCCESS) + { + printf("Error: Failed to set kernel argument %d for kernel %s!\n", id, (this->kernel_name).c_str()); + printf("Error Code %d\n", 
err); + return EXIT_FAILURE; + } + + return err; + } + + void releaseKernel(); + + private: + + // global and local work group size + size_t global_size[3]; + size_t local_size[3]; + + // kernel information and objects + std::string kernel_name; + cl_device_id device_id; // target device id + cl_kernel kernel; // compute kernel + + }; + +} +#endif /* defined(__CLKernel__Harness__) */ diff --git a/tvm/src/template/design/CLMemObj.cpp b/tvm/src/template/design/CLMemObj.cpp new file mode 100644 index 000000000..a6fdecf4a --- /dev/null +++ b/tvm/src/template/design/CLMemObj.cpp @@ -0,0 +1,57 @@ +/*===============================================================*/ +/* */ +/* CLMemObj.cpp */ +/* */ +/* Implements the member functions of CLMemObj class */ +/* */ +/*===============================================================*/ + + +#include "CLMemObj.h" + +namespace rosetta +{ + // default constructor, initializes everything to 0 + CLMemObj::CLMemObj() + { + this->mem_data = nullptr; + this->elt_size = 0; + this->length = 0; + this->flags = 0; + this->bank = nullptr; + } + + // meaningful constructor, initialize data info constants + CLMemObj::CLMemObj(void *mem_data, int elt_size, int length, cl_mem_flags flags, cl_mem_ext_ptr_t* xil_ext ) + { + this->mem_data = mem_data; + this->elt_size = elt_size; + this->length = length; + this->flags = flags; + // can use Xilinx mem extensions to specify DDR bank + if (xil_ext != nullptr) + { + this->bank = new cl_mem_ext_ptr_t; + this->bank->flags = xil_ext->flags; + this->bank->obj = xil_ext->obj; + this->bank->param = 0; + } + else + this->bank = nullptr; + } + + // return the pointer to data + void * CLMemObj::get_data() { return mem_data; } + + // get size of each element + int CLMemObj::get_element_size() { return elt_size; } + + // get the number of elements in the buffer + int CLMemObj::get_length() { return length; } + + // get OpenCL memory flags + cl_mem_flags CLMemObj::get_flags() { return flags; } + + // get xilinx 
memory extension pointer + cl_mem_ext_ptr_t* CLMemObj::get_xil_ext_ptr() { return bank; } +} diff --git a/tvm/src/template/design/CLMemObj.h b/tvm/src/template/design/CLMemObj.h new file mode 100644 index 000000000..30e564aff --- /dev/null +++ b/tvm/src/template/design/CLMemObj.h @@ -0,0 +1,57 @@ +/*===============================================================*/ +/* */ +/* CLMemObj.h */ +/* */ +/* Defines the object class for an OpenCL memory buffer */ +/* */ +/*===============================================================*/ + + +#ifndef __CLMemObj__Harness__ +#define __CLMemObj__Harness__ + +// standard header for command line output +#include +// opencl header +#include +// xilinx opencl extension header +#include + +namespace rosetta +{ + // wrapper class around cl_mem + class CLMemObj + { + + friend class CLWorld; + + public: + + // default constructor + CLMemObj (); + // a meaningful constructor + CLMemObj (void* mem_data, int elt_size, int length, cl_mem_flags flags, cl_mem_ext_ptr_t* xil_ext = nullptr); + + // get information about the buffer + void* get_data(); + int get_element_size(); + int get_length(); + cl_mem_flags get_flags(); + cl_mem_ext_ptr_t* get_xil_ext_ptr(); + + private: + + // pointer to data + void *mem_data; + // size of each element + int elt_size; + // number of elements + int length; + // OpenCL memory flag + cl_mem_flags flags; + // Xilinx extension describing bank assignment + cl_mem_ext_ptr_t* bank; + }; +} + +#endif /* defined(__CLMemObj__Harness__) */ diff --git a/tvm/src/template/design/CLWorld.cpp b/tvm/src/template/design/CLWorld.cpp new file mode 100644 index 000000000..7be386df2 --- /dev/null +++ b/tvm/src/template/design/CLWorld.cpp @@ -0,0 +1,401 @@ +/*===============================================================*/ +/* */ +/* CLWorld.cpp */ +/* */ +/* Implementation of the CLWorld class */ +/* */ +/*===============================================================*/ + +#include "CLWorld.h" + +namespace rosetta +{ + // 
default constructor + // make sure it does something meaningful + CLWorld::CLWorld() + { + // default: run on alpha data 7v3 board + this->target_device_name = "xilinx:adm-pcie-7v3:1ddr:3.0"; + this->device_type = CL_DEVICE_TYPE_ACCELERATOR; + + // configure the OpenCL runtime + createWorld(); + } + + // meaningful constructor + // user specifies device + CLWorld::CLWorld(std::string target_device_name, cl_device_type device_type) + { + this->target_device_name = target_device_name; + this->device_type = device_type; + createWorld(); + } + + // get the compute device + cl_device_id CLWorld::getDevice() + { + return this->device_id; + } + + // get context + cl_context CLWorld::getContext() + { + return this->context; + } + + // get compute program + cl_program CLWorld::getProgram() + { + return this->program; + } + + // insert a new memory object + int CLWorld::addMemObj(CLMemObj &new_mem_obj) + { + int err; + + printf("Adding memory object into the world ... "); + + // first push the CLMemObj object into our vector + mem_objs.push_back(new_mem_obj); + + // then create the actual cl_mem buffer, push it into another vector + cl_mem buf; + + buf = clCreateBuffer(context, new_mem_obj.flags, new_mem_obj.elt_size * new_mem_obj.length, new_mem_obj.bank, &err); + if (err != CL_SUCCESS) + { + printf("Error creating buffer for memory object %d!\n", mem_objs.size()-1); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + + cl_mem_buffers.push_back(buf); + + // write the buffer onto the device if needed + if ((new_mem_obj.flags != CL_MEM_WRITE_ONLY) && (new_mem_obj.mem_data != nullptr)) + { + err = clEnqueueWriteBuffer(cmd_queue, buf, true, 0, new_mem_obj.elt_size * new_mem_obj.length, + new_mem_obj.mem_data, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error writing buffer %d onto the device!\n", mem_objs.size()-1); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + } + + printf("Done!\n"); + + return (mem_objs.size() - 1); + } + + int 
CLWorld::updateMemObj(int mem_idx) + { + printf("Updating mem object %d ... ", mem_idx); + + // write the buffer onto the device if needed + if (mem_objs[mem_idx].flags != CL_MEM_WRITE_ONLY) + { + int err = clEnqueueWriteBuffer(cmd_queue, cl_mem_buffers[mem_idx], true, 0, + mem_objs[mem_idx].elt_size * mem_objs[mem_idx].length, + mem_objs[mem_idx].mem_data, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error writing buffer %d onto the device!\n", mem_idx); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + } + else + printf("Buffer %d is write_only! Not updating it ... \n", mem_idx); + + return EXIT_SUCCESS; + } + + int CLWorld::readMemObj(int mem_idx) + { + printf("Reading mem object %d into host buffers ... ", mem_idx); + + int err = clEnqueueReadBuffer(cmd_queue, cl_mem_buffers[mem_idx], true, 0, + mem_objs[mem_idx].elt_size * mem_objs[mem_idx].length, + mem_objs[mem_idx].mem_data, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error reading kernel buffer %d!\n", mem_idx); + printf("Error code %d\n", err); + exit(EXIT_FAILURE); + } + + printf("Done!\n"); + + return err; + } + + + // create compute program from a file + // return error code + int CLWorld::addProgram(std::string filename) + { + printf("Adding binary program into the world ... 
"); + + // load the file + size_t code_size = (size_t) load_file_to_memory(filename.c_str()); + + // start to compile + int err; + cl_int create_binary_status; + + // Create the compute program from the source buffer + program = clCreateProgramWithBinary(context, 1, &device_id, (const size_t *) &code_size, + (const unsigned char **) &kernel_code, &create_binary_status, &err); + if (!program) + { + printf("Error: Failed to create compute program!\n"); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + + // Build the program executable + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + size_t len; + char buffer[2048]; + + printf("Error: Failed to build program executable!\n"); + printf("Error Code %d\n", err); + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); + printf("%s\n", buffer); + exit(EXIT_FAILURE); + } + + printf("Done!\n"); + + return err; + } + + // insert a kernel into the world + // return the position of the kernel in the vector + int CLWorld::addKernel(CLKernel &new_kernel) + { + printf("Adding kernel %s into the world ... ", new_kernel.get_name().c_str()); + + kernels.push_back(new_kernel); + + printf("Done!\n"); + + return (kernels.size() - 1); + } + + // methods to set kernel arguments + // memory argument + int CLWorld::setMemKernelArg(int kernel_id, int pos, int arg_id) + { + printf("Set mem arg %d for kernel %d with mem object %d ... ", pos, kernel_id, arg_id); + + int err = kernels[kernel_id].set_mem_arg(pos, cl_mem_buffers[arg_id]); + if (err != CL_SUCCESS) + { + printf("Error setting kernel argument!\n"); + printf("Error code %d\n", err); + exit(EXIT_FAILURE); + } + + printf("Done!\n"); + + return err; + } + + // run all kernels + // return error code + int CLWorld::runKernels(bool flush) + { + printf("Start kernel execution ... "); + + int err; + + // wait for previous write buffer tasks to finish + printf("Waiting for queue... 
\n"); + clFinish(cmd_queue); + + // enqueue all the kernels + // temporarily we assume kernels won't have any dependency between them + // or the dependency is handled inside kernels (such as pipes, etc. ) + for (int i = 0; i < kernels.size(); i ++ ) + { + printf("Start kernel %d!\n", i); + err = clEnqueueNDRangeKernel(cmd_queue, kernels[i].kernel, 3, NULL, kernels[i].global_size, kernels[i].local_size, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error enqueuing kernel %d!\n", i); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + } + + // wait for them to finish + printf("Waiting for kernels ... \n"); + clFinish(cmd_queue); + + // remove all of them from the vector + // so that this function can be called multiple times + // at a cost that kernels won't be released automatically + if (flush) + { + int total_size = kernels.size(); + for (int i = 0; i < total_size; i ++ ) + kernels.pop_back(); + } + + printf("Done!\n"); + + return err; + } + + // create runtime environment + int CLWorld::createWorld() + { + printf("Initializing OpenCL runtime environment ... 
"); + + int err; + + // scan the machine for available OpenCL platforms + cl_uint platform_cnt; + cl_platform_id platforms[16]; + err = clGetPlatformIDs(16, platforms, &platform_cnt); + if (err != CL_SUCCESS) + { + printf("Error: Failed to find an OpenCL platform!\n"); + printf("Error Code %d\n", err); + printf("Test failed\n"); + exit(EXIT_FAILURE); + } + printf("INFO: Found %d platforms\n", platform_cnt); + + + // find the target device + char device_name[1024]; + cl_device_id devices[16]; + cl_uint device_cnt; + bool found_device = false; + // scan all platforms + for (int p = 0; (p < platform_cnt) & (!found_device); p ++ ) + { + err = clGetDeviceIDs(platforms[p], this->device_type, 16, devices, &device_cnt); + if (err != CL_SUCCESS) + { + printf("Error: Failed to create a device group for platform %d!\n", p); + printf("Error Code %d\n", err); + printf("Test failed\n"); + exit(EXIT_FAILURE); + } + // iterate through all devices on the platform + for (int d = 0; (d < device_cnt) & (!found_device); d ++ ) + { + err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 1024, device_name, 0); + if (err != CL_SUCCESS) + { + printf("Error: Failed to get device name for device %d on platform %d!\n", d, p); + printf("Error Code %d\n", err); + printf("Test failed\n"); + exit(EXIT_FAILURE); + } + + if (std::string(device_name) == this->target_device_name) + { + this->platform = platforms[p]; + this->device_id = devices[d]; + found_device = true; + printf("Selected device %d on platform %d as target device!\n", d, p); + } + } + } + + if (!found_device) + { + printf("Error: Target device %s is not found!\n", (this->target_device_name).c_str()); + exit(EXIT_FAILURE); + } + + // create context and command queue + this->context = clCreateContext(0, 1, &(this->device_id), 0, 0, &err); + if (!(this->context)) + { + printf("Error: Failed to create a compute context!\n"); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + this->cmd_queue = clCreateCommandQueue(this->context, 
this->device_id, + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + &err); + if (!(this->cmd_queue)) + { + printf("Error: Failed to create a command queue!\n"); + printf("Error Code %d\n", err); + exit(EXIT_FAILURE); + } + + printf("Done!\n"); + + return err; + } + + // read kernel binary file into memory + int CLWorld::load_file_to_memory(const char *filename) + { + int size = 0; + FILE *f = fopen(filename, "rb"); + if (f == NULL) + { + kernel_code = NULL; + printf("Can not open kernel file!\n"); + exit(-1); + } + fseek(f, 0, SEEK_END); + size = ftell(f); + printf("Size of the file is %ld\n", size); + fseek(f, 0, SEEK_SET); + kernel_code = new char[size+1]; + if ((unsigned int) size != fread(kernel_code, sizeof(char), size, f)) + { + delete []kernel_code; + printf("Reading kernel failed!\n"); + exit(-2); + } + fclose(f); + (kernel_code)[size] = 0; + return size; + } + + + // release all runtime constructs + void CLWorld::releaseWorld() + { + printf("Cleaning up OpenCL opjects ... "); + + // release memory objects + for (int i = 0; i < cl_mem_buffers.size(); i ++ ) + clReleaseMemObject(cl_mem_buffers[i]); + + // release program + delete []kernel_code; + clReleaseProgram(program); + + // release kernels + for (int i = 0; i < kernels.size(); i ++ ) + kernels[i].releaseKernel(); + + // release device and context + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + + printf("Done!\n"); + } + +} + + + + diff --git a/tvm/src/template/design/CLWorld.h b/tvm/src/template/design/CLWorld.h new file mode 100644 index 000000000..9624687aa --- /dev/null +++ b/tvm/src/template/design/CLWorld.h @@ -0,0 +1,129 @@ +/*===============================================================*/ +/* */ +/* CLWorld.h */ +/* */ +/* Defines the object class for OpenCL context */ +/* */ +/*===============================================================*/ + + +#ifndef __CLWorld__Harness__ +#define __CLWorld__Harness__ + +// standard headers +#include +#include +#include +// opencl header 
+#include +// CLKernel and CLMemObj are members of this class +#include "CLKernel.h" +#include "CLMemObj.h" + +namespace rosetta +{ + + class CLWorld + { + + public: + + // default constructor + CLWorld(); + + // meaningful constructor + CLWorld(std::string target_device_name, cl_device_type device_type); + + // get the compute device associated with this world + cl_device_id getDevice(); + + // get the compute context associated with this world + cl_context getContext(); + + // get the binary program + cl_program getProgram(); + + // insert a compute program + int addProgram(std::string filename); + + // insert a kernel + int addKernel(CLKernel &new_kernel); + + // insert a memory object + int addMemObj(CLMemObj &new_mem_obj); + + // update a memory object (write new value) + int updateMemObj(int mem_id); + + // read a memory object + int readMemObj(int mem_id); + + // set memory kernel argument + int setMemKernelArg(int kernel_id, int pos, int mem_id); + + // set constant kernel argument + template + int setConstKernelArg(int kernel_id, int pos, T& arg) + { + // printf("%lu\n", arg); + printf("Set const arg %d for kernel %d ... 
", pos, kernel_id); + + int err = kernels[kernel_id].set_const_arg(pos, arg); + if (err != CL_SUCCESS) + { + printf("Error setting kernel argument!\n"); + printf("Error code %d\n", err); + exit(EXIT_FAILURE); + } + + printf("Done!\n"); + + return err; + } + + // run kernels + int runKernels(bool flush = false); + + // clean up + void releaseWorld(); + + private: + + // OpenCL runtime variables + + // the platform we will use + cl_platform_id platform; + + // the device we will use + std::string target_device_name; // device name + cl_device_type device_type; // device type + cl_device_id device_id; // device id + + // compute context + cl_context context; + + // command queue + cl_command_queue cmd_queue; + + // binary program for the device + char* kernel_code; + cl_program program; + + // kernels + std::vector kernels; + + // memory objects + std::vector mem_objs; + // actual OpenCL memory buffers + std::vector cl_mem_buffers; + + // function to create the OpenCL runtime + int createWorld(); + + // load binary file into memory + int load_file_to_memory(const char *filename); + }; + +} + +#endif diff --git a/tvm/src/template/design/Makefile b/tvm/src/template/design/Makefile new file mode 100644 index 000000000..282f67921 --- /dev/null +++ b/tvm/src/template/design/Makefile @@ -0,0 +1,33 @@ +# Set kernel name +KERNEL_NAME = App + +# Set host source and headers +# HOST_SRC_CPP = ./src/host/digit_recognition.cpp ./src/host/utils.cpp ./src/host/check_result.cpp +HOST_SRC_CPP = host.cpp utils.cpp +# HOST_SRC_H = ./src/host/utils.h ./src/host/check_result.h ./src/host/typedefs.h ./src/host/testing_data.h \ + ./src/host/training_data.h +HOST_SRC_H = utils.h +# DATA = ./data/*.dat + + +# Set host code include paths +HOST_INC = -I/opt/Xilinx/Vivado/2018.2.op2258646/include/ +HOST_LIB = -L/opt/Xilinx/Vivado/2018.2.op2258646/lib/ + +# Set kernel file +OCL_KERNEL_SRC = interface.cpp +# OCL_KERNEL_H = ./src/host/typedefs.h +# SDSOC_KERNEL_SRC = ./src/sdsoc/digitrec.cpp +# 
SDSOC_KERNEL_H = ./src/host/typedefs.h +# SW_KERNEL_SRC = ./src/sw/digitrec_sw.cpp +# SW_KERNEL_H = ./src/host/typedefs.h ./src/sw/digitrec_sw.h + +# Set opencl kernel arguments +# log: removed --report system +OCL_KERNEL_ARGS = --max_memory_ports all + +#------------------------- +# Leave the rest to harness +#------------------------- +include harness.mk + diff --git a/tvm/src/template/design/harness.mk b/tvm/src/template/design/harness.mk new file mode 100644 index 000000000..23856f9c7 --- /dev/null +++ b/tvm/src/template/design/harness.mk @@ -0,0 +1,196 @@ +# ======================================== Check Xilinx SDX Environment Settings ================================================== # +ifndef XILINX_SDX + $(error Environment variable XILINX_SDX is required and should point to SDx install area) +endif + +# =============================================== Tools Used in Rosetta =========================================================== # + +# sdaccel tools +OCL_CXX = xcpp +XOCC = xocc + +# sdsoc tools +SDSXX = sds++ + +# default sw compiler +SW_CXX = g++ + +# ============================================= SDAccel Platform and Target Settings ============================================== # + +# Set Default OpenCL device and platform +USR_PLATFORM = n +OCL_DEVICE = xilinx:adm-pcie-7v3:1ddr:3.0 +OCL_PLATFORM = one_of_default_platforms + +# Check if the user specified opencl platform +ifneq ($(OCL_PLATFORM), one_of_default_platforms) + USR_PLATFORM=y +endif + +# Check OCL_TARGET value +OCL_TARGET = sw_emu +ifeq ($(OCL_TARGET),sw_emu) +else ifeq ($(OCL_TARGET),hw_emu) +else ifeq ($(OCL_TARGET),hw) +else + $(error "OCL_TARGET does not support the $(OCL_TARGET) value. 
Supported values are: sw_emu, hw_emu, hw") +endif + +# Check opencl kernel file type +OCL_KERNEL_TYPE = ocl + +ifeq ($(suffix $(OCL_KERNEL_SRC)),.cl) + OCL_KERNEL_TYPE=ocl +else + OCL_KERNEL_TYPE=c +endif + +# OpenCL runtime Libraries +OPENCL_INC = $(XILINX_SDX)/runtime/include/1_2 +OPENCL_LIB = $(XILINX_SDX)/runtime/lib/x86_64 + +# opencl harness files +OCL_HARNESS_DIR = . +OCL_HARNESS_SRC_CPP = $(OCL_HARNESS_DIR)/CLKernel.cpp $(OCL_HARNESS_DIR)/CLMemObj.cpp $(OCL_HARNESS_DIR)/CLWorld.cpp +OCL_HARNESS_SRC_H = $(OCL_HARNESS_DIR)/CLKernel.h $(OCL_HARNESS_DIR)/CLMemObj.h $(OCL_HARNESS_DIR)/CLWorld.h + +# host compilation flags +OCL_HOST_FLAGS = -DOCL -g -lxilinxopencl -I$(OPENCL_INC) $(HOST_INC) -L$(OPENCL_LIB) $(HOST_LIB) -I$(OCL_HARNESS_DIR) -I$(APPLICATION_DIR) + +# xclbin compilation flags +XCLBIN_FLAGS = -s -t $(OCL_TARGET) -g + +# change OCL_HOST_FLAG +ifdef K_CONST + OCL_HOST_FLAGS += -DK_CONST=$(K_CONST) +endif +ifdef NUM_ITER + OCL_HOST_FLAGS += -DNUM_ITER=$(NUM_ITER) +endif +ifdef FIXED_FLAG + OCL_HOST_FLAGS += -DFIXED_TYPE +endif + + +ifneq ($(KERNEL_TYPE),ocl) + XCLBIN_FLAGS += --kernel $(KERNEL_NAME) +endif + +ifeq ($(USR_PLATFORM),n) + XCLBIN_FLAGS += --xdevice $(OCL_DEVICE) +else + XCLBIN_FLAGS += --platform $(OCL_PLATFORM) +endif + + +# change XCLBIN_FLAGS +ifdef K_CONST + XCLBIN_FLAGS += -DK_CONST=$(K_CONST) +endif +ifdef NUM_ITER + XCLBIN_FLAGS += -DNUM_ITER=$(NUM_ITER) +endif +ifdef FIXED_FLAG + XCLBIN_FLAGS += -DFIXED_TYPE +endif + + +XCLBIN_FLAGS += $(OCL_KERNEL_ARGS) + + +# host exe +OCL_HOST_EXE = $(KERNEL_NAME)_host.exe + +# Kernel XCLBIN file +XCLBIN = $(KERNEL_NAME).$(OCL_TARGET).xclbin +XO = $(KERNEL_NAME).$(OCL_TARGET).xo + +# =============================================== SDSoC Platform and Target Settings ============================================== # + +# platform +SDSOC_PLATFORM = zc706 + +# executable +SDSOC_EXE = $(KERNEL_NAME).elf + +# sds++ flags +SDSFLAGS = -sds-pf $(SDSOC_PLATFORM) -sds-hw $(KERNEL_NAME) $(SDSOC_KERNEL_SRC) 
-sds-end -clkid 3 \ + -poll-mode 1 -verbose +SDSCFLAGS += -DSDSOC -Wall -O3 -c +SDSCFLAGS += -MMD -MP -MF"$(@:%.o=%.d)" +SDSLFLAGS = -O3 + +# objects +ALL_SDSOC_SRC = $(HOST_SRC_CPP) $(SDSOC_KERNEL_SRC) +OBJECTS := $(ALL_SDSOC_SRC:.cpp=.o) +DEPS := $(OBJECTS:.o=.d) + +# =============================================== Pure Software Compilation Settings ============================================== # + +# compiler flags +SW_FLAGS = -DSW -O3 + +# sw executable +SW_EXE = $(KERNEL_NAME)_sw.exe + +# ========================================================= Rules ================================================================= # + +# we will have 4 top-level rules: ocl, sdsoc, sw and clean +# default to sw + +.PHONY: all ocl sdsoc sw clean + +all: sw + +# ocl rules +ocl: $(OCL_HOST_EXE) $(XCLBIN) + +# ocl secondary rule: host executable +$(OCL_HOST_EXE): $(HOST_SRC_CPP) $(HOST_SRC_H) $(OCL_HARNESS_SRC_CPP) $(OCL_HARNESS_SRC_H) $(DATA) + $(OCL_CXX) $(OCL_HOST_FLAGS) -o $@ $(HOST_SRC_CPP) $(OCL_HARNESS_SRC_CPP) + +# ocl secondary rule: xclbin +$(XCLBIN): $(XO) + $(XOCC) -l $(XCLBIN_FLAGS) -o $@ $(XO) + +# ocl secondary rule: xo +$(XO): $(OCL_KERNEL_SRC) $(OCL_KERNEL_H) + $(XOCC) -c $(XCLBIN_FLAGS) -o $@ $(OCL_KERNEL_SRC) + +# sdsoc rules +sdsoc: $(SDSOC_EXE) + +$(SDSOC_EXE): $(OBJECTS) + $(SDSXX) $(SDSFLAGS) $(SDSLFLAGS) ${OBJECTS} -o $@ + +-include $(DEPS) + +%.o: %.cpp + $(SDSXX) $(SDSFLAGS) $(SDSCFLAGS) $< -o $@ + + +# software rules +sw: $(HOST_SRC_CPP) $(HOST_SRC_H) $(SW_KENREL_SRC) $(SW_KERNEL_H) $(DATA) + $(SW_CXX) $(SW_FLAGS) -o $(SW_EXE) $(HOST_SRC_CPP) $(SW_KERNEL_SRC) + +# cleanup +clean: + @echo "Cleaning old files" + rm -rf *.exe + rm -rf *.elf + rm -rf *.xclbin + rm -rf *.bit + rm -rf *.rpt + rm -rf system_estimate.xtxt + rm -rf _xocc* + rm -rf _sds + rm -rf sd_card + rm -rf .Xil + rm -rf ./src/host/*.d + rm -rf ./src/sdsoc/*.o + rm -rf ./src/sdsoc/*.d + rm -rf ./src/host/*.o + rm -rf *.dat + rm -rf *.html + rm -rf *.csv + rm -rf *.json diff --git 
a/tvm/src/template/design/run.tcl b/tvm/src/template/design/run.tcl new file mode 100644 index 000000000..0d6dca4b5 --- /dev/null +++ b/tvm/src/template/design/run.tcl @@ -0,0 +1,14 @@ +set hls_prj digitrec.prj +open_project ${hls_prj} -reset +set_top default_function +add_files -tb main.cpp +add_files -tb data + +open_solution "solution1" +set_part {xc7z020clg484-1} +create_clock -period 10 + +csim_design -O +csynth_design +#cosim_design +exit diff --git a/tvm/src/template/design/run_hw.sh b/tvm/src/template/design/run_hw.sh new file mode 100755 index 000000000..f65d28e6d --- /dev/null +++ b/tvm/src/template/design/run_hw.sh @@ -0,0 +1,28 @@ +#===============================================================# +# # +# run_hw.sh # +# # +# A bash script to synthesize and generate bitstream # +# # +# # +#===============================================================# + + +#!/bin/bash +make clean + +# the k value of KNN, default is 3 +k_value=3 +# the directory of this lab +app_dir=`pwd` + +### COMPILATION +# create some blank-line space for easy readability +echo ""; echo ""; echo "" ; echo "" +echo "####################################################" +echo " Synthesize and Generate Bitstream with K_CONST=$k_value" +echo "####################################################" +make ocl OCL_TARGET=hw OCL_PLATFORM=$AWS_PLATFORM APPLICATION_DIR=$app_dir K_CONST=$k_value +#export XCL_EMULATION_MODE=hw_emu +#./DigitRec_host.exe -f DigitRec.hw_emu.xclbin + diff --git a/tvm/src/template/design/run_sw.sh b/tvm/src/template/design/run_sw.sh new file mode 100755 index 000000000..80ba00495 --- /dev/null +++ b/tvm/src/template/design/run_sw.sh @@ -0,0 +1,51 @@ +#===============================================================# +# # +# run1.sh # +# # +# A bash script to run the software emulation # +# # +# # +#===============================================================# + + +#!/bin/bash +make clean + +# check env variable setup +if [ -z "$AWS_PLATFORM" ]; then + echo 
"AWS_PLATFORM not set up; use default" + export AWS_PLATFORM=xilinx:adm-pcie-7v3:1ddr:3.0 +fi + +# set up emulation configuration +echo "#################################################" +echo " Setting emulation configuration..." +echo "#################################################" +export LC_CTYPE=en_US.UTF-8 +export LC_ALL=en_US.UTF-8 +export XCL_EMULATION_MODE=true +emconfigutil --platform=$AWS_PLATFORM + +# the k value of KNN, default is 3 +k_value=3 +# the directory of this lab +app_dir=`pwd` + +### COMPILATION +# create some blank-line space for easy readability +echo ""; echo ""; echo "" ; echo "" +echo "####################################################" +echo " Compiling project with K_CONST=$k_value" +echo "####################################################" +make ocl OCL_TARGET=sw_emu OCL_PLATFORM=$AWS_PLATFORM APPLICATION_DIR=$app_dir K_CONST=$k_value + + +### EXECUTION +echo ""; echo ""; echo "" ; echo "" +echo "####################################################" +echo " Executing DigitRec with K_CONST=$k_value" +echo "####################################################" +export XCL_EMULATION_MODE=sw_emu +#export XCL_EMULATION_MODE=hw_emu +./App_host.exe -f App.sw_emu.xclbin + diff --git a/tvm/src/template/design/utils.cpp b/tvm/src/template/design/utils.cpp new file mode 100644 index 000000000..0e6dd632e --- /dev/null +++ b/tvm/src/template/design/utils.cpp @@ -0,0 +1,46 @@ +/*===============================================================*/ +/* */ +/* utils.cpp */ +/* */ +/* Utility functions */ +/* */ +/*===============================================================*/ + +#include +#include +#include +#include + +#include "utils.h" + +void print_usage(char* filename) +{ + printf("usage: %s \n", filename); + printf(" -f [kernel file]\n"); +} + +void parse_sdaccel_command_line_args( + int argc, + char** argv, + std::string& kernelFile) +{ + + int c = 0; + + while ((c = getopt(argc, argv, "f:")) != -1) + { + switch (c) + { + case 'f': + 
kernelFile = optarg; + break; + default: + { + print_usage(argv[0]); + exit(-1); + } + } // matching on arguments + } // while args present +} + + diff --git a/tvm/src/template/design/utils.h b/tvm/src/template/design/utils.h new file mode 100644 index 000000000..a3ab77437 --- /dev/null +++ b/tvm/src/template/design/utils.h @@ -0,0 +1,19 @@ +/*===============================================================*/ +/* */ +/* utils.h */ +/* */ +/* Utility functions */ +/* */ +/*===============================================================*/ + +#include +//target device +const std::string TARGET_DEVICE = "xilinx_aws-vu9p-f1-04261818_dynamic_5_0"; + +void print_usage(char* filename); + +void parse_sdaccel_command_line_args( + int argc, + char** argv, + std::string& kernelFile); + From a18f24f09fec4e6e430d01aba877e25e1ee2000d Mon Sep 17 00:00:00 2001 From: Hecmay Date: Thu, 31 Oct 2019 17:18:52 -0400 Subject: [PATCH 087/103] [fix] temp update --- samples/digitrec/__init__.py | 0 .../digitrec/digitrec/data/testing_set.dat | 180 -- .../digitrec/digitrec/data/training_set_0.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_1.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_2.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_3.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_4.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_5.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_6.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_7.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_8.dat | 1800 ----------------- .../digitrec/digitrec/data/training_set_9.dat | 1800 ----------------- samples/digitrec/digitrec/hcl_code_dig.py | 150 -- samples/digitrec/digitrec_main.py | 29 +- samples/digitrec/host.cpp | 152 -- samples/digitrec/interface.cpp | 47 - samples/digitrec/kernel.cpp | 64 - samples/digitrec/knn_aocl.cl | 35 - 
tvm/src/codegen/build_common.cc | 16 +- 19 files changed, 21 insertions(+), 18652 deletions(-) delete mode 100644 samples/digitrec/__init__.py delete mode 100644 samples/digitrec/digitrec/data/testing_set.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_0.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_1.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_2.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_3.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_4.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_5.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_6.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_7.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_8.dat delete mode 100644 samples/digitrec/digitrec/data/training_set_9.dat delete mode 100644 samples/digitrec/digitrec/hcl_code_dig.py delete mode 100644 samples/digitrec/host.cpp delete mode 100644 samples/digitrec/interface.cpp delete mode 100644 samples/digitrec/kernel.cpp delete mode 100644 samples/digitrec/knn_aocl.cl diff --git a/samples/digitrec/__init__.py b/samples/digitrec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/samples/digitrec/digitrec/data/testing_set.dat b/samples/digitrec/digitrec/data/testing_set.dat deleted file mode 100644 index df55b0afb..000000000 --- a/samples/digitrec/digitrec/data/testing_set.dat +++ /dev/null @@ -1,180 +0,0 @@ -0x3041060800,1 -0x4081020400,1 -0x4081020400,1 -0x2041020400,1 -0x4081020600,1 -0x6081020c00,1 -0x20c1020800,1 -0x4081020400,1 -0x6081020400,1 -0x2081020400,1 -0xc081830600,1 -0x1061861800,1 -0x20c1060c00,1 -0x4081020400,1 -0x20c1020400,1 -0x20c1020400,1 -0x4081020400,1 -0x3041860800,1 -0x20c1860800,1 -0x40c1020400,1 -0x20c1060800,1 -0x2041020c00,1 -0x2041860c00,1 -0x4183070300,1 -0xe1c1860f00,2 -0xe041060f00,2 -0xc3c1060e00,2 -0x8181e70c00,2 -0x8181078000,2 -0x181078000,2 
-0xc041078000,2 -0xe060c7be00,2 -0xe040820c00,2 -0x3c7cb8000,2 -0xe260870f00,2 -0x20e041060f00,2 -0x30e78f0a00,2 -0xc040870c00,2 -0xf1618f9800,2 -0xe040831e00,2 -0x20c1060200,2 -0x6043870a00,2 -0x61e0878c00,2 -0x6141870200,2 -0xc08207c000,2 -0xe041040e00,2 -0xe0c10f9f00,2 -0x70608f1a00,2 -0x20a041079e00,2 -0xe061879e00,2 -0x1e0c3060f80,2 -0xe041020e00,2 -0x10f0208f1800,2 -0xe0c307c000,2 -0x20e041040e00,2 -0xc0c1860300,2 -0x60434b0e00,2 -0x70e18f9c00,2 -0x41c18f8900,2 -0x105126cb1e00,2 -0x2070618e1f00,2 -0xe06381111c,3 -0xe0c1c31c00,3 -0x1c3c1858f00,3 -0xe143c19e00,3 -0xe0c1c19e00,3 -0x40c3810a00,3 -0x60c3030a08,3 -0xe041811e10,3 -0xe0c3010e00,3 -0x6043811c00,3 -0x1c083808e00,3 -0xe081811e00,3 -0xe041811e00,3 -0xc081010400,3 -0xf0c0cf9f00,3 -0xe1c3c19e00,3 -0x6080810c00,3 -0x1f063809e00,3 -0x10142878100,4 -0x91a3c90400,4 -0x11438a0400,4 -0x8147878200,4 -0x21e48f0600,4 -0x9143c70408,4 -0x8363830600,4 -0x11238a0000,4 -0x8163830608,4 -0x3147c70400,4 -0x8142830400,4 -0x2347830304,4 -0x1162870400,4 -0x1326cf0200,4 -0x9648e0800,4 -0x10a38b0400,4 -0x1167860800,4 -0x41c3830408,4 -0x10364cf8300,4 -0x41a7860800,4 -0x1167820c10,4 -0xa1448f8200,4 -0xc2878200,4 -0x1a7c70408,4 -0x51c68f8600,4 -0x11e7cd0200,4 -0x7103810600,5 -0xf303c68700,5 -0xe303c48f00,5 -0x23e6070608,5 -0xf3020800,5 -0xf183899e00,5 -0x1e3020c00,5 -0x31e30b1c00,5 -0x1e30f0c00,5 -0x6183030e00,5 -0x30c1021800,5 -0x6081810c00,5 -0xc2020c00,5 -0x3183811e00,5 -0xc103808e00,5 -0x71e3851e00,5 -0x7181830c00,5 -0x1071038d0e00,5 -0x6083c78c00,6 -0x408103870000,6 -0x106103078700,6 -0x41c3870e00,6 -0xc107cf8e00,6 -0x6083870400,6 -0x2081060400,6 -0x18387c6c700,6 -0x20c107c78400,6 -0x3c0820408,7 -0x41c0820410,7 -0x23e081060c,7 -0x83c0830608,7 -0xe0820c10,7 -0x1c0810200,7 -0xc1c3830408,7 -0x1e6830c10,7 -0x41c0830408,7 -0x3e4c30c10,7 -0x71c1820820,7 -0x3e4c10204,7 -0xe1c1e71810,7 -0x3c1830608,7 -0x3c4810608,7 -0x41e0830c10,7 -0xe160820820,7 -0xe043860408,7 -0x23c0830408,7 -0x21e0820810,7 -0x1c3c0820408,7 -0x3c0810608,7 
-0x3e7c10608,7 -0x3160820c10,7 -0x81c1830600,7 -0x43c0830408,7 -0xc3c0830408,7 -0x6123850e00,8 -0x41c1060c00,8 -0x3c7810204,9 -0x41c3810204,9 -0x6166c70600,9 -0xc103810204,9 -0xe3664d8e00,0 -0xe3e64d8e00,0 -0x6142850c00,0 -0x30a2c91c00,0 -0xe3e7488f00,0 -0x61a24c8e00,0 -0x31e64f0c00,0 -0x60c2850e00,0 -0x30f24d1c00,0 -0x61e2c58e00,0 -0xe3e6cf9e00,0 -0x60c2850c00,0 -0x71a2499c00,0 -0x71a24c9a00,0 -0x31e6cf8c00,0 diff --git a/samples/digitrec/digitrec/data/training_set_0.dat b/samples/digitrec/digitrec/data/training_set_0.dat deleted file mode 100644 index b408c8fff..000000000 --- a/samples/digitrec/digitrec/data/training_set_0.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0x30e3cd1c00, -0x61e2cd0e00, -0x31a6491c00, -0x6142cc8e00, -0x71a6c91c00, -0xe1264d8e00, -0x61a2450e00, -0x61e76f8f00, -0xe1224c8e00, -0x20e24f9c00, -0xe162c70c00, -0x71e7cd9e00, -0x1e6698800, -0x61e64d9e08, -0xc3e44c8e00, -0x60e2450c00, -0x61e64d8e00, -0x71e64d8e00, -0x7164cf8e00, -0x20e2cf1800, -0x71a64f8e00, -0x21e2891e00, -0x60e2450e00, -0xc162470c00, -0xc3e44c8f00, -0x30e3491c00, -0x61e2c50e00, -0xe142850e00, -0x6142850e00, -0x1e4298c00, -0x4164498400, -0x6122458c00, -0xe1c2448e08, -0x71a64b1c00, -0x6142c50e00, -0x71a6499e00, -0x30e34f9c00, -0xe364489e00, -0xe3644d8e00, -0x61e24d8e00, -0xe1e64d8e00, -0x71f6499e00, -0xe1e2cd8e00, -0xe1e24c8f08, -0x61644d8e00, -0x61c6898e08, -0x71a4491c00, -0x30a2850c00, -0x71a6499c00, -0x61e4488f00, -0x20e2491c00, -0x71e64f9c00, -0x61c2850c00, -0x6164491c00, -0x71e74b9e10, -0x71224d8e00, -0xe3e4488f00, -0x20c2850c00, -0x20e34f0000, -0x30a2451c00, -0x6126488e00, -0x61a6488e00, -0x60e3470c00, -0xe166448f00, -0x20c3850c00, -0xe1e24d8e00, -0x1e6498e00, -0x21a4688e00, -0x20e2cd1c00, -0xf3e4499e08, -0x6142c50e00, -0xe144488e00, -0x60c3850c00, -0xf3e6cf9f00, -0x61c2448a08, -0x71e6489e00, -0x41c2448e00, -0xe3e74f8f00, -0x61e74d8f00, -0xe3264c8f00, -0x31a24c8e00, -0x61e2cd0e00, -0x11e64f8c00, -0x3e7c78600, -0x20e3850c00, -0x71a2491c00, -0x71e3c70c00, -0x6166489e00, 
-0xc142c58e00, -0x61e6c99e00, -0x61a64d8e00, -0x61a2c91c00, -0x6224488e00, -0x30e34f1c00, -0x71a6491c00, -0x61e64d9e00, -0x60c2850c00, -0xf1664d8e00, -0x1071264f8e00, -0x4144499c00, -0xe146cd8e00, -0x30e2491c00, -0x61c2cd0e00, -0x4142850e00, -0xe166458e00, -0x61e64f8c00, -0x6126448e00, -0x61e7cf9e00, -0x41c2c48600, -0x20a6698c00, -0x61e7cf8e00, -0x71e64f9e00, -0xf324498e00, -0x1071a24f1c00, -0x61c2448e00, -0xe2a4688f00, -0x61e6499e00, -0x4142850e08, -0xe124488f00, -0x30a24f9c00, -0x71e6cd1c10, -0xe1e2cd8e08, -0xc1224d8e00, -0x6124489e00, -0x10a2499c00, -0xa126458e00, -0xf324689e00, -0x71a2491c00, -0x61e2cd0e00, -0x61c2cd0e00, -0x61c2850c00, -0x41e66ccf00, -0x61a2cf0c00, -0x71a24d9e00, -0xe3a4499e00, -0xe64f0000, -0xe142870e00, -0x60c2850c00, -0x61a2450c00, -0x6124689e00, -0x6146890e00, -0x30e2cb1c00, -0x61e2499e10, -0x20e3850c00, -0x61e6499e00, -0x81e4498c00, -0x71a2450c00, -0x61e64d9e00, -0xe1624d8e00, -0x71e64f9c00, -0xc162458e00, -0x61e2c58e00, -0x71e28f1c00, -0x61a2491e00, -0x30a2850c00, -0x51e2489c00, -0x43e66f8e00, -0x71e7cf1c00, -0x60e2850c00, -0x71e4499e00, -0x71e34f9e00, -0xe144488e00, -0xe3e46f8e00, -0x61e6cf1c10, -0x61c2c50e00, -0x70e3cd0c00, -0x61428d0e00, -0xe122c70c00, -0xc1c4c98e00, -0xe124448e00, -0x71e64d9e00, -0x60e34d0c00, -0x61e7c48f00, -0xe164489f00, -0x61e6c99e00, -0x61a6699e00, -0x61e6c91e00, -0x61e24d9e00, -0x61c2448e00, -0x61e2cf8e00, -0x61e2499e00, -0xe1664c8e00, -0x30a2491c00, -0xf2a42c8f08, -0x366cf8000, -0x41e4488e00, -0x61e64b9c00, -0xe1e6cd9e00, -0x6162850c00, -0xe1e6cd8f08, -0x1e46f8e00, -0x61a3468e00, -0x41c2850e00, -0x6324488e00, -0x71e4c99e00, -0x6162448e00, -0x61c3c78e00, -0x61e34d9e00, -0xe1264d9e00, -0x71a2491c00, -0xe3e64c8f00, -0x30e2cf1c00, -0xe162448e00, -0xc142448e00, -0x61e2450e00, -0x61e6cd8e00, -0x61a24d9e00, -0x6142cd0e00, -0x21e64f1800, -0xe364488e00, -0x1e64f8400, -0x1e3e44d8e00, -0x61c2850e00, -0x71e2cf1c00, -0x21e24d9c00, -0x60c2850e00, -0x6142850e00, -0x61a6498e00, -0x61c2448e00, -0x61a4488e00, -0x61e4488f00, 
-0x71a6499e00, -0x61c2c50c00, -0x71c4c98e00, -0x6124499e00, -0x71e24d1c00, -0x61e64c8f08, -0x41e4488e00, -0xe146448e00, -0x61e2850c00, -0x1e44c8700, -0x41e2440e00, -0xe1a4488e08, -0xc146848f00, -0xe3c6cf8f00, -0x31e6cb1c00, -0xe1c2c58e00, -0x71e74d9e00, -0x61e2488e00, -0x71c28d1e00, -0xf3664d8e00, -0x63e44f9e00, -0x6166499c00, -0x61e64d8e00, -0x61c0cf0e00, -0xc2860000, -0x61e24f0c00, -0x63e2cd8e00, -0x41c3458e00, -0x61e6cf8e00, -0xe2cf0800, -0x30e3cf1c00, -0xe122448e00, -0x6142c50c00, -0x61e3c58e08, -0x10e38f1c00, -0xe162448e00, -0x63e6ef8e00, -0x4142850e00, -0x61c2c50c00, -0x4142850c00, -0x61c2c50e00, -0x71e2cf1c00, -0xe1e64c8f08, -0x61e48c8f00, -0xe162478e00, -0xe1e2c91e00, -0x61e3c50e00, -0x21e64f1800, -0x61a64c8e00, -0x61a2448e00, -0x63e44f8c00, -0xe3644c8f00, -0xe1e2cd8e00, -0x31e2491e00, -0x71a24d1c00, -0x4142858e00, -0xc364488f00, -0x71e64f9c00, -0x61664c8e00, -0x20c28d0c00, -0xc1e6448e00, -0x61e64c8e00, -0x61c2850c00, -0x20e3451c00, -0x30e28f1c00, -0x1e46d8400, -0x71a6499e00, -0x61a24d0c00, -0x61e2c51e10, -0xe1264c8e00, -0x6122499e00, -0x61e2cd8e00, -0x61e2cd9e00, -0xc1444c8f00, -0x63e46d8e00, -0x61a6688f00, -0x61c3cf1e00, -0xf1a64d8e00, -0xc142448f00, -0xe122448f00, -0x60c1870c00, -0x6140c90a00, -0xc1c3850e00, -0x60c2c50e00, -0x71a2c91c00, -0x60c38d0c00, -0x41e64c8e00, -0x61e4489e00, -0x31e64b1800, -0xe1e24d9e00, -0x61e2458e00, -0x61c24c8f00, -0x41c2448e00, -0x6126499c00, -0x20a64f9c00, -0x60e3cd1c00, -0x71e64d9e00, -0xc1c4488e00, -0x1e6cf8e00, -0x71e34d1c00, -0xe324499e00, -0x61a64d8e00, -0xe1664d8e00, -0x4102450c00, -0x83e44c8e00, -0x71a2499400, -0x71a64d9e00, -0x61644c8f00, -0x3e44c8600, -0xc3e4688e00, -0x41c24d8e00, -0x50a24d9e00, -0xe1664c8e00, -0x3126491c00, -0x60e2c70c00, -0x61c2c50e00, -0x71a6491e00, -0x7126490e00, -0x61624c8e08, -0x71264d8e00, -0x71a6499e00, -0x31e6491c00, -0x7166499e00, -0x30a24d9c00, -0x71e6cd0e00, -0x6164488e00, -0xe3e64c9e00, -0x30a2c91c00, -0x71e6499e00, -0x43e46c8e00, -0xe1264d8e00, -0x6142850600, -0x61e2c99e00, 
-0x61c3c50e00, -0x21e6489e00, -0xe3644c8e00, -0x70a2491c00, -0xe3e64d9e00, -0x60c3850c00, -0x31e64d9e10, -0x61e24d8e00, -0xe146858600, -0x4142458e00, -0xf3244d8e00, -0x20e2850c00, -0xe346448f00, -0x71a6499e00, -0x6122488e00, -0xb334491c00, -0x61e24d9e00, -0xc2850c00, -0x61e34d9e10, -0xa3644f8e00, -0x61e2cd0c00, -0x71b24b1c00, -0x71e64b9e00, -0x61b64f0800, -0x71e6cf9c00, -0x71b4499c00, -0x71a6489e00, -0x61e2cd1c00, -0x61e6cd8e00, -0x20a2cf0c00, -0x6122488e00, -0x61c2c70c00, -0xe1664c8e00, -0xe126489e00, -0x31b64f0800, -0x7026cb1c00, -0xc1c2c58e00, -0xf126688e00, -0x41e44f8c00, -0x4182850e00, -0xe3e64d8f00, -0x71a24d9c10, -0x7126498e00, -0x61e24c9e00, -0xe1e3c50e08, -0x20c2850e00, -0x21e4688e00, -0xe3264c8f00, -0x61666f9e00, -0x60c3850c00, -0x61e7699e00, -0xe264488e00, -0x61e2458e00, -0x30a3c50c00, -0x61e64d9e00, -0xe102448a08, -0x61e76f9e00, -0xe1624c8e00, -0x30d6099e00, -0x61e6cc9f00, -0x6142450e00, -0xc142448e00, -0x71e2499e00, -0x61c2cd0e00, -0x6164489e00, -0x61e64d9e00, -0x71224c8e00, -0x30e3cf0c00, -0x4142850600, -0x73e66f9e00, -0x20c2861800, -0x61e7cf9e00, -0x61c2850c00, -0x61a3468e00, -0x81e3478e00, -0x71e6491e00, -0x61e6499200, -0x69e2cf0c00, -0x3082450c00, -0xe1664d8e00, -0x61e6499c00, -0x61c2850e00, -0x60e3c50c00, -0x61a24d8e00, -0x61a2458e00, -0xe124490c00, -0x41e2478e00, -0x60e2851c00, -0x61e2cd8e00, -0x61e24f8800, -0x60e2491e00, -0x30e3cd1c00, -0x71e76c8f00, -0xe1e4499e00, -0x61a44d9e00, -0x30e3cf1c00, -0x20e2850c00, -0xe2a4488f00, -0x60e2478c00, -0x73e64f8e00, -0x61e2c50e00, -0x61e2c50c00, -0xe3644c8f00, -0x7164489e00, -0x21c28f0c00, -0x30e6cf1800, -0x63e4499e00, -0xe142850c00, -0x30e2491e10, -0x20e34f1c00, -0xc142c58600, -0x20a2450c00, -0x71a4489e00, -0x60e2450c00, -0x61e4c88f08, -0x30e34f1c00, -0x6142850c00, -0xe1264d8e00, -0x61e64c8e00, -0x23e44c8e00, -0xc244488f00, -0xc3e46c8e00, -0xc3c6448700, -0x61e4c88f08, -0x30a64b9c00, -0x20e24d1e00, -0xe166498e00, -0x61e64d8e00, -0x61e2c50e00, -0x71e6cb9c00, -0xe324489e00, -0x60e2c50e00, -0x61e64c8e00, 
-0x7122450e00, -0x1e7c89e00, -0x71a64b1c00, -0xe3e44c8e08, -0x71e4499e00, -0x20a24f8800, -0x63e4499e00, -0xe3e6cf8e00, -0xe3a4488f00, -0x63e66f8e00, -0xe324688e00, -0x41c2c48e00, -0x61e2491c00, -0xc3e64f8700, -0x61a6498e00, -0xe1e46f9e00, -0x61e64d8e00, -0x6166499e00, -0x30e34d1c00, -0xc1e66c8e00, -0xe324488e08, -0x6122448e00, -0xc1c2448e00, -0x61e6cd8e00, -0x21e74f1800, -0x30e24b1c00, -0x60c2850e00, -0x60c2850c00, -0x61e64f9e00, -0x3e74d9c00, -0x61a2850e00, -0x61e64d9e00, -0x31e64f9800, -0x71a24f1c00, -0x71e6c91c00, -0x61c2440e00, -0x71a6499e00, -0x70a24d1c00, -0x61e24d9e10, -0x6124498e00, -0x61e2458e00, -0x61e4489e00, -0xc1c3870600, -0x61c2850e00, -0xf3a4489e00, -0xe1e6448f00, -0x71a2491c10, -0x61e2450c00, -0x71e24d9e00, -0x6120410410, -0x41c6cd0e00, -0xc3e64f8e00, -0x20c3c50c00, -0x63e4688e00, -0x60e2491c00, -0xe324498e00, -0x71e64c8e00, -0x23e44f8e00, -0x61e3c58e08, -0xe1664c8e00, -0xc3e4488f08, -0x71b64f8e00, -0x31e64b9c00, -0xe1a24c8e08, -0x63644d8e00, -0xe3e64d9e00, -0x30e3491c00, -0x7224499c00, -0x4364478200, -0x61c28f0c00, -0x61a6498e00, -0x61c3c50c00, -0x60c2458e00, -0x61e2478e00, -0x71e7cd1c00, -0x41c2c50e00, -0x60c2850c00, -0x21e4489e00, -0xe1244c8e00, -0xe3e66ccf08, -0x61c38f0c00, -0x60c2c50c00, -0x21c2c50e00, -0x61e66f8400, -0x71e64c9f00, -0x61428f0c00, -0x20c1870c00, -0x61e6488e00, -0xf1e64d8e00, -0xe3a4698e00, -0xe1e24d8e00, -0x61a2cd0e00, -0x6122440e00, -0xf324488e00, -0xe3cf1c00, -0xe166cf8e00, -0x61e24c9e00, -0x61e2850c00, -0x71a74d9e00, -0x60c2850c00, -0xe1e6489f00, -0x71e3cf1c00, -0xf1e64f9c10, -0xe2450c00, -0x71e6cd9e00, -0x61e64f8c00, -0x61e6489e00, -0x6124490000, -0xf3e4499e00, -0x71a24d1c00, -0x384e78000, -0xf1e24d8e00, -0xf124499e00, -0x10e38e1800, -0x71e6cf9c00, -0xc3c6cf8f00, -0x71e64f9e00, -0x61a64c8e00, -0x60c2c50e00, -0x23e4688e00, -0xc3e4488f00, -0x61624c8e00, -0x6164499e00, -0x73e64f8e00, -0x71a64d8e00, -0x70e2870c00, -0x61e6c90e00, -0x71a64d8e00, -0x31e6cb1c00, -0x30c3870c00, -0x21c2cd1c00, -0x1e46f8000, -0x41e64f9e00, 
-0x21e2490600, -0xb326448f00, -0x6166c50e00, -0x71a24c9e10, -0x31a2cf1c00, -0xe364488e00, -0x7126499c00, -0x61e74b9e00, -0xe3e44c8e00, -0x60e3870c00, -0x61c28d0c00, -0x7324698e00, -0xe142850e00, -0x30e2491c00, -0xe1e64d8e00, -0x6142448e00, -0x71e64d9e00, -0xc142c50e00, -0xe146448f00, -0xc1c2858600, -0x21e4489e00, -0x61224c8e00, -0x71e24d9e00, -0x71e6cb1c00, -0xc3c6c58e00, -0x61e6cd0e00, -0x7126499e00, -0x31e6cb9c00, -0x20e38d1c00, -0xf324699e00, -0x4126488008, -0x61e2cd8e00, -0x61e6688e00, -0x71e6499c00, -0xe124689e00, -0x61224d8e00, -0x61e74f8c00, -0x142c50c00, -0x1e6cf9c00, -0x63e64d8e00, -0x10e34f1c00, -0x61e2448e08, -0x60c2850c00, -0x21e3cf1c00, -0x31e64f1c00, -0x6142c99200, -0x60c6c91c00, -0x43e44f8c00, -0x61a6491c00, -0x41464c8e00, -0x60a2081e00, -0x71a2491e10, -0x31e68b1c00, -0x6162cf1c00, -0xe3e64c8f08, -0x41e64f9c00, -0x61c2c58e00, -0xe142448e00, -0x40c2850e00, -0x61e24d9e00, -0x61e6c90e00, -0x71e6cf9c00, -0x61e24d8e00, -0x71a64c9e00, -0xe2e4498e00, -0xc344488e00, -0x61e3cf9e00, -0x6143850e00, -0xc1e62c8e00, -0x71e2c91c00, -0x6142850c00, -0x71e74d9e10, -0x41c2850e00, -0x61e2cd0c00, -0x71246f8e00, -0x61e24c9e00, -0x71a64b9c00, -0x61e66c9e10, -0x30a2cd0c00, -0xf1e34f9e10, -0x61a6490c00, -0x30c3450c00, -0x71a6cb1c00, -0x61a6488e00, -0x61c24d8e00, -0x61e6cd1e00, -0x41e66d9e00, -0x30e2cb1c00, -0x70e3cd1c00, -0xe144498e00, -0x41e6499e00, -0x61a2c58e00, -0x71e24d9e00, -0x20e24f0800, -0x71224d8e00, -0x6122450e00, -0x21e6499c00, -0x61e64f9e00, -0xe142c50e00, -0x31e6cb1800, -0x61e64c8e00, -0xe1c6cd9e00, -0x31e24d9c00, -0x31e6cb1c00, -0x60c2870c00, -0x71a2450c00, -0xe3e64d9e00, -0x61e2c58e00, -0xe1624d8c00, -0x61c2cd8e08, -0x63e64d8e00, -0xe1c6cd8e00, -0x71e64d9e00, -0x70e2c50c00, -0x30e3cd1c00, -0x61e64d9e00, -0x31e74f9e00, -0x41e64f8c00, -0x21e6489e00, -0xe324488e00, -0xf3e4499e00, -0xe3246c8f00, -0x61e66f8e00, -0x61c2c50e00, -0x61e3478e08, -0xe3e44c8e08, -0x1f3e7cf8f00, -0xe3e4cf8e00, -0x30a2491c00, -0xf126491c00, -0x61e2cf0c00, -0xc3870000, -0x30e3cf1800, 
-0x71e2cf0e00, -0x61224d8e00, -0x20c28f1c00, -0x1e366468e00, -0x20e24f1c00, -0x61624d0e00, -0x6122448e00, -0xe124488f08, -0x61a6499e00, -0x71a6491c00, -0x61e2c50e00, -0x20a28d1c00, -0xe3a64c890c, -0x31a6699c00, -0xe1e24d8e00, -0x61a64d8e00, -0x61e2c58e00, -0x70a2491c00, -0x20e3cf1c00, -0x41c2cf8e00, -0xe1a4499e00, -0x61c2c50e00, -0xc3e44c8f00, -0x1c366458e00, -0x61428d0c00, -0x30e3cf1c00, -0x6142c50e00, -0x41e2c50e00, -0xc3e4488f00, -0xc142c48e00, -0x6142cc8e00, -0x70a3451e10, -0x71e7cf9c00, -0x31e3491c00, -0x61c2488e00, -0x6364489e00, -0x71a4499e00, -0xe3e4499e08, -0x61e2c58e00, -0x61e2c58e00, -0x60c3850c00, -0xe1a3458e00, -0x71e24d8c00, -0x71a6499e00, -0xe224488e00, -0xe364499e00, -0x61e2450e00, -0x21e4c99e00, -0x61c2cd0e00, -0x63e64c8f00, -0x6124488e00, -0x71e7cf9e00, -0x21e64d8e00, -0x71e6cf8e00, -0xe3664c8e00, -0xe122448e00, -0x61e4488e00, -0x71e7cf9e00, -0x60a2448e00, -0x61e6499e00, -0xe142c58e00, -0x71664c8e00, -0xe142850e00, -0x61e2cd0c00, -0x61e2c50e00, -0x41e66f8c00, -0x6160890c00, -0x70a24d1c00, -0x71a6489e00, -0x61c3c70e00, -0x71e6499e10, -0x61e6cd8e00, -0xd3266c8e00, -0x30a24d1c00, -0x6124488e00, -0x61c3850e08, -0xe142448e00, -0x30e2cf1c00, -0xe1e2cf8e00, -0x7162c91c00, -0x71e6499e00, -0xe24f1c00, -0x61a2699e00, -0x61664d8c00, -0x61e6cd0e00, -0xe2244c8e00, -0x61e2cd0c00, -0xe3644c8e00, -0x71a6499e00, -0x7124491c00, -0x30a2491c00, -0xe364489e00, -0x61e6499e00, -0xc1c3850e08, -0x30e2cb1c00, -0x31a64f1c00, -0x71e2cd0e00, -0x30a2cf1c00, -0xf126699e00, -0x71e34b1c00, -0xc142448600, -0x31a64b9c00, -0x31a64f9c00, -0xc2e4488e00, -0x1a2a5484f00, -0xf366cd8e00, -0x61e44f9c00, -0x1e6c98c00, -0x41e6498e00, -0x71a24d9e00, -0x61c2cd0e00, -0x71a7cb9c00, -0xe3266c8f08, -0x31e24f1c00, -0x61464c8e00, -0x61c2c50e00, -0xe162458e08, -0x61e6489e00, -0x61e64c8e08, -0xc142450e00, -0x61e6cf8c00, -0x61c2850e00, -0x61e2c50c00, -0x63e46f8e00, -0x20c2850c00, -0x61e64c8e00, -0xe1a2cf8e00, -0x71a6489e08, -0x61c2870c00, -0x71a6c98e00, -0x6162458e00, -0x71a6491e00, -0xe342cd8f08, 
-0x41c2448e08, -0x31e24d9e00, -0x61c6c90e00, -0x7324498c00, -0x7124488e00, -0x60c3850c00, -0x41e4688e00, -0x31e6491c00, -0x61a64c8e00, -0x60c2850e00, -0x21e24d9e00, -0x71a3cf1c00, -0x71e2499e10, -0x71a2458e00, -0x61a2450c00, -0xe3264c8e00, -0xe142448e00, -0x71a2499c00, -0xc144488a00, -0x71a6cf1c00, -0x71e6c91c00, -0x41c2850c00, -0x71e2cf1c00, -0x71664d9e00, -0x1c3c444c700, -0xe1c6c78e00, -0x61c2cc8e00, -0xc3e64f8600, -0x61e6499e00, -0xe3e64c9f08, -0x61e64ccf00, -0x31e6cd1e00, -0x61c2850e00, -0xe162458e00, -0xe3874c8f00, -0x61428d0c00, -0x1e6488e00, -0x61e24d8e00, -0xe3244c8e00, -0x61a24d0c00, -0x61e2cf1c00, -0x1e64f8400, -0x61c2c58e00, -0x21666f8c00, -0x61c2c50e08, -0x71e24d9e00, -0xc1e6498f00, -0x61c2c50e00, -0x61e2c50e00, -0x6102440a00, -0xc146448f00, -0x60c3c50e00, -0xf324489f00, -0x61c28d0e00, -0x41c2850e00, -0x41e2c50e00, -0x63e44f8c00, -0x71e2cb1c00, -0x61e7850e00, -0x61e64b9c00, -0x61a2458e00, -0x1e64d8e00, -0x61a6499e00, -0x20e28d0c00, -0xe162c58e00, -0x81624c8e00, -0x1e54f8c00, -0xe164c99e00, -0x11a24b1c00, -0x20e64f0800, -0x30e3cf1c00, -0x61e2c50e00, -0x6124488e00, -0xc3e4498e08, -0xc1e0408618, -0x6122450e00, -0x1e7488e00, -0x20c3870c00, -0xe162448600, -0x30a2491c00, -0x21e2c50c00, -0x61c2c50e00, -0xe1264c8e00, -0x60c2448e00, -0xe1e64c8f00, -0x20e34f1800, -0xf1e64d9e10, -0x61c3c50c00, -0x71e2cd1e00, -0x1e3e6ef8f00, -0x61e24d9c00, -0x6164488e00, -0x61e4499e00, -0x61e7cd9e00, -0x30a24b1c00, -0x20c28f0c00, -0xe1224d8e00, -0x61264d0e00, -0x61e2cd0e00, -0x71a26d9e00, -0x21a64f9c00, -0x71e7cf1c00, -0xe124698e00, -0x61e4489e00, -0x61c2c78e00, -0x61664d8e00, -0x71a6498e00, -0x61e2448e08, -0xc1c3850c00, -0x60e2450c00, -0xe3264ccf00, -0xe2c4488e00, -0x41e2499c00, -0x60c2850c00, -0x30e3cf1800, -0x71e2cd1c00, -0x61c3870c00, -0x41e6499c00, -0x30e24b1c00, -0x41e44f8400, -0xc1c2cf8e00, -0x61e6490e00, -0x21e7499e18, -0xc166448e00, -0x23e4498e00, -0x31e44f1c00, -0x61e24d8e00, -0x6124488e00, -0xe3e44c8f00, -0x61a2499c00, -0x61e6499e00, -0x6166c90e00, -0xe1e7c58e00, 
-0x70e24d9e00, -0xe3644c8f00, -0x71a64f9e00, -0xe3644f8e00, -0x3c426c200, -0x7126448e00, -0xc142c58600, -0x6142850c00, -0x61c2cd8e00, -0x30a3499e00, -0x71e2c99c00, -0x71a2450c00, -0x71a2470c00, -0x71a64b1c00, -0x41e46f8600, -0x61e2c50e00, -0xe3e4489e00, -0x70e3c70c00, -0x61624d8e00, -0x71e6cd9e00, -0x61e6cf9e00, -0x6142c50c00, -0xc1c64c8e00, -0xc3e444c700, -0x163e6cd8e08, -0x61e2499e00, -0x20a2450c00, -0xe1e6cd8e00, -0xe1c2c58e00, -0xe364488e00, -0xe1224d0c00, -0x6164488e00, -0xc1c3870600, -0x61e6cd9e00, -0xc3644d8e00, -0x10c2090800, -0x61e6cd8f00, -0x60e3c51c10, -0xe3664c8e00, -0x21e74d9e00, -0x60c2870600, -0x6162448e00, -0xe1a2499e10, -0x61c2850e00, -0xe2244c8e00, -0x30e2cf1800, -0x61c2c50e08, -0xe366448e00, -0x30a24c9e00, -0x30e24d1c00, -0x1122cb1c00, -0x41e6489f00, -0x83c6c48f00, -0x30e64b1c00, -0x61e2450e00, -0x61c24d8e00, -0x1e6499c00, -0x71a6499c00, -0xe162450e00, -0x61e24d9e00, -0x71e4499e00, -0xf324699e00, -0xe164498e00, -0xe142c50e00, -0x30e28e1c00, -0x61624d9e00, -0xe3264c8e08, -0x61e64f8e00, -0x61e28d0c00, -0x6124489e00, -0xc146448e00, -0xe1a2458e00, -0x30e2cf1c00, -0x61c6c98e00, -0xc244489f00, -0x30e3cd9e00, -0x6142448e00, -0x61e6499e00, -0xe1e24d8e00, -0x31b24d9e00, -0xc3646f8e00, -0xe122458e00, -0x71b24d9e00, -0xe124499e00, -0x71e24f8e00, -0x30c28d1c00, -0x61a2499c00, -0x61c2458e00, -0x41c6cd9e00, -0xe1c2c78e00, -0xe324488e00, -0x61e24d8e00, -0x61c2850c00, -0x31a24d9e00, -0x61e4488e08, -0xe122450e00, -0x60c3c50c00, -0x61e64b9c00, -0xe122448f08, -0x6166491e00, -0x60e34f9c00, -0x60c3870c00, -0xe364488f00, -0xe324499e00, -0x61e4488e00, -0x61c2c50e00, -0x61c2cd0e00, -0x71a64d9e00, -0x21e34d9e00, -0x61a0498e00, -0x61e64f8e00, -0x60c3c70c00, -0xf1224d9e00, -0x61c2870c00, -0xe162448e00, -0xe2450800, -0x71a6499e00, -0x71e66d9e10, -0x30e3cf0c00, -0xe166498c00, -0x20e2c91e00, -0x61e6c99c00, -0x70e3cf1c00, -0x1e366478f00, -0xe3e64f8e00, -0x6142850600, -0x61e6491c00, -0x41c2850c00, -0x71a6489e00, -0x61e64f8c00, -0x61e24f0c00, -0xe1264c8e08, -0x71a6499e00, 
-0xb1a64d8e00, -0x61a24c8e00, -0x21e4481e00, -0x61c2850e08, -0xc142458600, -0x30a3cf1800, -0x43c5488f00, -0x71e6489f00, -0x61224d8e00, -0x71a6499e10, -0x2122450c00, -0x71e6c99e00, -0x30e6491c00, -0x61e4c90c00, -0x71e2cf1c00, -0xe364498e00, -0x6142448c00, -0xe1c2c58e00, -0x6126448e00, -0x30e24b1c00, -0x61e64d9e00, -0x21e6499c00, -0x6122488e08, -0x71a7491c00, -0x61e6499e00, -0x61e2c50e00, -0xf3244c8f08, -0x20e2401e10, -0x61e64d9e00, -0x61c28f0c00, -0x61a2850c00, -0x61e2c50e00, -0x61a24d8e00, -0x6142450c00, -0xe122448e00, -0x1e2e44c8f0c, -0xc0c1850e00, -0x61e24d8e00, -0x31e24b1c00, -0x21e6488e00, -0xe244c88f00, -0x61224d8c00, -0xa224488f00, -0xc364488e00, -0xe1e64c8f00, -0xe1e6cd8e00, -0xc146448f00, -0xc3c44e8600, -0xe1e6cd8e00, -0xe144489e00, -0x20c2870c00, -0x6142cd0c00, -0x61a2c70c00, -0x61e64c8e00, -0x6124409e00, -0x61861800, -0x30e24d9e00, -0x70e2871c00, -0x41e4288e00, -0x60c2850c00, -0xc162458e00, -0xe1e64f8e00, -0x61e64f9e00, -0x61e66d9e00, -0xc3644c8f00, -0x61c2c50e00, -0x61e6489e08, -0x61e6c70e00, -0xe1664d8e00, -0x60c2850e00, -0x60c1830c00, -0x61c3c50e00, -0x61c6cd0e00, -0xe162c50600, -0x61e6499e00, -0x61e74d9e00, -0xe226448e00, -0x6142448e00, -0x61e4cf1c00, -0x7124499e00, -0x7122cd1c00, -0x61e2cf0c00, -0x31e6cb1c00, -0xc142448600, -0xe3e4499e00, -0x107224491c00, -0x7164489e08, -0x10e38f1c00, -0x61e2c50e00, -0xf126689f00, -0xc3644d8e00, -0x61224d0c00, -0x31a64f1c00, -0x61664d8e00, -0x41644c8f00, -0x61c3830400, -0x70a24d1c00, -0x20e34d9c00, -0x20a2c91c00, -0xe1266c8f08, -0x30e3cf1800, -0x61e24d0e00, -0x30a34d8e00, -0xe364488f08, -0x60c2c70c00, -0x61e64c8e00, -0xc1e66c8f00, -0x7126498e00, -0x6364488f00, -0x61c2850e00, -0x30a2c50c00, -0x71e3cf1e00, -0x30a24d0c00, -0x61c2850c00, -0x20e2870c00, -0x71a6499e00, -0x30e3cf1800, -0x71e74f9e00, -0x61c2458e00, -0x61e2cd1e00, -0x71e6cf8e00, -0xe2a448cf00, -0x61e6499e00, -0x61c2c50e00, -0xe3644c8f00, -0x30a24b1c00, -0xe3e6cf8e00, -0x1e364489f00, -0xe1664c8e00, -0xe1264d8e00, -0xe64f0800, -0xe1e64c8e00, -0x1e1e1448e08, 
-0xe166cd8e00, -0x61c2c50e00, -0x40c2850a00, -0x61e6488f08, -0x7024c91c00, -0xc142c58600, -0x6164488e00, -0xe1e64d9e00, -0x30e2cd1e00, -0x71a4499e00, -0x60e64b9c00, -0xc142850600, -0x31a2cb1c00, -0x60e24d1e10, -0x61e24d8e00, -0x61a24d8e00, -0x6162cd0c00, -0x63e4498e00, -0x70a2499c00, -0xe324688f00, -0x71264d9c00, -0x61a24d8e00, -0xc146448e00, -0x6164c98e00, -0x6126489f00, -0x71a2450c00, -0x30e2870c00, -0x61a24b1c00, -0x71a2699e00, -0xe1a64c8e08, -0xe164488e00, -0x61e74f9e00, -0xe142c50e00, -0x7126488e00, -0x61e3cf9e00, -0x61e4698c00, -0x61e2458e00, -0x61c2850c00, -0x61e6499e00, -0x71e24d9e10, -0x71a6cf9e00, -0x60c2cd0e00, -0x30c2830c00, -0xe1e3c50e08, -0x31e2cd1e00, -0xc3c4489f00, -0x61e2850e00, -0xe364488f00, -0x61e2cd0e00, -0x61e6c90e00, -0xf3e66d9e00, -0x6166499e00, -0xe366cd8e00, -0xe224448e00, -0x41c2858e00, -0xe3e4489e08, -0x61e24d9c00, -0x71e66f8e00, -0x61c2c58e00, -0x61224d8e00, -0xf1e64f9e00, -0x61e2c58e00, -0x71e2cf8e00, -0xe364699e00, -0x61e64f9e00, -0x61e64c9e08, -0xe224499e00, -0x71e2cd1c10, -0xa3e6488f08, -0x30a24d1c00, -0x38e2491c00, -0x61e2458e00, -0xe1e2458e00, -0x60e3cd1c10, -0x61e64d8e00, -0x71e74d9e00, -0xc3c7cf8f00, -0x61e6cf8c00, -0xe1264d8e00, -0x31e24d1c00, -0x61664d8e00, -0x71a4489e10, -0x31e7cf9e00, -0x6162458e00, -0x61e6499e00, -0x61e2850e00, -0x31a2cf1c00, -0xe1644d9e00, -0x61e6cd8e00, -0x6142850e00, -0x71a64c8e00, -0x20e3851c10, -0xe3e64c8f04, -0x61224d8e00, -0x30e3cf1800, -0x61c2c50e00, -0x6162c50e00, -0xe1c7448f00, -0xe3644c4f00, -0x61224d0e00, -0x1e7c70400, -0xf3e66c9f00, -0x61e6489e00, -0x38c28f1c00, -0xe324488e00, -0x71a6491e00, -0x6142850c00, -0x61a2450e00, -0x6122448e00, -0xe1e64c8f00, -0x20e3850c00, -0x71e2c70c00, -0x61e6cf9e00, -0x63e4cf9e00, -0xe1264b8e00, -0x7122891c00, -0x41c2c48e00, -0x70e2cd0c00, -0x71e64d9c00, -0xe326448e00, -0xe144cd8e00, -0x41c2cc8f00, -0x63e644c700, -0x71e64d8e00, -0x61c6cf8e00, -0x1e76f8c00, -0x7122491e00, -0x30c3850c00, -0x61c2870c00, -0x61a2458e00, -0xc3644ccf00, -0x1e54f8c00, -0x60c3870c00, 
-0x30e3870c00, -0x61e2850c00, -0x21e64f8c00, -0x11e6c91c00, -0x41c2850600, -0x30e2491c00, -0xc1c2cc8e00, -0x61e6499c00, -0x20c2870c00, -0xe1c2c50e00, -0xc164289e00, -0x41c2850600, -0x6142c58e00, -0x63c4cc8f00, -0x30a64b1800, -0xe3664c8e00, -0x83e3458e00, -0x21e64b9e00, -0xc1c4cd0e00, -0x61e3451e10, -0x7104048e00, -0x30a24c9e00, -0x21664f1800, -0xf1264d9e00, -0x41c2850e00, -0xc146c58e00, -0xe1e3850e00, -0xc1464c8f00, -0x6126488e00, -0x61a2448e00, -0x70e64b1c00, -0x71e6488e08, -0x71e6499e00, -0x30a24d1c00, -0x61e2cd0e00, -0x1e64f8000, -0x30e3cf1c00, -0x6124491e00, -0x61e6c99e00, -0x61e6cd9e08, -0x61c2850e00, -0xe3264c8e08, -0x60c2870c00, -0x20e3cd9e00, -0x43c74ccf00, -0x30e3cd1c00, -0x70a24d0c00, -0x21e6498c00, -0x61e24c9e00, -0x73e6499e00, -0xe162488e00, -0x30e3cf1c00, -0x60c2450c00, -0xe144488e00, -0xe122458e00, -0x60c2cd0e00, -0x71a64d9e00, -0xe324488900, -0x30e34d1c00, -0x61e2448e00, -0x61624c8e00, -0x61a4289e00, -0x61e2cd8e00, -0x71e74d9e00, -0x30e2cf1c00, -0x71a24d9e00, -0xc102450c00, -0x71e6cf1c00, -0x71a6cd0e00, -0x71e64f9c00, -0x41e44f8c00, -0x61e64f9e00, -0x70e2cd1e00, -0x6122450e00, -0x6166489e00, -0x71e2cd0c00, -0x21e6499c00, -0x71a24d8e00, -0x61a24d8c00, -0xc1c2850e00, -0xe3e44c8f00, -0x6122448e00, -0x1e44f8000, -0xe1e2499e00, -0xc1c3870e08, -0x61e6cd0c00, -0x61a4489e00, -0x71a6491e00, -0x61c2850c00, -0xe126448e00, -0x61644d8e00, -0xc3e6448f00, -0x71a2450c00, -0x1e74f9e00, -0xe1264c8e00, -0x6166489e00, -0x31a24b1e00, -0x61a4499e00, -0xc3e644cf00, -0x61e2c70e08, -0x61e2448e00, -0x30e6499e00, -0x31a2491c00, -0x7126499e00, -0x71a6499e00, -0x1e3e44c8f00, -0x61e64f9e00, -0xe1e648d900, -0xe346448e00, -0xc3e66d8e00, -0x61f74f9e00, -0x30e2450c00, -0x71a64d9e00, -0x61c2458e00, -0x4164489e00, -0xe1644c8f00, -0x60a2450c00, -0x6120488e00, -0x31e24f9c00, -0xc126458e00, -0xe1664d8e00, -0x60c3870c00, -0x60e2870c00, -0x61c6cf9e00, -0x40c2850e00, -0x61e2c70e00, -0x60c2850c00, -0x6162450e00, -0x40c2850c00, -0x31f7499e00, -0x21c2490e00, -0x1e64f8800, -0x71a2cd1c00, 
-0x61224c8e00, -0x1e4499e00, -0x20e2850e00, -0xe324498e00, -0x71a6c91c00, -0x60e2cd9e00, -0x60e24f1c00, -0x6142488b08, -0xe3e64d9e00, -0x73e4489f00, -0xc2244c8700, -0x21e7cf9c00, -0xe1664c8e00, -0xe142458e00, -0x71a4489e00, -0x61c24c8e00, -0x61e64d8e00, -0x41e4498c00, -0x10f2489200, -0x7122491600, -0x61e5489e10, -0x63e64f8c00, -0x61e6499e00, -0xf326498e00, -0x30e28f1c00, -0x71e2c91c00, -0xe3e36d9e00, -0x30a3cf0c00, -0xe1a6489e00, -0x30e3870c00, -0x6106c98e00, -0x30a34f1c00, -0xe1c3850e00, -0x6122489e10, -0x21e2c51c10, -0x1e3264c8f00, -0x71e3c70c00, -0x61c2850c00, -0x60c2870c00, -0x61c2850e00, -0x61e6689f00, -0x71e6cf1c10, -0x30a34f1c00, -0x61264c8e00, -0x61c3c70c00, -0x61a2488e00, -0x10e24b0800, -0xc166488e00, -0xe3e6cd8e00, -0x30c3871c00, -0x41424d8e00, -0x61e64f9e00, -0x71a64d9e00, -0x61e24c8f00, -0x61c2c50c00, -0x61c2cf0c00, -0x6164489e00, -0x61c24d8e00, -0xe264489e00, -0xc126448600, -0x6162cd0c00, -0x6124488e00, -0xe1e64d8e08, -0xf1264c8e00, -0xe162cd8e00, -0x6324488e00, -0x71a2cf1c00, -0xe1c2cd8e08, -0xe3664d9e00, -0xe3e4488f00, -0x61c64d8e08, -0x61e2cf1c00, -0x31e64f8e00, -0x6104490e00, -0x31e3cf1c00, -0xc1c3870600, -0x71e6c91c00, -0x6142850e00, -0xe1e6cd8e00, -0x30a24d1c00, -0x60a2450c00, -0x6142499e00, -0x61624d8e00, -0x61e64d9e00, -0xe3e64d8e08, -0xe1c68d0e00, -0x61664d0e00, -0x30e34f9c00, -0x41e64d8e00, -0xe166448f00, -0x10f24b1c00, -0x30828f1800, -0x20a3450c00, -0xe142c58e00, -0xe164489e00, -0xc2c4484f00, -0x71a6491c00, -0x30e2cb1c00, -0x71a6488f00, -0xe3264c8e08, -0x6126498e00, -0x11f64f8800, -0xe1664c8f00, -0x71a6688e00, -0x71a64d9e00, -0x30e64b1e00, -0x30e2cd1c00, -0x41c2858e00, -0x61e2cf0c00, -0x31a6cf1800, -0x60a2440a00, -0x71a6499c00, -0xe1e6688f00, -0x70e2cd9c00, -0x61c2cd0e00, -0x20e2cd9e00, -0x71a66d8e00, -0x61e6cd9e00, -0xc364488f00, -0x1e6cf8e00, -0x61a2458e00, -0x61a64c8e08, -0xe3e64f9e00, -0x61a2459e10, -0x63e4488e00, -0x31a44b0c00, -0x71a2499e10, -0x71f24d9e00, -0x71a24d1c10, -0x6142850c00, -0x83c6478200, -0x71e6cd1c00, -0x61c2c48e08, 
-0x82c44c8e00, -0xe162458e00, -0x41c2c58600, -0x1c44f8000, -0x61a64d8e00, -0x30a24f9800, -0x20e264cf8e00, -0xf1264d8e00, -0x41c6c89e00, -0x61e2c58e00, -0x6142450e00, -0x61e2c58e00, -0x61624d8e00, -0x70e2c70c00, -0xe162cd8e00, -0x61e2cd0c00, -0x21e2449800, -0x30e3cf1c00, -0x60e2c91e00, -0x61e2850c00, -0x60e38d1c10, -0xe3a4488f00, -0x61e2cf0c00, -0x61e24f8c00, -0xe1664c8f08, -0x1e3e64c8f08, -0x61c28d0c00, -0x1e4499800, -0x61c3c58e08, -0x71a24d0e00, -0x61e6489f08, -0x61e3870c00, -0x70a2450c00, -0x60a2448e00, -0x71e6cd9e00, -0x6324489f00, -0xe162458e00, -0x71e24f1c00, -0x71a64f8e00, -0x61264d8e00, -0x61e2c50e00, -0x20e24b9c00, -0x71e6499208, -0x61a4488e00, -0x61e2c50e00, -0x6182850e00, -0xe142c58e08, -0x61a44f8c00, -0x70a2491c00, -0x41c2cc8e00, -0xe3e647c700, -0xe3e4488e00, -0x30a2871c00, -0x6162458e00, -0x61e4499e00, -0xe1e4488e00, -0xe142450e00, -0x41c2850e00, -0x61c2c70c00, -0x61224c8900, -0xc3644e8700, -0xc1c2c50e00, -0x61a6499e10, -0x30e2499e00, -0x61e6cd0e00, -0x61c2850e00, -0x61a4499e00, -0x71e2491c00, -0x61c2488e00, -0xe1e64c9f00, -0x60c2850c00, -0xc3c4c98e00, -0x71a6698e00, -0x71a6499c00, -0x31e64f9e00, -0x61e7ef8c00, -0x61c64c8f00, -0x6164488e00, -0xe1e2478e00, -0x61c2850e00, -0x61a24d1c00, -0x41c2870600, -0x61a4489e00, -0x41e4698c00, -0x71e6cf1c00, -0xe1e64f8e00, -0x71a2cd1c00, -0x20e24b9c00, -0xe124489f00, -0x61e2470c00, -0x61624d8e00, -0x6162448a00, -0x61e24d9a10, -0x61e2cf0c00, -0x31e24d9c00, -0x61c6c58f00, -0xe1e64f8e00, -0x102e6678400, -0x61a2448e00, -0x6122448e00, -0xc142850e00, -0x30e24f1c00, -0xe122458e00, -0x60e3cf1c00, -0x61e6499c00, -0x71a44b9c00, -0xe3664c8f08, -0x71a64d9e00, -0x21e64d0c00, -0x41c2448e00, -0x20c2850c00, -0x61e24c9e00, -0x30a2491c00, -0x61c2850c00, -0xe3e4488e08, -0x21e7cf9e00, diff --git a/samples/digitrec/digitrec/data/training_set_1.dat b/samples/digitrec/digitrec/data/training_set_1.dat deleted file mode 100644 index e3070666e..000000000 --- a/samples/digitrec/digitrec/data/training_set_1.dat +++ /dev/null @@ -1,1800 +0,0 @@ 
-0x2041020c00, -0x4081020400, -0x4081020400, -0x2041020800, -0x4081020c00, -0xc181830600, -0x60c1020400, -0x2001020400, -0x20c1060400, -0x60c1020c00, -0x1041020800, -0x1e041811f00, -0x1041861800, -0x20c1060c00, -0x1041020800, -0x20c3860800, -0x4081020400, -0x6081020400, -0xc1020800, -0x2041060800, -0x4081020600, -0x3041861800, -0x2041020800, -0x4081020400, -0x2041020c00, -0x60c1830600, -0x4081020c00, -0x3041860800, -0x2041060800, -0x4081020400, -0x20c1060810, -0x20c1820c00, -0x4081020400, -0x4081020400, -0x1041060800, -0x60c1060400, -0x60c1020400, -0x4081020400, -0x4081830600, -0x4081020400, -0x70c3870c00, -0x3041061800, -0x20c1060800, -0x4081020400, -0x4081020600, -0x60c1830400, -0x4081020400, -0x4081020400, -0x2041020800, -0x4081020400, -0x2001020400, -0x4081020400, -0x2041020c00, -0x1041041000, -0x2041060800, -0x4081020400, -0x4081020400, -0x4081020400, -0x3041861800, -0x2041020c00, -0x20c1020800, -0x60c1830400, -0x20c1020800, -0x1041060800, -0x2041020400, -0x6081020c00, -0x4081420c00, -0x4081020400, -0x40c1830400, -0xc1020400, -0x4081020200, -0x4081020400, -0x20c1020c00, -0x20c1020c00, -0x1041060800, -0x6081020600, -0x4081820400, -0x6081020400, -0x60c1020c00, -0x2081020400, -0x60c1060c00, -0x4081020400, -0x4081020c00, -0x20c1020400, -0x20c1060800, -0x4081020400, -0x60c1060c00, -0x2081020800, -0x20c1020400, -0x3041860c00, -0x4081020600, -0x4081020400, -0x1041860800, -0x2041060800, -0x81020400, -0x60c1020400, -0x2041020800, -0x4081020400, -0x4081020400, -0x2041860800, -0x20c1060800, -0x20c1020c00, -0x60c1820400, -0x20c1020400, -0x20c1020400, -0x20c1820810, -0x20c1020800, -0x20c1020c00, -0x60c1020c00, -0x4081020400, -0x2041020800, -0x3041860800, -0x20c1020800, -0x1041860800, -0x40c1020400, -0x4081020400, -0x20c1060800, -0x60c1060c00, -0x4081020400, -0x3041861800, -0x20a1040900, -0x4081020400, -0x2041060800, -0x20c1060800, -0x4081020400, -0x60c1020400, -0x2041820c00, -0x4081020600, -0x4081020200, -0x20c1860800, -0x2041020400, -0x4081020400, -0x20c1860c00, 
-0x20c1020800, -0xc183830600, -0x204080030c00, -0x2081020400, -0x40c1830e00, -0x20c1020800, -0x2041020c00, -0x2041020400, -0x60c1820400, -0x2041060800, -0x20c1020800, -0x20c1020c00, -0x2041020c00, -0x4081020400, -0x2041060800, -0x2041820c00, -0x30c3821c00, -0x4081020400, -0x6081020400, -0x4081030600, -0x2081020400, -0x20c1020800, -0x1041860800, -0x4081020400, -0x2041020800, -0x20c1020400, -0x40c1020400, -0x20c1020400, -0x20c1020c00, -0x3041060800, -0x8101030200, -0x1041041800, -0x20c1060800, -0x4081020400, -0x60c1870c00, -0x81020400, -0x20c1060800, -0x4181830600, -0x6081020400, -0x6081020604, -0x1041060800, -0x20c1020c00, -0x20c1820c00, -0x2041060800, -0x20c1020c00, -0x3041060800, -0x20c1060800, -0x4081020400, -0x3041060800, -0x60c1060400, -0x4081020400, -0x20c1820400, -0x4081020400, -0x4081020400, -0x4081020400, -0x2081020400, -0x2041020800, -0x2041060800, -0x2041820c00, -0x40c1830400, -0x3041060800, -0x4081020600, -0x20c1020c00, -0x20c1020800, -0x2081020800, -0x4081020600, -0x20c1060800, -0x4083020400, -0x20c1020400, -0x4081020400, -0x60c1060c00, -0x60c1820400, -0xc081020400, -0x2041020000, -0x4081030600, -0x204183870600, -0x60c1020400, -0x1041061800, -0x60c1020c00, -0x60c1020400, -0x4081830600, -0x60c1020408, -0x20c1020800, -0x2041020800, -0x4081020400, -0x4081030600, -0x4081020400, -0x4081830600, -0x40c1020400, -0x2081020400, -0x4081020400, -0x4081020200, -0x30c1060c00, -0x4081020400, -0x4081020c00, -0x20c1020c00, -0xc081030600, -0x2041860800, -0x4081020400, -0x4081020400, -0x6081020400, -0x4081830600, -0x40c1820400, -0x4081020400, -0x4081020400, -0x4081020400, -0x2041020800, -0x20c1060c00, -0x20c1020400, -0x20c1020800, -0x3041060800, -0x40c1820400, -0x4081020400, -0x2041820c00, -0x20c1861800, -0x60c1020400, -0xc1020400, -0x4081020400, -0x2041020c00, -0x4081020400, -0x6081020400, -0x20c1860800, -0x40c1820400, -0x4081020400, -0x4081020400, -0x4081020600, -0x1041060800, -0x60c1060c00, -0x4081020400, -0x4081020400, -0xc1020000, -0x4081020400, -0x4081830200, 
-0x4081030600, -0x81020400, -0x8181030600, -0x20c1060c00, -0x3041060800, -0x2041060800, -0x81020400, -0x1041060800, -0x4081020400, -0x4081020600, -0x4081020400, -0x20c1860c00, -0x3041861800, -0x60c1020c00, -0x20c1020c00, -0x4081020600, -0x4081020400, -0x4081020400, -0x3041060800, -0x2041020800, -0x40c1020400, -0x30c1060800, -0x4081830600, -0x20c1020c00, -0x2081020c00, -0xc081030600, -0x2041820800, -0x4081020400, -0x4081020400, -0x20c1020c00, -0x2041020c00, -0x20c1060800, -0x4081020600, -0x40c1020400, -0x60c1020c00, -0x3041861800, -0x2041060800, -0x20c1020400, -0x4081020400, -0x60c1860c00, -0x2081020400, -0x4081020400, -0x60c1020400, -0x4081020400, -0x4081020400, -0x20c1020400, -0x2081020400, -0x4081830400, -0x1041860c00, -0x30c1861800, -0x20c1020c00, -0x40c1830600, -0x20c1020400, -0x20c1020c00, -0x20c1820c00, -0x4081020400, -0x20c1020800, -0x1061861800, -0x4081020400, -0x81020400, -0x4081020600, -0x20c1860800, -0x4081020600, -0x2041820800, -0xc081020200, -0x4081020400, -0x20c1060800, -0x20c1060800, -0x40c1820400, -0x20c1020800, -0x4081020400, -0x20c1820c00, -0x1041060800, -0x4081020400, -0x2041020c00, -0x1041060800, -0x2041060800, -0x20c1020400, -0x20c1820c00, -0x2041020800, -0x2041060800, -0x6081060400, -0x81020400, -0x40c1020400, -0x20c1860800, -0x40c1830400, -0x4081020600, -0x40c1830400, -0x40c1020400, -0x6081020c00, -0x20c1060800, -0x4081020400, -0x40c1020400, -0x40c1020400, -0x4081030200, -0x2041860800, -0x2081020c00, -0x40c1820400, -0x60c1020c00, -0x30e38e1800, -0x2081020400, -0x60c1020400, -0x20c1820800, -0x2041020800, -0x60c1860c00, -0xc183870400, -0x60c1820c00, -0x40c1020c00, -0x20c1060800, -0x2041020800, -0x2041020800, -0x40c1820400, -0x60c1020c00, -0x4081020400, -0x4081020400, -0x4081020400, -0x20c1020c00, -0x6081020c00, -0x4081020400, -0x20c1060800, -0x20c1020c00, -0x2081020400, -0x81020400, -0x6081020400, -0x81020c00, -0x20c1060800, -0x20c1860800, -0x4081020400, -0x4081020400, -0x20c1020400, -0x2041020400, -0x20c1020400, -0x60c1860c00, -0x4081030200, 
-0x4081020400, -0x2041060800, -0x4081020400, -0x2041020400, -0x4081020400, -0x20c1020c00, -0x20c1020800, -0x4081030600, -0x2041060800, -0x4181020600, -0x4081020400, -0x4081020400, -0x4081020400, -0x60c1020400, -0x60c1820c00, -0x2041860800, -0x4081020400, -0x20c1020c00, -0x2041020c00, -0x4081020400, -0x1041041800, -0x2041020800, -0x20c1020c00, -0x4081030600, -0x20c1860c00, -0x2041020400, -0x2081020800, -0x2081020400, -0x20c1060800, -0x60c1020c00, -0x1041860800, -0x81020400, -0x2041060800, -0x4081830600, -0x20c1060800, -0x60c1820400, -0x4081020400, -0x20c1060c00, -0x20c1020400, -0x60c1020e00, -0x20c1820800, -0x3041860800, -0x4081020400, -0x2041020c00, -0x60c1020400, -0x40c1820400, -0x4081020400, -0x4081020400, -0x4081030600, -0x20c1060c00, -0x2041020800, -0x2081020400, -0x4081020600, -0x4081020400, -0x40c1820400, -0x4081020400, -0x4081020400, -0x4081020400, -0x20c1060800, -0x20c1820c00, -0x20c1820c00, -0x4081030600, -0x60c1020e00, -0x60c1820400, -0x20c1020800, -0x4081830200, -0x4083020600, -0x1061861800, -0x4081020400, -0x60c1820c00, -0x6081020c00, -0x40c1830600, -0x4081020400, -0x181020200, -0x6081020600, -0x20c1860c00, -0x4081030600, -0x40c1830400, -0x4081020400, -0x61c4821e00, -0xc1020400, -0x20c1820c00, -0x40c1830c00, -0x4081830600, -0x2041820800, -0x20c1020400, -0x20c1060800, -0x4081020400, -0x30c1860800, -0x4081060c00, -0x2081020400, -0x4081020400, -0x20c1020400, -0x4081020400, -0x4081020600, -0x4081020600, -0x4081020400, -0x1061861800, -0x60c1020c00, -0x1041060800, -0x1041861800, -0x20c1860c00, -0xc081020600, -0x8181830600, -0xe040830e00, -0x60c1020400, -0x20c1060800, -0x20c1060800, -0x2041860800, -0x4081030600, -0x20c1060800, -0x20c1020c00, -0x2041860800, -0x6081020400, -0x20c1060800, -0x2041020400, -0x4081020400, -0x4081020400, -0x60c1860c00, -0x20c1020400, -0x2081020c00, -0x20c1020c00, -0x20c1860c00, -0x4081020600, -0x4081020600, -0x1061861800, -0x20c1020c00, -0x20c1020400, -0x2041060800, -0x20c1020c00, -0x4081020400, -0x60c1020400, -0x2041060800, 
-0x4081030600, -0x4081020400, -0x30c1060800, -0x2041020c00, -0x4081020400, -0x20c1020800, -0x20c1020400, -0x4081020400, -0x1041861800, -0x40c1020e00, -0x4081020400, -0x1041020800, -0x4081030200, -0x4081020000, -0x20c1020400, -0x2041820c00, -0x4081020600, -0x20c1060c00, -0x40c1020c00, -0x2081020400, -0x3041860800, -0x1061861800, -0x20c1060800, -0x4081020c00, -0x20c1060800, -0x204081031c00, -0x4081020400, -0x2041020800, -0x4081020600, -0x1041040c00, -0x4081020400, -0x20c1860c00, -0x4081030600, -0x20c1020400, -0x20c1060c00, -0x4081020400, -0x20c1020400, -0x20c1020c00, -0x20c1020400, -0x4081020400, -0x4081020600, -0x6081020600, -0x20c1060c00, -0x4081020600, -0x20c1020c00, -0x60c1820400, -0x2041020800, -0x2041060800, -0x2041860800, -0x81020400, -0x4081020400, -0x4081030200, -0x3041860c00, -0x4081020400, -0x4081020400, -0x60c1820c00, -0x4081020600, -0x6081020400, -0x20c1060c00, -0x4081020400, -0x20c3060800, -0x4081030600, -0x4081020400, -0x20c1020400, -0x20c1860c00, -0x20c1020c00, -0x60c1060c00, -0x40c1830400, -0x4081020400, -0x1061861800, -0x6081020400, -0x4081020400, -0x4081020600, -0x4081020600, -0x20c1860c00, -0x4081020400, -0x4081020400, -0x20c1020c00, -0x4081020400, -0x20c1020800, -0x2041820800, -0x20c1020400, -0x4081020400, -0x4081020400, -0x3041060800, -0x60c1820c00, -0x4081020400, -0x4081030600, -0x3061860800, -0x4081020400, -0x40c1020400, -0x60c1820c00, -0x4183030600, -0x20c1060800, -0x60c1860c00, -0x4081020600, -0x60c1020800, -0x4081820600, -0x1061861800, -0x20c1020400, -0x2041060800, -0x20c1020400, -0x3041060800, -0x20c3820408, -0x60c1830600, -0x4081020400, -0x40c1020400, -0x4081020400, -0x60c1020400, -0x60c1020600, -0x2041860800, -0x20c1060800, -0x4081020400, -0x4081030600, -0x4081020400, -0x20c1020400, -0x20c1060800, -0x20c1060800, -0x2081020400, -0x1041060800, -0x20c1060c00, -0x2041860800, -0x40c1020400, -0x4081020400, -0x4081020400, -0x20c1020c00, -0x4081020600, -0x2081020400, -0x4081030600, -0x20c1820c00, -0xe0c1830e00, -0x20c1020c00, -0x2041020c00, 
-0x20c30c4000, -0x2041860800, -0x2041060800, -0x2041020400, -0x20c1020400, -0x4081020400, -0x20c1020400, -0xc1020400, -0xc081030200, -0x20c1820810, -0x2041060800, -0x4081020400, -0x2041060800, -0x81020400, -0x2041020c00, -0x4081020400, -0x4081020600, -0x4081020600, -0x4081020400, -0xc081020700, -0x20c1020400, -0x3041860800, -0x2041020800, -0x20c1060c00, -0x60c1020400, -0x4081020400, -0x20c1060800, -0x2041060800, -0x20c1860c00, -0x40c1020400, -0x20c1020400, -0x20c1060800, -0x20c1020c00, -0x2081020400, -0x40c1020400, -0x4081020400, -0x20c1060800, -0x2081020400, -0x4081030600, -0x4081020400, -0x20c1020c00, -0x4081020400, -0x20c1020400, -0x2041060800, -0x2081020400, -0x41c3830600, -0x20c1020c00, -0x4081020400, -0x20c1860c00, -0x4081030400, -0x8081020200, -0x60c1820400, -0x4081020400, -0x2041060c00, -0xc181030600, -0x4081020400, -0x40c1060c00, -0x20c1020c00, -0x4081020400, -0x2081020c00, -0x2041820c00, -0x40c1820400, -0x2041060800, -0x2041860c00, -0x4181030200, -0x4081020400, -0x3041861800, -0x20c1060c00, -0x4081020400, -0x4081020400, -0xc181020600, -0x4081020600, -0x20c1820c00, -0x6081020400, -0x40c1830400, -0x4081020400, -0x3041060800, -0x4081020400, -0x20c1060c00, -0x2041860800, -0x4081020400, -0x2041020800, -0x2081020400, -0x41020400, -0x20c1020400, -0x4081020600, -0x4081020400, -0x61c1830e00, -0x20c1020800, -0x20c1020c00, -0x20c1060800, -0x60c1838e00, -0x2041020800, -0x4081020600, -0x20c1860c00, -0x4081020600, -0x20c1060800, -0x60c1020400, -0x2001020800, -0x4081020400, -0x2081020c00, -0x4081020400, -0x6081020400, -0x3041820800, -0x4081020400, -0x4083068800, -0x20c1060c00, -0x40c1830600, -0x2041820800, -0x20c1060800, -0xc1c3870600, -0x1041041000, -0x20c1860c00, -0x4081030400, -0x2041020800, -0xc081030600, -0x4081020400, -0x4081020400, -0x2041860800, -0x60c1820c00, -0x6081020400, -0x60c1020400, -0xc0c1830400, -0x4081020400, -0x20c1060800, -0x4081020c00, -0x2041020800, -0x6081020c00, -0x4081020400, -0x2081020400, -0x20c1860800, -0x4081020400, -0x40c3820400, 
-0x60c1020400, -0x20c1060800, -0x81020400, -0x2081020400, -0x2041020800, -0x1041860800, -0x6081020400, -0x20c1020400, -0x2041060800, -0x81020400, -0x20c1020c00, -0x20c3060c00, -0x20c1060800, -0x20c1020400, -0x60c1860c00, -0x4081020200, -0x4081020400, -0xc081030200, -0x20c1020800, -0x2041820800, -0x2041060800, -0x60c1060c00, -0x4081020400, -0x2041060800, -0x4081020400, -0x4081020400, -0x81020400, -0x6081020c00, -0x4081020400, -0x2041060800, -0x4081020400, -0x20c1060c00, -0x4081020600, -0x4081020400, -0x4081020400, -0x20c1060800, -0x2041020800, -0x2041020800, -0x20c1020800, -0x2041820800, -0x60c1020400, -0x4081020400, -0x30c1060800, -0x1061860800, -0x4081030600, -0xc081830600, -0x20c1020c00, -0x81020400, -0x8081030200, -0x20c1020800, -0x20c1020800, -0x4081020400, -0x81020400, -0x20c1860c00, -0x4081020200, -0x20c3820400, -0x4081020400, -0x81020400, -0x60c1020c00, -0x4081020400, -0x30c1860800, -0x4081020400, -0x81020800, -0x20c1060800, -0x4081020400, -0x20c1020400, -0x20c1020c00, -0x20c1020c00, -0x3041060c00, -0x2041860c00, -0x1041041000, -0x2081020400, -0x20c1860800, -0x4081020200, -0x3041060800, -0x2081020400, -0x20c1020c00, -0x4081020400, -0x4081020400, -0x20c1860c00, -0x1041061800, -0x20c1020c00, -0x20c1060800, -0x4081020400, -0x2041020800, -0x2041060800, -0x4081020400, -0x4081020400, -0x4081020400, -0x20c1060800, -0x4081020400, -0x4081020400, -0x4081030400, -0x60c1020400, -0x1041861800, -0x2041020c00, -0x6081020c00, -0x20c1060800, -0x3041060800, -0x3041860800, -0x1e1c1870f00, -0x4081020400, -0x30c1860800, -0x20c1020c00, -0x6081020c00, -0x2041020800, -0x4081020400, -0x30c1060800, -0x4081020400, -0x20c1020400, -0x2081020400, -0x4081030600, -0x20c1020400, -0x6081020c00, -0x2041820c00, -0x60c1020400, -0x4081020400, -0x6081020400, -0x4081020400, -0x4081020600, -0x2081020400, -0x2041821800, -0x20c1020400, -0x4081020400, -0x4081020400, -0x4081020400, -0x2081020800, -0x2041020800, -0x4081030600, -0x4081020400, -0x2041020800, -0x1041041000, -0x4081820400, -0x3041060800, 
-0x4081020400, -0x20c1820400, -0x4081020400, -0x6081020400, -0x2041060800, -0x4081020600, -0x60c1860c00, -0x4081830600, -0x4081020600, -0x20c1020400, -0x1041860800, -0x4081020400, -0x20c1020400, -0x20c1060c00, -0x60c1020c00, -0x4081020400, -0x4081020400, -0x4081020400, -0x4081020600, -0xc1020400, -0x4081030600, -0x2041020c00, -0x1061861800, -0x20c1020c00, -0x20c1020800, -0x4081020400, -0x4183830600, -0x4081020600, -0x1041861800, -0x4081020000, -0x20c1020400, -0x20c1020c00, -0x4081020200, -0x4081020600, -0x20c1020400, -0x4081020400, -0x4081020400, -0x20c1060c00, -0x4081020400, -0x2041860800, -0x2081020400, -0x1041861800, -0x40c1020400, -0x40c1020400, -0x2041020c00, -0x20c1860c00, -0x4081020600, -0xc081020400, -0x4081020400, -0x4081020400, -0x2041020400, -0x2041820c00, -0x20c1060800, -0x2041020400, -0x30c1060c00, -0x60c1020400, -0x20c1060800, -0x20c1860800, -0x2041020800, -0x4081020400, -0x20c1020400, -0x20c1020800, -0x20c1860c00, -0x60c1020400, -0x4081020400, -0x20c1060800, -0x20c1020400, -0x3041061800, -0x3041060800, -0x4081020400, -0x4081020400, -0x2041060800, -0x60c1020c00, -0x4081020400, -0x4081020400, -0x2041860800, -0x3041020800, -0x2041020800, -0x2041061800, -0x2041060800, -0x3041041800, -0x4081020400, -0x2041020800, -0x4081020400, -0x4081020400, -0x6081020400, -0x4081020600, -0x20c1020c00, -0x4081020408, -0x4081020400, -0x4081020c00, -0x4081020400, -0x2081020c00, -0x4081020400, -0x2041020400, -0x20c1020400, -0x4081020200, -0x20c1820c00, -0x6081020800, -0x4081020400, -0x4081020600, -0x3041860800, -0x60c1020400, -0x20c1020c00, -0x20c1060c00, -0x20c1020800, -0x4081020400, -0x4081020400, -0x60c1060c00, -0x3041061800, -0x2041860800, -0x60c1020600, -0x3041860800, -0x20c1860c00, -0x60c1020400, -0x4081020400, -0x4081830408, -0x40c1830600, -0x4081020400, -0x60c1020c00, -0x20c1060800, -0x10c1060800, -0x1041861800, -0x4183020600, -0x3041860800, -0x4081020400, -0x20c1060800, -0x4081030600, -0x60c1830600, -0x4081020400, -0x2041020800, -0x20c1060800, -0x2041820800, 
-0x2041060800, -0x2041020400, -0x4081020600, -0x20c1020408, -0x4081020c00, -0x40c1020400, -0x1041861800, -0x4081020400, -0x2041020c00, -0x81020400, -0x3041060800, -0x4081020200, -0x4081020400, -0x4081020400, -0x4081030200, -0x3041061800, -0x20c1060c00, -0x4081020400, -0x4081020400, -0x4081020c00, -0x4081020400, -0x4081020600, -0x30c3861c00, -0xc181030600, -0x4081020400, -0x4081020400, -0x4081020400, -0x4081030600, -0x20c1820c00, -0x20c1060800, -0x2041020c00, -0x2041020800, -0x4081020400, -0x81020200, -0x4081020400, -0x4081020400, -0x30c1060800, -0x20c1020800, -0x4081020400, -0x3041860800, -0x4081020400, -0x20c1060800, -0x4081020400, -0x60c1820400, -0x4081020600, -0x4081020200, -0x40c1830400, -0x20c1020c00, -0x4081020400, -0x2081060800, -0x2041020400, -0x4081020400, -0x4081020400, -0x2041860c00, -0x40c1020400, -0x20c1020c00, -0x4081020400, -0x4081020400, -0x1061861800, -0x3061861800, -0x60c1820c00, -0x20e1060c00, -0x20c1060800, -0x81020400, -0x8081020600, -0x4081020400, -0x4081020400, -0x4081020c00, -0x40c1820400, -0x4081030400, -0x20c1020400, -0x4081020400, -0x6083020600, -0x4081030600, -0x60c1020c00, -0x81020400, -0x40c1020400, -0x4081020400, -0x4081020400, -0x81020c00, -0x60c1820400, -0x3041860800, -0x4081020400, -0x20c1020c00, -0x2041820c00, -0x60c1020c00, -0x2041060800, -0x30c1860800, -0x2081020400, -0x20c1060c00, -0x4081020400, -0x60c1020400, -0x3041820800, -0x20c1020400, -0x4081020400, -0x4081020400, -0x4081020400, -0x4081030600, -0x4081020400, -0x20c1020c00, -0x1061041000, -0x2041060800, -0x4081020400, -0x6081020400, -0x4081020400, -0x2041020800, -0x1041860800, -0x4081030600, -0x2041020800, -0x4081020400, -0x60c1020400, -0x20c1060800, -0x20c1060800, -0x81020000, -0x2041020400, -0xc1020400, -0x4081020c00, -0x20c1060800, -0x6081020400, -0x60c1020400, -0x2041820c00, -0x20c1020c00, -0x4081830400, -0x3041860800, -0x4081020600, -0x4081020400, -0x6081020400, -0x4081030600, -0x4081020400, -0x20c1020c00, -0x81020400, -0x4081020400, -0x20c1060800, -0x20c1060800, 
-0x20c1060c00, -0x4081020600, -0x60c1820c00, -0x4081030600, -0x4081020400, -0x2041020400, -0x20c1020400, -0x20c1020800, -0x4081030600, -0x4081020400, -0x4081020600, -0x2081020400, -0x30c1060800, -0x3041860800, -0x4081020600, -0x1041861000, -0x4081020200, -0x20c1060800, -0x2041020800, -0x20c1060800, -0x4081020600, -0x4081020400, -0x20c1820800, -0x4081020400, -0x4081020400, -0x3041861800, -0x4081830600, -0x4081020400, -0x20c1020800, -0x81020400, -0xc0c1820400, -0x4081020400, -0x4081020400, -0x3041860800, -0x60c1020400, -0x4081020400, -0x60c1820c00, -0x2041060800, -0x4081020400, -0x60c1020c00, -0x4081820400, -0x4081020400, -0x4081020400, -0x2041020800, -0x2041060800, -0x20c1060800, -0x4081020400, -0x4081020400, -0x20c1060800, -0x20c1060c00, -0x20c1020c00, -0x4081020400, -0x4081020400, -0x60c1020400, -0x2041060800, -0x81020400, -0x4081020400, -0x20c1020800, -0x1041061800, -0x20c1060800, -0x4081020400, -0x60c1020400, -0x2041060c00, -0x4081020400, -0x4081030600, -0x4081020400, -0x20c1020400, -0x4081020400, -0x40c1020400, -0x60c1020400, -0x20c1020400, -0x4081020600, -0x20c1020c00, -0x4081020400, -0x204181078c00, -0x60c1820400, -0x2041060c00, -0x20c1060400, -0x4081020400, -0x3041861800, -0x6081020400, -0x20c1060800, -0x4081020400, -0x4081020400, -0x4081020200, -0x81020400, -0x2041020800, -0x4081020400, -0x20c1020c00, -0x4081020400, -0x4081020400, -0x3041861800, -0x4081020400, -0x3041060800, -0x6081020400, -0x30c3860800, -0x20c1020800, -0x60c1020c00, -0x4081020400, -0x2041020c00, -0x4081020400, -0x20c1020c00, -0x4081020400, -0x60c1860400, -0x2081020400, -0x20c1020400, -0x20c1020800, -0x1041060800, -0x20c1020400, -0x20c1060800, -0x4081020400, -0x2081020400, -0x4081830600, -0x4081020400, -0x2041060810, -0xc1020400, -0x41c1830400, -0x2041860800, -0x20c1020c00, -0x20c1820c00, -0x8181030600, -0x2041060800, -0x4081020400, -0x1041860800, -0x4081020400, -0x40c1830600, -0x4181810c00, -0x4081020400, -0x60c1060800, -0x81020400, -0x20c1020400, -0x81020400, -0x4081020400, -0x4081020400, 
-0x4081020400, -0x20c1820c00, -0x20c1060800, -0x4081020600, -0x2041020c00, -0x4083020600, -0x2041060800, -0x40c1020c00, -0x4081020400, -0xc1020400, -0x2041060800, -0x40c1820400, -0x4081020400, -0x40c1020400, -0x4081020400, -0x6081060b00, -0x4081020400, -0x4081020c00, -0x4081020400, -0x40c3830600, -0x2041860800, -0x2081020800, -0x4081020600, -0x2001020400, -0xc081020600, -0x4081020400, -0x4081030400, -0xc1020400, -0x20c1060c00, -0x4081020400, -0x20c1060c00, -0x40c1020400, -0x20c1020400, -0x60c1820c00, -0x40c1830400, -0x20c1820400, -0x81020400, -0x2041060800, -0x6081020400, -0x2041020800, -0x8081020200, -0xc081020600, -0x2081020400, -0x204181878e00, -0xc081030600, -0x6081020400, -0x4081020400, -0x4081020c00, -0x1061861800, -0x20c1060800, -0x4081020400, -0x4081020600, -0x20c1020c00, -0x20c1020800, -0x4081020400, -0x3041860c00, -0x4081020400, -0x4181020400, -0x20c1820c00, -0x20c1020800, -0x30c1060800, -0x4081030600, -0x3041860800, -0x2081020400, -0x20c1860800, -0x4081020400, -0x2041020400, -0x20c1020400, -0x4081020600, -0x60c1820c00, -0x2081020400, -0x40c1060c00, -0x4081020400, -0x6081020400, -0x20c1060400, -0x40c1830600, -0x60c3830600, -0xc081830e00, -0x2041060800, -0x4081020400, -0x60c1020c00, -0x4081020600, -0x2041060800, -0x3041860800, -0x4081020400, -0x8081030200, -0xc1020800, -0x1061861800, -0x40c1860c00, -0x20c1060800, -0x2081020800, -0x4081020000, -0x30c3860c00, -0x60c3870600, -0x30c3861800, -0x20c1020400, -0x81020400, -0x4081020400, -0x4081020400, -0x40c1020400, -0x2041060800, -0x4081020400, -0x20c1860800, -0x4081020400, -0x2081020400, -0x2041060800, -0x4081030200, -0x4081030600, -0x20c1020400, -0x61c3820e00, -0x4081020400, -0x30c1861800, -0x4081020400, -0x20c1020400, -0x4081020400, -0x20c1020400, -0x4181830600, -0x40c1820600, -0x20c1020c00, -0x4081020400, -0x4081020200, -0x4081020400, -0x4081020400, -0x4081020600, -0x60c3870c00, -0x2041060800, -0x2041020000, -0x2041860800, -0x40c1020400, -0x40c1830600, -0x3041860800, -0x20c1060800, -0x60c1060c00, 
-0x4081020400, -0x6081020400, -0x4081020600, -0x2041060800, -0x20c1020800, -0x40c1020400, -0x4081030200, -0x20c1860c00, -0x4081020200, -0x4081830600, -0x4081020400, -0x4081020400, -0x20c1020400, -0x20c1060800, -0x3041060800, -0x20c1020c00, -0x40c1020600, -0x2041020c00, -0x2041020400, -0x20c1060c00, -0x20c1860c00, -0x4081020400, -0x4081020600, -0x4081020600, -0x4081020400, -0x4081020200, -0x4081020400, -0x20c1020c00, -0x40c1820400, -0x60c1020400, -0x20c1020400, -0x20c1060400, -0x4081020400, -0x60c1020c00, -0x4081020400, -0x4081020400, -0x2041860800, -0x1041041000, -0x3041860800, -0xc1c1830600, -0x2041060800, -0x60c1830c00, -0x20c1020400, -0x4081820400, -0x6081020e00, -0x4081020400, -0x2041060800, -0x60c1830600, -0x2041020800, -0x20c1020800, -0x4183830300, -0x2041020800, -0x4081020400, -0x2041060c00, -0x4081020600, -0x4081030400, -0x3041061800, -0x4081020400, -0x2041000400, -0xc1020400, -0x4081030600, -0x4081030200, -0x40c1020400, -0x1041061800, -0x40c1020400, -0x4081020400, -0x3041860800, -0x60c1020c00, -0xc181030600, -0x4081020400, -0x2041060800, -0x40c1830600, -0x2041060800, -0x40c1020c00, -0x30c1841800, -0xc081030600, -0x20c1060800, -0x40c1020c00, -0x4081020400, -0x4081020600, -0x60c1830600, -0x2041020800, -0x2041060800, -0x2041020800, -0x60c1020400, -0x2041020800, -0x20c1060c00, -0x20c1060c00, -0x20c1820400, -0x6081020400, -0x4081020200, -0x4081020400, -0x20c1020c00, -0x20c1020c00, -0x4081020400, -0x4081020400, -0x60c1060800, -0xc081830600, -0x4081020200, -0x2081020400, -0x20c1020c00, -0x2041020c00, -0x41c1820400, -0x2041060800, -0x20c1020400, -0x60c1020c00, -0x4081020400, -0x20c1020c00, -0x2041860800, -0x2041060800, -0x2041020c00, -0x20c1020c00, -0x4081020400, -0x2041060800, -0x4081020400, -0x20c1060c00, -0x20c1060800, -0x2081020400, -0x20c1860c00, -0x4081020400, -0x4081020400, -0x204081070c00, -0x20c1860c00, -0x2041860c00, -0x20c1020c00, -0x4081030400, -0x2041060800, -0x20c1060c00, -0x2041860800, -0x60c1020400, -0x20c1060800, -0x4081020400, -0x4081020400, 
-0x40c1820400, -0x20c1020400, -0x4081020c00, -0x4081020600, -0x2041020c00, -0x20c1020400, -0x6081020c00, -0x40c1830600, -0x20c1060800, -0x1041061800, -0x1041860800, -0x4081020400, -0x20c1020400, -0x4081020400, -0x20c1060800, -0x4081020200, -0x2083020400, -0x20c1020400, -0x4081020400, -0x20c1020400, -0x30c1860c10, -0x4081020400, -0xc081030600, -0x20c1020400, -0x2081020800, -0x2041020800, -0x4081020400, -0x4081020400, -0x60c1060c00, -0x4081020400, -0x4081830400, -0x1041020800, -0x4081020400, -0x81020400, -0x81020400, -0x4081020600, -0x4081020400, -0x20c1020400, -0x20c1020400, -0x2081060c00, -0x4081020400, -0xc081030600, -0x2041860c00, -0x60c1860c00, -0x40c3020400, -0x4081020400, -0x6081020600, -0x4081020400, -0x4081020400, -0x1041861800, -0x60c1820400, -0x8181030600, -0x3041860800, -0x1061861800, -0x2041020c00, -0x20c1020c00, -0x20c1020c00, -0x4081020400, -0x20c1020800, -0x20c1020800, -0x60c1060c00, -0x20c1020c00, -0x20c1060c00, -0x20c1020c00, -0x2041060800, -0x60c1020400, -0x4081020400, -0x20c1020c00, -0x40c1830400, -0x204081070c00, -0x20c1060c00, -0x4081020400, -0x2081020800, -0x4081020600, -0x4081020600, -0x4081020400, -0x40c1020c00, -0x20c1820400, -0x20c1020800, -0x30c1060800, -0x4081020400, -0x20c1060800, -0x4081020600, -0x2041020c00, -0x4081020400, -0x20c1020c00, -0x20c1020400, -0x6081020c00, -0xc081020600, -0x2041860800, -0x4081020400, -0x81020400, -0x4081020600, -0x4081020400, -0x4081830600, -0x2041060800, -0x2041020400, -0x4081020600, -0x20c1020c00, -0x2081020400, -0x20c1020800, -0x20c1060800, -0x2041060800, -0x2041020800, -0x40c1020400, -0x4081020400, -0x20c1060800, -0x4081020400, -0x20c1060800, -0x4081020400, -0x4081020400, -0x60c1020400, -0x81020400, -0x4081030600, -0x2001040800, -0x20c1060800, -0x60c1020c00, -0x40c1020400, -0x2081060c00, -0x2041060800, -0x20c1820c00, -0x4081020400, -0x20c1020800, -0x20c1020400, -0x2041020c00, -0x2081060c00, -0x6041020c00, -0x2041020800, -0x40c1820400, -0x20c1020800, -0xc1020400, -0x60c1020408, -0x1041041800, 
-0x40c1020400, -0x20c1060c00, -0x20c1020c00, -0x2041060800, -0x4081020400, -0x4081030600, -0x4081830600, -0x40c1870400, -0x20c1020800, -0x40c1830400, -0x60c3060e00, -0x60c1020c00, -0x4081830400, -0x2041020800, -0x4081020400, -0x2081020400, -0x20c1020c00, -0x2041060800, -0x4081020400, -0x2041860800, -0x20c1020800, -0x40c1830600, -0x2041020800, -0x20c1020400, -0x20c1060800, -0x4081020400, -0x4081020600, -0x2041020c00, -0x20c1020c00, -0x60c3860c00, -0x4081830600, -0x60c1060c00, -0x2041060800, -0x1041860800, -0x4081020400, -0x4081020600, -0x3041861800, -0x20c1020c00, -0x4081020400, -0x2081020c00, -0x4081020400, -0x4081020400, -0x20c1820c00, -0x20c1860800, -0x20c1060800, -0x60c1820400, -0x4081020400, -0x40c1830600, -0x4081830600, -0x20c1020c00, -0x4081020400, -0x20c1060800, -0x1041861800, -0x20c1060800, -0xc081030200, -0x20c1860c00, -0x3041060800, -0x60c1820400, -0x4081020400, -0x30c1060800, -0x2041020800, -0x40c1020400, -0x3041060800, -0x20c1020400, -0x20c1020400, -0x20c1020c00, -0x20c1020400, diff --git a/samples/digitrec/digitrec/data/training_set_2.dat b/samples/digitrec/digitrec/data/training_set_2.dat deleted file mode 100644 index 8411863c8..000000000 --- a/samples/digitrec/digitrec/data/training_set_2.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0xc0c1020f00, -0xe1c3870a00, -0x71608e1e00, -0xc3c081070c, -0xe340878d00, -0x6020870e00, -0xe14087ca00, -0x70a0cf1800, -0x6041870800, -0xe0c38f8300, -0xe0618f1b00, -0x70a08f1800, -0xe161cf9c00, -0xe041820f00, -0xf0430e1700, -0x70238f1800, -0xe041078e00, -0xe060871f00, -0x20e0438f8300, -0x21c08f9000, -0xf320860e18, -0xc1868000, -0xe040870e00, -0xf060871b00, -0xc1c187c800, -0x60608f1900, -0xc0c0860700, -0x6040870e00, -0x2020438b0800, -0xe0474b8e00, -0xc081078e00, -0x71c08f9800, -0x43e38f8900, -0x21e0820830, -0xc08107c000, -0xe1c38f8e00, -0x6041821e00, -0xc041830900, -0xe140870c00, -0x6361c59c00, -0x1c043c78700, -0xe060861f00, -0x60c1cf8e00, -0x70a0cf0e00, -0xe041830e00, -0x70c30f9a00, -0x3c4830700, -0x6041c71c00, -0x6041860e00, 
-0x71e08f1a00, -0x61c304ce00, -0xe140878c00, -0xe0c1060a00, -0x70208f1800, -0x7021871c00, -0xc1c36f8000, -0xc041870c00, -0x2020410f8000, -0x60c10f8000, -0xe24187cc00, -0x40c083078f00, -0xe0428f9800, -0x1c0c18e8f00, -0x41c040878c00, -0xe343870d00, -0xe041868e00, -0x6140860e00, -0xc0c30fc800, -0xe0438b1e00, -0x60e08f9800, -0x60c78f8800, -0x6063cf9900, -0x4023c98c00, -0xe040871100, -0x21a1cf9000, -0x71a0820c10, -0x61c1078c00, -0x61c1820e00, -0x43c5028600, -0xe043870b00, -0xe040870c00, -0xe041840f00, -0x1c78b8000, -0xc0c38fc300, -0x7020cf9800, -0xe060870e00, -0x40c1870a00, -0xe040870d00, -0xe241878f00, -0xf123cf9800, -0xc141078000, -0xe041879e00, -0xf0618f9e00, -0x6081020600, -0xc0c1078800, -0x1060438f1a00, -0xe060831c00, -0xe14182080c, -0xc0c1878e00, -0x70e08f1800, -0x40f0274f9f00, -0xe0418f8880, -0xe0c1040f00, -0x7041048e00, -0xc081020700, -0xe1c0878c00, -0xc041860e00, -0xe141878d80, -0x60c38f9800, -0x6041060e00, -0x61c1cfdc00, -0xf0618c1e00, -0x1e1c187ce00, -0x40c1020c00, -0x61c0870e00, -0x6140cf1800, -0x1c240870f00, -0x41c3cf0800, -0x107041059f00, -0x3063cf1800, -0xe344820f00, -0x1c081020700, -0x4143870d00, -0xe163870a00, -0x206040821e00, -0x20e0c1060f00, -0x61408f1900, -0x41c081878f00, -0x2060438f0c00, -0x21e1c18f9f00, -0x71e08f9800, -0xe040878900, -0xe0438d8d00, -0x1f061869f00, -0x2060c3078f00, -0x1020418f0000, -0xe041060c00, -0x71218f1a00, -0x81e70000, -0xe040871b00, -0x1e1e1879f00, -0xe1c38f8d80, -0x61e1c71c00, -0x6141871c00, -0xc1c0820f00, -0xe141860700, -0xe141820f00, -0x1c040878400, -0x381078600, -0x60418f9400, -0x70e28f1e00, -0x4080830400, -0x1c0c1078400, -0xe0c1860f00, -0x7023ce1800, -0x61438d0e00, -0xe0c10e8f00, -0xe0c3cf8800, -0x20f062cf9d00, -0x1e041020f00, -0x40c041871900, -0x2020c1070c00, -0x60e78f9b00, -0x6041060600, -0xc0c1060700, -0x180c0878e00, -0x60a0cf1a00, -0xe140870b00, -0x6161871c00, -0xe141860f00, -0x60428b1c00, -0x60c38b8d00, -0x20c3ee0800, -0x4040870200, -0x1c3c0cf8c00, -0x1c083078700, -0xe3e0870f00, -0x60478f8100, -0x6060cf1c00, 
-0x6040860e00, -0x71e38f1b00, -0x181ce0000, -0x61c0871e00, -0xe040861e00, -0xa041870800, -0xe1c3cfdc00, -0xf0238f1000, -0xe143878f80, -0xe0c3060e00, -0x70a3cf1800, -0x6123c99c00, -0x61c18f8000, -0xe041870b00, -0xe0408f0b00, -0xe0c1878f00, -0x40410f8800, -0x71e1041e00, -0x60678f0200, -0xe0c38fc300, -0x71e1cf9c00, -0x2060478b8800, -0x80428f8000, -0x20e0428f8800, -0xe060cf9800, -0x1e34187cf00, -0xe041071000, -0x40c18f9000, -0x61e18f9000, -0x20e041840f00, -0x404043870000, -0x7141061600, -0xc0c0860300, -0x2063cf9c00, -0xc0c0820e00, -0xe3c1878f00, -0xc1438a8800, -0x70228e0e00, -0xe021879900, -0x40c0c0878c00, -0xe1c1069e00, -0xe041040e00, -0xe1c1078f00, -0xe0638f1c00, -0xf0234f9d00, -0xc240870f00, -0x2060438f0a00, -0x20e041070b00, -0xe041038e00, -0x206040870c00, -0xe0e7cf8100, -0x30238f0a00, -0x1070618c1f00, -0x61a04f9c00, -0xe041e78800, -0x43c1878800, -0x70a3cf1900, -0xc1c38fc800, -0xe040879c00, -0xc040870c00, -0x103808000, -0xe0438b0f00, -0x7020cf0a00, -0xe341cf9c00, -0xf022cf0e00, -0x40e0618f9700, -0xc081060780, -0xe143850f00, -0x1020c1078800, -0x61608f9c00, -0xc0c0c78c00, -0x2043860800, -0xc0c1878c00, -0x20e041060f00, -0xe041878c00, -0x183c7c000, -0x206043cfcc00, -0x1c0c1e79c00, -0xe041079f00, -0xf060871b00, -0x60438f9800, -0x6041070a00, -0xe040870e00, -0xe1608f1f00, -0x1c18f8000, -0x183278000, -0x8383c78000, -0x60478f1100, -0xe1c18f1d00, -0x20e34b0800, -0xe0c14f9000, -0x6361870c00, -0x70608f1800, -0x6041079a00, -0x1c0868000, -0x6060870e00, -0x6020860e00, -0xe0438f1800, -0x71e0871900, -0x60609f0000, -0xe0428f0e00, -0x7063cf1e00, -0xc0c1070f00, -0x20e041878800, -0xc0838f8080, -0x41c30f8000, -0x181c187cf00, -0x70618f1000, -0xc3c187c300, -0xe341870b00, -0x20e041040e00, -0xe040870f00, -0x2060438f8c00, -0x70e38f1a00, -0xe3e0cf1f00, -0xe0438f8b00, -0x6060870e00, -0x6140830400, -0xc3c0c78c00, -0x60208f1800, -0x70a3cf9800, -0xe141858c00, -0xc1c3c78c00, -0xf0410c1f00, -0xe0c30f8f00, -0x180c3878000, -0x41c1879900, -0x6040860600, -0x1e0c1020f00, -0x21e0cf9800, -0x302063ce1800, 
-0xe061cf9c00, -0xe3c18f9d00, -0xe060c70c00, -0x71e0861f00, -0x70a18f1000, -0x182078000, -0x31a04f0c00, -0x307023cf1a00, -0xe141860e08, -0x1e2c1878f00, -0x6041070a00, -0x60438b9800, -0x60430f0300, -0xc1c18f1900, -0x61c18f8000, -0xe040830c00, -0xe1438f9d00, -0xc0418fc800, -0x60438f9800, -0x6060c10f8f00, -0xc081020600, -0xe0c3079980, -0xe3e0c61e0c, -0x6040879800, -0xe043870d00, -0x60e2c70c00, -0x6040860a00, -0xe060cf1f00, -0xf0608f1a00, -0x30278f0000, -0x60c1068e00, -0x61c18f8e00, -0xc041878900, -0xe041479c00, -0xc04087cc00, -0x181670000, -0x1c043c70e00, -0x1c040878800, -0x1c0c3878800, -0x61c1871e00, -0xe0418f1100, -0x1e040820f08, -0xc3c187ce00, -0xf1e14f9c00, -0xe1a24f1e00, -0x1c081038c00, -0x206041860e00, -0xe141878f00, -0x1c38f8000, -0x1e3c0838e00, -0x20a041041e00, -0x70418f8900, -0x3020438f0a00, -0x1c040878e00, -0x40e0c1878f00, -0x21e38f8000, -0x387038200, -0xf1e38f9f00, -0xe041c78e00, -0x70618f0600, -0xc0c3830d00, -0xe141868f00, -0xe1438f9d00, -0x40c1870a00, -0x1e0438f8d00, -0x20f0208f9d00, -0x61c18f9900, -0x63e4c79c00, -0xe140830e00, -0x8381c78000, -0x1c040850c00, -0x60438e8000, -0x61c1870c00, -0xc1c307c700, -0xe0c107cc00, -0xe1c1839f00, -0x1e040871800, -0x61c0870e00, -0xe041871e00, -0xc0408f8000, -0xe0208f9800, -0xe3c3871f00, -0x6161878c00, -0x808107c000, -0x20c08307cf00, -0xe041860e00, -0x21e261cf9d00, -0x70265f9c00, -0x6041871800, -0x1c0c30f8f00, -0xe361cf9c00, -0xf1e38b9d00, -0x181cf0000, -0x60a0870c00, -0x20e0608f0d00, -0xe3e0879c00, -0xc0c1830e00, -0x30228b1600, -0x60c3079400, -0x60c38f9800, -0x71e18f1c00, -0xe062cf8e00, -0xc1c1878c00, -0xc0c1078f00, -0x1c040830c00, -0xc0c1078000, -0x1c0418f8900, -0x71a0c71c00, -0xc081060700, -0x30a08f9800, -0xc041020e00, -0xc041878c00, -0x6041870a00, -0xf1a3cf9c00, -0x20e0c1078c00, -0x206040871c00, -0x41c08f1e00, -0xc0c1870f00, -0x60c10f3200, -0x61c08f0e00, -0xc143020700, -0xc1c1878c00, -0x41e041878e00, -0x70208f1200, -0x6063841e00, -0x1e0e1860f00, -0xe041020e00, -0xe1408f0c00, -0x61e0cf1b00, -0x60c3cf8000, 
-0x71e1861e00, -0x4183078000, -0xc1834f8000, -0x60638f1a00, -0xf0224f1e00, -0xe1e1c70e00, -0xe140831f00, -0x40c0c1878c00, -0x41c041879b00, -0x6140879880, -0x6040870c00, -0xe1c0879800, -0xe041860e00, -0x20e0438b0900, -0x80830f8000, -0xc360c39c00, -0xe243cd8e00, -0xe160878c00, -0x1c040870e00, -0xc081048e00, -0x1e0204f1700, -0x41c3820600, -0xe1e38f9f00, -0xe341068e00, -0x20e040821e00, -0x61c0870e00, -0xc040870e00, -0x60c08f0a00, -0xe0618f9d00, -0x40c040870e00, -0xe041061b00, -0x6060871e00, -0x1070618f9800, -0xe041870a00, -0xe163cf9c00, -0x70208f1a00, -0x404081078000, -0xe0c1861e00, -0xe043870f00, -0xe04f1000, -0x1c0c0838c00, -0x60418f1800, -0xe1418f8e00, -0xc041820f00, -0xe341048e00, -0x7020cf9800, -0xe041878800, -0x20e0c1870e00, -0xf022cf9800, -0xc3c1870f00, -0xc040c70600, -0x61c1279800, -0xc1c1c70c10, -0x60e067cf8c00, -0x1e020cf1c00, -0x1e161870c00, -0xe1238b9800, -0xc08f8000, -0x40e0c1079c00, -0x1c08f8000, -0x1071e18f1a00, -0xe1e1cf9900, -0x20e040870e00, -0xe140878e00, -0xe0638f9900, -0x1c040870300, -0xc0c0870a00, -0xc0c107c800, -0xe041870c00, -0xe1408f1e00, -0xe0274f8000, -0x41410fc000, -0x20e041061f00, -0x83c1878100, -0xe1c12f9a00, -0x60418f8100, -0x60c18f9c00, -0xe0408f9b00, -0xc28107cc00, -0xc08107cc00, -0x41c6830f00, -0x1e041860f00, -0x2041870800, -0xf061878e00, -0x60678f8000, -0xe0c1068f00, -0xe1408f8800, -0x61c0878c00, -0x1c0c1870b00, -0xe040870e00, -0x6041841f00, -0x20e043878c00, -0x20c18f1800, -0x40e0418f8300, -0x3060418f1800, -0x20e040871b80, -0x1c081038e00, -0xe041870f00, -0xe161861e00, -0xc0c1020600, -0x63e0cf9800, -0x6023cb9c00, -0x70a0cf1e00, -0xc1c0830e00, -0xc141830404, -0x7020871800, -0xe0414f9600, -0xc0404f9800, -0x206040870200, -0x1c3c38f8d80, -0xe1c1070a00, -0xf3a1cf9c00, -0xe121cf9c00, -0x6040860400, -0xe041078e00, -0x20c38f9000, -0x40c1870800, -0xc081078000, -0x61c0830c00, -0xe0438f1900, -0x70e18e1600, -0xe04087ce00, -0x6140820e00, -0x3c0cf0800, -0x6043cf1c00, -0xe042870c00, -0x1070238f9c00, -0xe041870e00, -0xe0428f9e00, -0x1e0c1060f00, 
-0xc081478000, -0xc3c0838c00, -0x6040871600, -0xc1c1838e00, -0x61c18f0a00, -0xe041040f00, -0x61c1861e00, -0x1f061868f00, -0x61a04f1e10, -0x60e78f8800, -0xe043cd8e00, -0x30e08f1800, -0x41e063478f00, -0x71e78f0100, -0x4140870c00, -0x1e041878f00, -0xe1418e9e00, -0x51418f1a00, -0xe041060b00, -0x7041071300, -0xe1618f9800, -0x60a38f9100, -0xe1c0870f00, -0x21c38f8000, -0x61c4878c00, -0x20e041070e00, -0xc040870e00, -0xe061cf9800, -0xc040870c00, -0x3027931800, -0xe040870e00, -0x7061871a00, -0x20e063cf9c00, -0xf1e0c71e00, -0xc043ef1800, -0x41c040878f00, -0x20e0438f8c00, -0x60e0c10f8f00, -0x70238f1a00, -0x418307c100, -0xe0438f8d00, -0xc3878000, -0x20e063c91e00, -0xe143049f00, -0xe0408f9800, -0xc08107c000, -0x60238f0c00, -0x3022cf9800, -0xc040870e00, -0x20e0c10e8700, -0x2060418f9800, -0x2040c1870400, -0xe141879e00, -0xc140870f00, -0x60438f9000, -0x60c0871e00, -0x202041870e00, -0x60638b1a00, -0x21e0430f8f00, -0x71a0cf1f00, -0x1e141870f00, -0x60c1020600, -0xe3e4cf8f00, -0x1e060c79c00, -0xe063c78c00, -0xe041878e00, -0xe1c0830f00, -0x1c0424f8e00, -0x60638f8100, -0xe160cf0d00, -0xe160831d00, -0xe0c0870e00, -0x1c0c1060700, -0x6040860e00, -0x60438d8c00, -0x70638f1b00, -0x63c0878900, -0x70678d0600, -0xe260830e00, -0x20e0c3878f00, -0xc0c30f0b00, -0x41c1cf0800, -0x70a1cf1a00, -0x41c143c78f00, -0xe041060e00, -0xe220478c00, -0xc041048e00, -0x20c081078f00, -0x41c0c1878e00, -0xe241868f00, -0xe24106df00, -0xe041079c00, -0xc0418f8000, -0xc240820f00, -0x30e28f1e00, -0xe041060f00, -0x18180830400, -0xc080870600, -0x1e041858c00, -0x60408f1e00, -0x60e1cf1800, -0x1c0c1870f00, -0x60638f0300, -0x20f020cf1f00, -0x2063ce0800, -0x6142870e00, -0x70e38f1e00, -0xc041870300, -0x302026cf0900, -0x1e0c1060f00, -0x61e1870d00, -0x7027cb1e00, -0x41e18f8000, -0x4043870c00, -0x20e041868e00, -0x70238f1800, -0x1e0438b9900, -0xe1c1879f00, -0x1e041058e00, -0x30e3cf1800, -0x70e08e1e00, -0x7020cf1800, -0xc1834f8000, -0x1e020c79e00, -0x4041870c00, -0xe3e08f9b00, -0xe0c1060f00, -0x70618f1a00, -0x40c1820e00, -0xe0418f9800, 
-0x1030278b1800, -0xe3c1878f00, -0x60428b1c00, -0xe14107c800, -0xe041839c00, -0xe260c78c00, -0x1c082078000, -0x6041078800, -0x6041070200, -0xe341878f00, -0x30238f1100, -0x6043870c00, -0xc1c387c800, -0x18081c70800, -0x2060408f9800, -0x1c040878c00, -0xc041878f00, -0xc0408f0900, -0x30649e0400, -0xe061879e00, -0xc041cf8e00, -0x4080038000, -0x71c1879c00, -0x60418f9900, -0xc081020e00, -0x7027cf9800, -0xe340878e00, -0x6041861e00, -0x406040878800, -0x71a0871e00, -0xe0608f9900, -0x70208f1a00, -0x7026cf9800, -0xf022cf9c00, -0x30608e1200, -0xe140c78f00, -0xc0c38fc800, -0xe1408f9900, -0xe040820e00, -0xe143878c00, -0x6043858c00, -0x71e38f1000, -0xe341878f80, -0x6041871e00, -0x2041860800, -0x6041020e00, -0xe040861f00, -0xc1c1870900, -0x61c1830700, -0xe027cf9c00, -0x60618f8100, -0x60428b0e00, -0x60478f8000, -0x60410f8800, -0x7120478e00, -0xc18387c000, -0x60438f9800, -0xe041878c00, -0x1e040820f00, -0xc0c30fce00, -0x7041071c00, -0xe040820e00, -0x71e1cf9c00, -0x4041860800, -0x20e027cf8d00, -0xe7cf0000, -0x6040871a00, -0xe140810f00, -0xe0410e1700, -0x1e0438f9d00, -0x6040870c00, -0x60418f9f00, -0xf120871e00, -0x6043878800, -0x1f0204f9f00, -0x61e0cf9800, -0x1c0c1878f80, -0x61e0871e00, -0x3060618f9800, -0x71a0cf1a00, -0xe040860e00, -0xe041878c00, -0x18380878400, -0xc3c3078000, -0xe1e18f8e00, -0x1c1c1820700, -0xc3c0838f00, -0xc1cf0000, -0x1c0c1060700, -0xe063cf1e00, -0x41c1070100, -0x70e0861e00, -0x6341060700, -0x1c041c50c00, -0x6041870800, -0xe041041f00, -0xf041820e00, -0x6040870800, -0xe043899c00, -0xe241028e00, -0x60c3ce1800, -0x2043ca8800, -0xc38107c700, -0x7020cf9800, -0x4081870c00, -0x2020c1078800, -0x3023cf9c00, -0xf060cf9e00, -0x70208f9800, -0xe043878c00, -0x20e041870900, -0x60618f1200, -0xe3e0cf1d00, -0x71a24f1c00, -0x1e0c3048f00, -0x38307c200, -0x1e041831900, -0xe140878c00, -0xe0e18f9c00, -0x306020cf1c00, -0x107043879800, -0x6041000e00, -0x8381078100, -0xe041079800, -0x6041070e00, -0xe344830700, -0x20e041871b00, -0x1c3c1878700, -0xc1c1870e00, -0x7041871c00, -0xc3ce0000, 
-0x1c34f8000, -0x60c1cf9600, -0x40830fc000, -0xe1418f9d80, -0xc0c1020700, -0x60264f8800, -0x60c78f8000, -0x1f221cf9f00, -0xe041840f00, -0xe060870a00, -0x71e1851e00, -0xf023cf9800, -0xe040820f00, -0xe1668b8c00, -0x1e040878800, -0xe1c1878c00, -0xc040830e00, -0x6140879a00, -0x60408f1800, -0x60c30f8000, -0x60c38f1a00, -0xe160871c00, -0x70208f1a00, -0x6040861e00, -0x30e18f1000, -0x70238f1e00, -0xf0430e1f00, -0x20e1c30c0f00, -0x2070208f1100, -0x40478d8000, -0xe3c08f9900, -0x7020cf1c00, -0xe041020f00, -0xe0c3078f00, -0x6041040e00, -0x6040870c00, -0xe041870b00, -0x60438b9800, -0xe0418f9e00, -0xc341860f00, -0x1e060820f00, -0x20e2cf0200, -0x207022cf1b00, -0xe3cf1000, -0x10e0c3048f00, -0x1c0c0878800, -0x70234f1e00, -0x202041058f00, -0xe063c98e00, -0x1e041c70c00, -0x61438f8900, -0xf1e1ce1c00, -0xe041820e00, -0xe2438d8d00, -0x202061860c00, -0x60e38f1a00, -0xc081020f00, -0x4040870800, -0xe041058c00, -0x7020871800, -0x40c0c1878f00, -0xe0618f1a00, -0xe041870c00, -0x6160870d00, -0x181c107cc00, -0x1070228f1b00, -0xe041078c00, -0x70238f9800, -0xe0c1070e00, -0xe1438f0f00, -0x60c1871800, -0x70418e1200, -0x40c0c32fc700, -0x81c60000, -0x41c1079000, -0x1e041871f00, -0x7020871a00, -0x61c1071e00, -0xe041070e00, -0xf020c71e00, -0x6041871c00, -0x71e1049f00, -0x1c243cd8c00, -0x7027db1c00, -0xe0410f9900, -0x6043878c00, -0xf061861c00, -0xe0234b1d00, -0xe0c1879900, -0x60428f8000, -0x20c0860c00, -0xe063cfcc00, -0xf1e1860f00, -0x60408f9800, -0x20e3cf8800, -0x60238f1200, -0xe0c18f9e00, -0x73e1879e00, -0x6041870d00, -0x3060410e1700, -0xe062cf9c00, -0x6041061300, -0xf1e7cb9f00, -0xf220431e00, -0x20e0c3040f00, -0x6041060e00, -0xc241860700, -0xe341860f00, -0x61c1870c00, -0xf061871c00, -0xe041060f00, -0x206041878c00, -0xe340879f00, -0x70c1879800, -0xc0c3878d00, -0x3c0838100, -0xe060871b00, -0xc1408f8c80, -0x20e1c1868f00, -0x1c040c10e00, -0xe1c1860700, -0x6040870700, -0x20e143c98e00, -0x6140878800, -0x381878000, -0x7020cf1e00, -0x6083060700, -0xe043870f00, -0x60408f8c00, -0xe0438f8000, -0x2060418f1b00, 
-0x6041860e00, -0xc081820700, -0x60e1850c00, -0xf0618f1b00, -0x6041871e00, -0x1e041840f00, -0x20e041058e00, -0xe0c38f1b00, -0xe040820e00, -0xe0c1078e00, -0xf061861e00, -0xc04082040c, -0xe041040f00, -0xf0618f1e00, -0x1e041860f00, -0x70e08f1a00, -0xe0204f1e00, -0xc0c1870c00, -0x21f0204f9f00, -0x1e060860e00, -0xc0c18f8000, -0x70639f9200, -0x60438f9800, -0x30628e1800, -0x60418f9800, -0x6142870c00, -0xc040871400, -0xe143c50e00, -0xe040879800, -0x6020628b0d00, -0xe140810e00, -0x1c2c0870700, -0x1c081878c00, -0x6043850e00, -0x1c0c0838900, -0x60438f0e00, -0xe0438f8c00, -0x71e1869e00, -0xe041870e00, -0x70e1879c00, -0x70e18f1200, -0x4081870800, -0x1e060cf9c00, -0x20e0608f0e00, -0x60c3ef8c00, -0xf141861f00, -0x61e3cf9800, -0xe1e6830d00, -0xe041860f00, -0x1c0c1020700, -0xe0408f8800, -0x41c0c1038f00, -0x30a28f1a00, -0xe0c30f8f00, -0x20e0418f1f00, -0xe3c1860f08, -0x6041820e00, -0x70e0871800, -0x70608f9c00, -0x30618f1800, -0x6060c30f8f00, -0x2060438f8800, -0x20e061879f00, -0x20e021cf1c00, -0x3c0828400, -0x30e08e1c00, -0x1c3c0c28600, -0x3c4838400, -0xc38347c000, -0xe0c2060b00, -0x70e0861e00, -0x1e043060f00, -0xe041079c00, -0x21c08f9800, -0x1808107c000, -0x2043070100, -0x60c1870e00, -0xe341068f00, -0x60418f9e00, -0x20478f8000, -0x1070a08f1b00, -0x61410f8100, -0x70608f1b00, -0x61438f8800, -0x30e3861e00, -0x6040870e00, -0xe1408f0f00, -0xe1c3878f00, -0x70e18f1b00, -0x70438b1a00, -0x60438f0e00, -0xe041861e00, -0x838107c000, -0x7020871c00, -0x3e0cf8800, -0x70638f1200, -0xe041860e00, -0xe0238b0a00, -0x60c1860600, -0x6040870c00, -0x31e08f1800, -0x1070408f1c00, -0x20e141cf9c00, -0xe140820e00, -0x181ce8000, -0xe060871f00, -0x1070678f8d00, -0x1c0c1020f00, -0x6140870e00, -0x70c1869800, -0x70208e1e00, -0x1c0c187cc00, -0xe0c1870e00, -0x70a1cf9c00, -0x2041070800, -0xe021870c00, -0x70a0cf1c00, -0x1c0c1830e00, -0x8383c78000, -0x6140879c00, -0x6144820e00, -0xe241860f00, -0xe3c0870f00, -0xe041040f00, -0x70a0cf1800, -0x81460000, -0xe0438f0b00, -0x1c181078000, -0xf041861f00, -0xc383078000, 
-0x6020678f8800, -0x70618f1900, -0x7064cf1e00, -0xc041860f00, -0xc0c3870700, -0x4020408f0c00, -0xe060451e00, -0xe260870c00, -0x6041870a00, -0xc1c10f8000, -0xe1418f8c00, -0xe0438f9c00, -0x20478e8000, -0x6041860c00, -0x4083040700, -0xf120861f00, -0x1e041048f00, -0x6020cf1100, -0xf0638f9900, -0x63c107cc00, -0x8181078700, -0xe043870f00, -0x60418f9800, -0x6041040f00, -0xc2c187cc00, -0xe0408f0c00, -0xe0408f1b00, -0x1c38f8000, -0x60438f0e00, -0x60608f1c00, -0x400f8000, -0x6043cf8c00, -0xe040830e00, -0xe0618f1c00, -0xc0c1060e00, -0xc241868600, -0x2060c1061f00, -0x87ce0000, -0x20e140870f80, -0x41e041078f00, -0x6081061b00, -0xe063899d00, -0x6021ce1800, -0x107060ce1800, -0xe141020f00, -0xe0428f8000, -0x20e0410ccf00, -0x1c083078700, -0xe040cf1c00, -0x1c081878c00, -0x61c1871a00, -0xe141060e00, -0x60438b1800, -0x6040870c00, -0x1e060c78e00, -0xe021861e00, -0xe1c0878d00, -0x2060438f0200, -0x1f0204f1f00, -0xc3c107cc00, -0x6043870e00, -0xe1438b9d00, -0xe060c71e00, -0xe160870e00, -0x61c0870e00, -0x1e042cf8900, -0xe0438f8000, -0x71e18e9700, -0x60c1060e00, -0xc040838c00, -0xe1e3cf8c00, -0xc1c78fdf00, -0xe0608f1900, -0x60c38f1a00, -0xc0c1070300, -0x204081020a00, -0xe0228f9c00, -0xe3604f8e00, -0xe141820e00, -0x106060871800, -0x6040870c00, -0xe041870c00, -0x183c60000, -0xe161c78c00, -0x7161861e00, -0xe140830d00, -0x183058c00, -0xc0c1820f00, -0x61c0879900, -0x1c081028700, -0xc040820f00, -0x6140820e00, -0x418c8000, -0x6141879f00, -0xe0638f1d00, -0x6042870c00, -0xf041049e00, -0x383078100, -0x71e08f9800, -0xe023879c00, -0xe063c98e00, -0x70430f0e00, -0x1c0c0878c00, -0x70e38f1c00, -0x60e1cf9c00, -0x70a3cf1a00, -0xe1438f8100, -0xe0408f9f00, -0x1e340838f00, -0xe043cf8c00, -0x20e043870900, -0x382838000, -0x60e063cf8d00, -0x1c3e0c70e00, -0xf120cf1e00, -0x60e08f1e00, -0x70618e0a00, -0xe0438f9800, -0xe1418f8080, -0x20e083040f00, -0xf023cf1800, -0xc08307c700, -0x206043878800, -0xe141820e00, -0xe0638e9000, -0x40c183078600, -0x183278000, -0x2020c3058e00, -0xe041879b00, -0x40e041079980, -0xf241841f00, 
-0x202041860e00, -0xe0c1078f00, -0x2067cf0800, -0xc3c1078b00, -0x20e0c1078c00, -0xe0410e1300, -0xe140870e00, -0xe060cf9c00, -0x70a18f1e00, -0x1e041878f00, -0xe363cf8f00, -0x71a04f1e00, -0x71e1873e00, -0x206043860800, -0x7141860e00, -0xe041020f00, -0xe041860e00, -0xe0234b9c00, -0x6063c50c00, -0xe0408f1e00, -0x1c043870f00, -0xe1438f0900, -0x6160870e00, -0xe0c1040f00, -0xe1e38f8100, -0xe0438e0300, -0x71e28f1e00, -0x107067cf9c00, -0x6160871f00, -0x60c78f0100, -0xe041870f00, -0x60c30f8000, -0xc241839c00, -0xe183078f80, -0xf1e1879e00, -0x6041879000, -0x70208e1c00, -0x71e1841e00, -0xe341020f08, -0xc041078c00, -0x2020478f8000, -0x60218f1800, -0x60c18f1a00, -0x4141870e00, -0x71e0cf1e00, -0xc08307ce00, -0x70618f9800, -0xe161870a00, -0x83c60000, -0xc081038600, -0xc0c1870d00, -0x61e3c78c00, -0xe341860e00, -0xe041868e00, -0x6043870800, -0x60618f1200, -0x61c7870200, -0xe140870d00, -0x71e3cf1e00, -0xc28f8000, -0x6041041f00, -0xe040870600, -0x6041870a00, -0x1e040830f00, -0x1e241878f00, -0xe041879f00, -0x1c381078700, -0xe0c1860e08, -0x1c081040700, -0xe140878c00, -0x70c1078c00, -0x3060418f8900, -0x43e0430f00, -0x60438f9800, -0x3e0cf9000, -0x70a68f1800, -0xc041820e00, -0x1e2c1068f00, -0x1c040820f00, -0x20c1cf0000, -0xc241820e00, -0x1c040878c00, -0xe140860a00, -0x818307c000, -0xc0810fc000, -0x1c0478b8d00, -0xe040831c00, -0xc0c0878f00, -0xc047cf8800, -0x1c240878e00, -0x107023cb9c00, -0x1070e78f9900, -0xe0428b0c00, -0x61e08f1a00, -0x6041040e00, -0x4181c20800, -0x3c0878800, -0x6142870500, -0x20e0c30f9e00, -0xe041078b00, -0x4040870c00, -0xe043899c00, -0x1e041820f00, -0x60418f1f00, -0xe081040e00, -0x31e1861c00, -0x404081078400, -0x202041860a00, -0x20e141870f00, -0xe14187d800, -0x3020070000, -0xe0428f0a00, -0x70e7cb9c00, -0x60c0871e00, -0xe0408f0b00, -0x1e04f0800, -0x3c0cf0000, -0x6041861a00, -0x71a28f1c00, -0x61c3cf9800, -0xe041860c00, -0xe140820c08, -0x6040871c00, -0xc140820e00, -0xc3058000, -0x70418f9000, -0x70238f9800, -0xe060cf9800, -0x6040871e00, -0xc14183cc00, -0x70e38f1c00, 
-0xe041068e00, -0x40c081030700, -0xe040cf8f00, -0x6041878c00, -0x7061841e00, -0x48183278800, -0xe0c1060f00, -0x20e043878900, -0xc0c1878c00, -0x4043870c00, -0x6027cf0900, -0x60e08f9b00, -0xe041060e00, -0x20e0638f8800, -0x2060438f8800, -0xe1c187df00, -0x61c68f0100, -0x6041820e00, -0xc040830e00, -0xc0c1838c00, -0x6041870800, -0xe041040e04, -0xf023cf9900, -0x60418f9900, -0x23e0870900, -0x60c3078800, -0x61c6830e00, -0x1c041858e00, -0x71e0871f00, -0x60274f0000, -0x70c38f1a00, -0x70438f0300, -0x101c60000, -0x1e0c1078f00, -0xe040c70c00, -0x31e1861c00, -0x1c0c1878e00, -0xe0418e8600, -0xc0408f8300, -0xe1c3870d00, -0xe1408e1f00, -0x1e040821f00, -0x1c081038c00, -0x60c1070c00, -0x30638f9800, -0xe041840f00, -0xe0438f9800, -0x30470e0200, -0x2060c38f8800, -0xe1c1878c00, -0x1e3c38fdf80, -0x7140861f00, -0xe0638b9900, -0x7022cf1800, -0x70e18f0200, -0x1e260820f00, -0x70c38f1b00, -0xe061060e00, -0xc3c3c7c000, -0xe166cf8e00, -0xc38f8000, -0x7021850c00, -0xf0208f1c00, -0xe041871e00, -0xf3208f1f00, -0xe041870f00, -0x106043870c00, -0x60438f0e00, -0xc0c1c78c00, -0xe0c1078f00, -0x61a0cf1e00, -0x1c0c0820f00, -0xe060871e00, -0x61e28f1e00, -0x206040870800, -0x71a0cf9c00, -0xc2c1038d00, -0x1c043c78c00, -0xc143878c00, -0x4040830fc800, -0x70678f9c00, -0x10f1c1871f00, -0xe040820c08, -0xe041860f00, -0xc0c1878980, -0xc1c187c000, -0xc04187cc00, -0xe1e3cf8c00, -0x30a3cf1800, -0xe16186cf00, -0x60a3871e00, -0x61c1860a00, -0xe0c18f9f00, -0x1e061860f00, -0x20e3cf9000, -0x83470400, -0xc040830c00, -0xe040870c00, -0xe240878c00, -0xe0438f1700, -0x20e041870a00, -0x87470000, -0xc140879800, -0x2063cf0c00, -0xc1c0c7cc00, -0x40408f8000, -0xc18387c000, -0x60418f8200, -0x102041870000, -0x20e0438b0900, -0x7022cf1e00, -0x10f0238f1d00, -0x70234b1800, -0xc0c1820700, -0xc081058c00, -0x60e0c38fc800, -0x3efc30700, -0x70e18f1100, -0x6041860e00, -0x1c18f8000, -0x1c0cf8800, -0xe061870900, -0x60478f8000, -0xe140870f00, -0x61c1860e00, -0x43c10f8000, -0x21e08f9a00, -0x3c3cf8000, -0xc043878100, -0xe1e0cf9c00, -0x6041060e00, 
-0xe063cb9800, -0xe340831f00, -0xc040878c00, -0x20e041870c00, -0xf120cf1c04, -0x61c1860a00, -0x30a08f1800, -0xe0438f8800, -0x30e38f9000, -0x60410f1300, -0xc040870c00, -0x61c105ce00, -0x6140830c00, -0x63c4838600, -0x70638f0200, -0x60c1060300, -0x40c081870300, -0xe020860e00, -0x1e1418f8f00, -0xc0c10f8900, -0xf0208e1b00, -0xe3c1028e00, -0x1c78f8100, -0xf1a3cf9900, -0x6043860600, -0xe041020e00, -0xe060c78e00, -0xc240470e00, -0xc1c30fcf00, -0x4040870e00, -0xe041820f00, -0x41c0c307cf00, -0x30e3cf1800, -0x61e08f9800, -0x20c38f1a00, -0x20e040870800, -0x60c0871800, -0xe141040f00, -0x2060608f9800, -0x7020861c00, -0x6042870c00, -0x40c1070800, -0x181078000, -0xc241078f00, -0x60410e0300, -0xe241820f00, -0xe0c08e9f00, -0x30208f1000, -0x70208f1800, -0xe041860f80, -0xc141820f00, -0x71e1cf3b00, -0x7041079200, -0x181078000, -0x1c0418f8100, -0xc081830c00, -0xe1608f9c00, -0x2040c1078800, -0x206041871800, -0xe0c1060f00, -0x20e160478e00, -0xe143cf0e00, -0xe340820f00, -0xe1c38f8f00, -0x6141820c08, -0x6063cf1c00, -0x60408f1800, -0xc3c0820e00, -0x20e043870d00, -0x2060418f9800, -0x60478f0300, -0xe060c78c00, -0xe1c107d800, -0xa040871800, -0x61c1878980, -0xe021cf9c00, -0xe140860a00, -0xe243c99f00, -0x6140870e00, -0xf020cf1800, -0xe041870900, -0x1c041060f00, -0xe041c78c00, -0x2060c30f8a00, -0x60c08f8000, -0x6140830c10, -0x20e0638f0e00, -0xe041871f00, -0x1c0438d8c00, -0xc0c30f8000, -0xc241870300, -0x60c30f0700, -0xc241878800, -0xe040860e00, -0xe040820e00, -0x70a08f9800, -0x6041860e00, -0x8307c000, -0xe040870e00, -0x1060430f8300, -0xe0618e9f00, -0xc3c107c700, -0xe0208f1e00, -0x60428f8800, -0x20e040879e00, -0xe141040f00, -0x6141860e00, -0x6140860c00, -0xe0418f9900, -0x2040860c00, -0x4181278800, -0xe0608f0c00, -0x70278f9800, -0xc0c107cf00, -0xe041870d00, -0xe1638f8000, -0xe041868f00, -0xf0208e1b00, -0xe0e1870e00, -0x60438f9900, -0x61e6830f00, -0x60608f1800, -0xe0438f8c00, -0xe340879c00, -0xe1e04f1900, -0x6041040e00, -0x6026cf1c00, -0x6063cb1c00, -0x61c38f8000, -0xe040c78c00, -0xe0238b9d00, 
-0x1e020870e00, -0x6141820600, -0x1c040870e00, -0x6041079000, -0x180c0810608, -0xe1c1070e00, -0x3027cf9c00, -0xe0408f8c00, -0x6141860e00, -0x31e08f9000, -0xe0608f9800, -0x1c0c1820708, -0x1c3c0878c00, -0x1c0c18f8000, -0xe0608f9800, -0x4040418f8800, -0x71e0cf9c00, -0xe140878c00, -0xc1c1870a00, -0x20e067cb1f00, -0xe0c1830f00, -0xc041860e00, -0xe141879f00, -0xc041c78e00, -0x40e042c78e00, -0x60438b1a00, -0xc1c187c800, -0x60410f9000, -0xf363841f04, -0x6140830c00, -0x18081030300, -0x6141868800, -0xe040820e00, -0xc381820700, -0xe040871800, -0xc0c1020f00, -0xe041020a00, -0xf061cf9c00, -0x6040870c00, -0xe1408f0c00, -0xe0638d1c00, -0x7040871e00, -0x7021cf1800, -0x1060408e1a00, -0x70638f1a00, -0x6341820f08, -0xc3c1078900, -0x2022cf0000, -0x20e041069f00, -0xe043c91f00, -0xe141068e00, -0x4040871800, -0xf161041f00, -0xe3e0c20c0c, -0x43c1820700, -0x30e18e1400, -0xf060861e00, -0xe041879800, -0xe061cf1c00, -0x60418f1b00, -0xe043870c00, -0xc040870e00, -0x4081078000, -0xe0c1058f00, -0xe0224f1e00, -0xe1c78f8000, -0x60c38f8d00, -0x2061cf1c00, -0xc041878c00, -0x60608f1c00, -0x70638f1000, -0xc0438f8200, -0xe240830f00, -0xe0438fc800, -0xe1c1878c00, -0x1e041870900, -0xc0c1879e00, -0x1c040878600, -0x60418e1e00, -0xe0408f9e00, -0xe060cf1c00, -0x1c281878580, -0xe041879800, -0x7041879800, -0x60c041078b00, -0xc080830c00, -0x1e3e1860f00, -0xe041820e00, -0xe3e0cf9f00, -0x206041070e00, -0x70a08e1c00, -0x40c0438f8580, -0x1030238b1a00, -0xc1c30f8000, -0xc040878c00, -0xf0608f1a00, -0x70e38f0300, -0xc141078000, -0x3c1cf8c00, -0xe3a6c30f00, -0x418107c000, -0x6041860e00, -0x107026cf9e00, -0xe0438f8800, -0x1c0c0878c00, -0xe240870f00, -0x61c3c79800, -0xc0c1870f00, -0x70a28f9800, -0xe043ce8800, -0xe3c1820f04, -0x7023cf1800, -0xe0430f8000, -0x6040871a00, -0x206041871f00, -0xf041861e00, -0xe061861f00, -0x20e0618f9c00, -0xe061840e00, -0x70a18f9900, -0xe040878c00, -0x1e023cf8c00, -0xe0438b9a00, -0x20e040831e00, -0xc0c18f9900, -0x8381e78000, -0x2027cf0000, -0x70e38f1b00, -0x418304ce00, -0xe1608f1f00, 
-0xc0c0820e00, -0xe0608e1b00, -0x4081078800, -0xf0618f9800, -0x60c08f8200, -0x6066cb1c00, -0x1e2628f9900, -0xe0418f0e00, -0x1e062cf9d00, -0xe040870c00, -0x71a08b1800, -0xc081020700, -0x70418f1000, -0xe0c3040f00, -0x808107c000, -0xe260879800, -0xf320cf1900, -0xe240871900, -0x1060408f1e00, -0x1c2c1078700, -0x40c10f8300, -0x1810f8000, -0xe023cf9800, -0x61c38f9c00, -0xc240830d00, -0xe040820e00, -0x204040870a00, -0xe060878c00, -0xe041c70c00, -0x43c1020700, -0x61478f8300, -0x60c1870e00, -0x61608f9900, -0x3027cf1000, -0xe060821e00, -0xe344831f00, -0xe0c1060f00, -0xe0c1070f00, -0xe041820c00, -0x1e0c3078f00, -0x181038000, -0x381078000, -0x60c38f1800, -0xe140871e00, -0x1c1c1878c00, -0x1c081020700, -0x2060264f8e00, -0x1c0c1060f00, -0xc0c1070b00, -0xe241c50e00, -0x2041860800, -0xe0c1870e00, -0x81c3c50c00, -0xe060871900, diff --git a/samples/digitrec/digitrec/data/training_set_3.dat b/samples/digitrec/digitrec/data/training_set_3.dat deleted file mode 100644 index de11a687c..000000000 --- a/samples/digitrec/digitrec/data/training_set_3.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0xe041010e00, -0xc083810e00, -0xe225811e08, -0x6041810c00, -0x1e0c3808e08, -0x6343c08e10, -0x1c083808e00, -0xe0c1010e00, -0x61c3870e30, -0xe063899e00, -0x1e0c3891e00, -0xf0e3c11e00, -0xe043010610, -0x4041010e00, -0x1031e18d1e00, -0xf1e3871e10, -0xe081030c00, -0xe0c1c89e00, -0x7063809c00, -0x6061831c00, -0xc345808704, -0xe043810e08, -0x6043831c00, -0xe1c3c19e10, -0xe041030c00, -0x7023811c00, -0xe041830e00, -0x1e243858f00, -0x1c041010e00, -0xe0c1010e08, -0xe041810e00, -0xe1c3819e10, -0xc1e3819c00, -0x1e3e3c09f18, -0x8381030102, -0xc043030e00, -0xc283808f00, -0xc081810e00, -0x1c041810e08, -0x6041891c00, -0x20c1990c00, -0xe043810e10, -0xe38f0000, -0xe081818e00, -0xe0c3811e00, -0x6141891e00, -0x60c1811c00, -0x61c1818e08, -0xe0c18d8e00, -0x1e0c3c19e00, -0xc081810e00, -0x70e1821c30, -0xc0c1810e10, -0xc081810c00, -0x1e1c24ccf00, -0x18183808700, -0xe0e3891e00, -0xe0c3810c00, -0x1c3030204, -0xe341c89e00, -0x6041010c10, 
-0xe041811e00, -0x4181810c00, -0x1c383038704, -0xe043811e00, -0xc0c0c50e00, -0xc3c1838700, -0xe0c3850e00, -0x6041821e00, -0x70c1011e00, -0x1e1c3809f00, -0x61c1858e00, -0xc0c1020608, -0x1c041810e08, -0x40c3010608, -0x70e3870c10, -0x1c081011e00, -0xe0c3810e00, -0x60c1031c00, -0xe1e18b1e10, -0x6043030c00, -0x60c3810e00, -0xc0c1030410, -0x60e3811e00, -0xe0c3c88e00, -0xe0c1cf8f00, -0xc041010400, -0xe043810208, -0xe1c1810600, -0x1c043810e00, -0xe043810e08, -0x1c0c3818e00, -0x1e0c3818e08, -0x1e3c3c38f08, -0xe063810e00, -0x41c1860204, -0x61c3891e00, -0x1e0c7c48f08, -0xc08381020c, -0x1e3cf8000, -0xe0c3808f00, -0xe041891e00, -0x6041831c00, -0xf061811e00, -0xe0c3810e00, -0xf161020630, -0xe0c1810e00, -0xf1e3891e00, -0x70e1831c00, -0x6041010c00, -0x41e3890c00, -0xe0c3819e00, -0x70c3833c00, -0x61e3891c10, -0xf0c1cf1e00, -0x1e041819e10, -0x6043031e00, -0x7167811e10, -0xe0e3c19e08, -0xe043010e00, -0x1e3c3c08f00, -0xc043810400, -0xc0c3c19e00, -0xf1e3811e18, -0xe1c3850218, -0xe3c3818f08, -0x40c0810c00, -0xe0c3819e00, -0xc0c3818e08, -0xc043c89e00, -0x73e3cd9e10, -0x20c183ce8e00, -0x8083809300, -0x6041810c00, -0xe041c18e00, -0xe043010c00, -0xe141011e00, -0xc081810600, -0xe1c08d0e00, -0xc0c1818600, -0x1e243808f00, -0x1e0c3018f00, -0xe1c3811e00, -0x4081010400, -0x1c083810608, -0x2060c3898e00, -0x6061811c00, -0x1e041810e08, -0xe3c3c08f00, -0x1e0c3c08f08, -0xc043810e10, -0x70e3811c00, -0x1e343818f08, -0x60c3030600, -0x6041030c00, -0xc283808708, -0x7063021c20, -0x41c1030600, -0x6043890e00, -0xc041830610, -0xe0418d0e00, -0x71e1830c10, -0xe0c7818e00, -0x8181810208, -0xe1c1810e08, -0x4341c10600, -0xe041810e00, -0xc083818e08, -0xe0c1819e00, -0xe043819e00, -0x60c3811e00, -0x1e1c3818e08, -0x6043811e10, -0xc341818e20, -0x1e063811e08, -0xf041899e10, -0xc181818600, -0x4081810c00, -0xe1c3811e00, -0x61c3811e00, -0xe0c1810e00, -0xc041810e00, -0x71a18f1c10, -0xc1c3010608, -0x1e2c3818f00, -0xc083810e00, -0xe081810e08, -0xc1c1810e08, -0xe0c1810e00, -0x1c383c08700, -0x7043811c00, -0xc0c1030608, 
-0xc1c0409e00, -0xe043808a08, -0x7161831c00, -0x61c3811e10, -0xe241808e00, -0xe1e3899e00, -0xe043830e00, -0x70e3831c10, -0xc38380870c, -0x61c3c78e00, -0xe0c3810e00, -0xe2418f8e00, -0x18383818704, -0x4081020c00, -0x3043831c00, -0x1e043818f08, -0x7063831c00, -0xe143830600, -0xc2c0489e00, -0xc1c3810e00, -0x6043811c00, -0xc081010c00, -0x60c1810c00, -0x101c1810e00, -0xe0c1810c00, -0x60c1811e00, -0xe0c1890e00, -0x1c04381933c, -0x1c3c3c18f00, -0x60c3030c00, -0xe0c3830c00, -0x61c3859e00, -0xf063871c00, -0x61c3830c00, -0xe041818a00, -0xe043810e18, -0x2020c08f9c00, -0xf3c3819f10, -0x20e18b1800, -0xe041010e08, -0xc041810e08, -0xf3e3839e10, -0xe0c3811e10, -0x1e041878600, -0x1c0c3818f00, -0x4181810c00, -0x71e1851610, -0xe0c3858e08, -0xe0c3809f00, -0x1e221808f18, -0x6041810c00, -0x1e1c3c09f18, -0xe041811e00, -0xc081810600, -0xe0e3831e00, -0x1c183c68700, -0xe1c3870e00, -0x1e1c3c18f00, -0x6043810c00, -0x6143810610, -0xc2c3c88e00, -0xe1c3870e08, -0x70c3cf1c00, -0x30c38f1c00, -0x60c2811c00, -0xc081810e00, -0xc0c0810e00, -0x6041811c00, -0xc081810e00, -0x1e0c38d1e00, -0x1c041808e00, -0xc043070608, -0xe0c3810610, -0xe1c3030e18, -0x30e38b1c00, -0xc383808710, -0x70e1cf9e00, -0x20c1830c00, -0xe0c3c08e00, -0x1c0c3808e00, -0xe0c3010e00, -0xe1e3831c00, -0x4041020c00, -0x31e3810c20, -0x60c3010e10, -0xe1c3810e08, -0xf1e3c49e10, -0xc181810c00, -0xe041811e00, -0xc0c1810410, -0xc081810e00, -0x383878600, -0x60c3830c00, -0x60c3811c00, -0xe0c1810e00, -0xc083818608, -0x6080890c00, -0x1c181010200, -0x70c3811c20, -0xe3e38f0304, -0x31e3cb1c00, -0x6043010a08, -0xe1c3830e00, -0x7041811c00, -0xe041811e00, -0x6041030c00, -0x1c087c08f00, -0xc183808e00, -0x70c1833c00, -0x40c1810c00, -0xe043810e08, -0xe0c3870e00, -0xe343888f08, -0xc181808200, -0xe2c3809e00, -0xc0c3c08f00, -0xe1c3810e10, -0x3c3018300, -0x1c043818b04, -0x60c1830e10, -0x1c3c3c08f00, -0x1e1c3c48f08, -0xe101808e00, -0xc081010e00, -0xf061811c00, -0xc087808318, -0xe043810e08, -0x1e0c3858e00, -0xe0c3811a18, -0xc1c1850e00, -0x30e3831c00, 
-0x1c083808108, -0xc183818e00, -0xe0c3c89f00, -0xe3c3810e18, -0xe041810e00, -0xe0c3811e00, -0xf0e1811e00, -0xe041010410, -0x61c3811c00, -0xe0c2030e10, -0x7041811410, -0x1c18181860c, -0x71e3cd9e00, -0xc0c3030e00, -0xe041030e08, -0x6041890e00, -0xe0c3818e00, -0xe0c18d0e00, -0xc0c3810e00, -0xf163c19e00, -0x81c1c50608, -0xc0c1010c00, -0x1c083038300, -0xf0c1011e00, -0x1e1c3c19e10, -0xe141818e00, -0xc0e1030c20, -0x70c3831c00, -0xe143810e00, -0xc0c3408e00, -0x61c1020608, -0x60c1831c00, -0xe043810e00, -0xc087888f00, -0xc1c3838e00, -0x61e3810c10, -0x1e0c3808e00, -0xc083808e00, -0x6083810400, -0xe061831c10, -0xf1c3c19e00, -0xe041811e00, -0x40c2810e00, -0xc0c1830e00, -0x70c3813e00, -0xe1c3c09e10, -0x6043811e00, -0x70c1871e00, -0x60c3811c00, -0xe0c1010e08, -0xe1c1df8400, -0xe063811c00, -0x1e043818e10, -0xe0e3030c30, -0xe0c0c18e00, -0xe061819e00, -0x60c1811c00, -0xe3e1811e10, -0xe043810e10, -0x60c1810c00, -0x30e1891c00, -0xe0c1850e00, -0xc243c68f00, -0x60c3811e10, -0x2060c08d0e00, -0xe0c3818e00, -0xf0c1819e00, -0x6103810208, -0xe043891e00, -0x60c1811c10, -0x4003810400, -0xc083818600, -0xc0c1010600, -0xf1e1831e10, -0x1c3c3808318, -0xc0c1810e00, -0x1c083808e00, -0x60c3811c10, -0x1e3c3cc8f00, -0xc0c3c19e00, -0x61c1830410, -0xe0c389be00, -0x60c3811c00, -0xe2c3038f00, -0xe0e3c99e00, -0xe3e1c98f00, -0x60c1870c00, -0x1e2c7818f00, -0xe0c3870e00, -0x21c3850418, -0x1e0c7818f00, -0x1c083818e08, -0xc043810e00, -0x1c081810608, -0xe043809f00, -0xc243838600, -0xc0c3808e08, -0x6043031c00, -0xe3e3830e18, -0xc081808e00, -0x60c3810e00, -0x1e043809f08, -0xe143020a08, -0xe041810c00, -0x1c043809f00, -0xc081010600, -0xe0c3830e00, -0xc1c3030608, -0xe0c1810c00, -0xe0c1818e00, -0x2061c38d9e00, -0x6063811c10, -0x4181810e00, -0xf063811e00, -0xe141830400, -0x60c3810c00, -0xe0c1810e00, -0x60e1c3cf9f00, -0xc180810e00, -0xf0e3c99e00, -0x60c1831c00, -0x83c3808f00, -0x6141810e10, -0xe141819218, -0x60c1010c00, -0x6141811e08, -0x6041811e00, -0x61c3858e08, -0xe1c3818e00, -0xc083810e00, -0x4043010e00, 
-0x6080030c00, -0xc043010e00, -0x1e043830f00, -0x60c3830c00, -0xc0c3810e00, -0xc183810400, -0x60c1890e00, -0x60c3850410, -0x6041811c00, -0xe141070600, -0xc3c1808208, -0x60c1991c00, -0x20c18b1c00, -0xf0c3cd9e00, -0xe043020610, -0xe1c1890e00, -0xe043810e00, -0x1c081808e00, -0x4081810600, -0xe04181be00, -0x61c3811e18, -0x2060c18f0e00, -0x1c043808e10, -0x30e1811630, -0xe0c3808e00, -0xe180850e00, -0x1c181038608, -0xf0e1831e00, -0xe043810e10, -0xe041811c00, -0x61e1df1e00, -0xe063d19e00, -0xe0e38f9e00, -0x1e0c3808218, -0xe081810e00, -0xe043810e00, -0xc043010f08, -0xc082810e00, -0x1c083c48f00, -0xc081808218, -0xc183010608, -0xe143810e10, -0xe0c3810c00, -0xe3c3018218, -0xe0c0470c00, -0xe0c1010e10, -0xf061c99e00, -0xe1871800, -0x61c3861110, -0xe0c1811e00, -0xc043810208, -0xe0c1850e00, -0xe0c1030e08, -0xc183818600, -0x61c1811600, -0x30e48f0e00, -0x6041810c00, -0xc0c3010e00, -0xc183818708, -0xe1c7808f00, -0x61c1811e00, -0x4041030c00, -0x2182810e00, -0xe0c3c18c00, -0xe043808e00, -0xc243808600, -0x70c1811e00, -0x21e1c91e00, -0xc1c1830e00, -0x60c3850c00, -0xc081830c00, -0xc083818e00, -0xe0c3810e00, -0x60c1870c00, -0xc081810e00, -0x71c1831c00, -0xe041811c10, -0xe041811e00, -0xc081858e00, -0xe0c3811c00, -0xc3c3838708, -0x60c1811c00, -0x6041810c00, -0xe041891e00, -0x30e1851c10, -0xe143818e08, -0x1c183808e00, -0x1c183808700, -0x61c3810c00, -0xf361891f18, -0xc0c3870c00, -0x60c1831c00, -0x1c2c3c18e00, -0xf1618f1c00, -0x60c3811e00, -0xe1e1891e00, -0xc081810e00, -0xc0c3010600, -0xf3c3c08e00, -0x70a1821c00, -0xf063809e00, -0x70c1890c00, -0xc081808e00, -0x1c0c18d0e00, -0xc0c3830200, -0x2060c0870e00, -0xe0c1810e08, -0xe0c1899e00, -0xc043010600, -0xe341c78e00, -0xe0c3811e00, -0x6061831e00, -0x6041031c10, -0xc083810e00, -0x1c081010208, -0x6043c19c00, -0xe043808e00, -0xc040810a00, -0x1c083c08600, -0x63e1810630, -0xe0c1811e00, -0xf063811e00, -0x1e041809e00, -0xe141830e00, -0x1e3c3c08f00, -0xc1c3830e00, -0xe0c1810e00, -0x61c3811e00, -0x1070c0cb1c00, -0x4081810400, -0x6041811c00, -0xf061811c00, 
-0x60c3890e00, -0x21c0c99e00, -0x1c0c3c18e00, -0x6041020c00, -0x2060c3871c00, -0xe0c3010e00, -0x1c187808700, -0x60c0cb1c00, -0x6043811c00, -0x4182818e00, -0xc081818608, -0xe043811e10, -0xe1e1830610, -0xe1c3859e08, -0x18081808104, -0xc0c3810e08, -0xa082818e00, -0x40c1010400, -0x7041821800, -0xe043811c00, -0xe383818e00, -0xe043810e00, -0x6043811e00, -0xc041810e00, -0x73e0810e30, -0xc3c3828700, -0x4083010e00, -0x6143850600, -0xc083810e00, -0xe0c1811c00, -0xe0c0811e00, -0xe041830c00, -0xe0c3010e00, -0x101e1839e00, -0x7041831c00, -0x1c081808f08, -0xe0c3819e10, -0xe0c7818f08, -0xe0c3850e00, -0xe041811e00, -0xc041020600, -0x61c3810c10, -0x61c1850218, -0xe2c0810e00, -0xc383818e00, -0x7063831c00, -0xe0c1021c00, -0xc081848e00, -0x6181810e10, -0x41c1810600, -0xc0c1810610, -0xe1c3c18e10, -0xe0c38f8e00, -0x43c1030608, -0x70e3871c00, -0x21c3811c00, -0x1e1c3878e00, -0x101c1030100, -0x4181010600, -0x1c3c1030304, -0xc081010e00, -0x60c1010208, -0x6043811c00, -0x60c1811c00, -0xe1c1830400, -0x60c3810e00, -0x7043831c00, -0x61e3811c10, -0x1c083808700, -0x6061999e00, -0xe181010600, -0xe0c3810e00, -0xc1c3830600, -0xc2c1818e08, -0x7061cd1c00, -0xc043811e00, -0x61c3810410, -0xe063891c00, -0x1e1c1c89e00, -0xe043819e00, -0xc3c1810e00, -0xc0c1830400, -0x2060c3870e00, -0xe1c3811e00, -0xe1c3870608, -0xc0c3858e00, -0x10e5cf0c00, -0xe083889e00, -0xe041810e00, -0x60c1871c00, -0xe0c3810e00, -0x7040cd8e00, -0xc182810e00, -0xe343808f04, -0x1e043808e08, -0x50e3c99c00, -0xf0238f1e10, -0x2060c0850e00, -0xc043810e00, -0xe0c3c19e00, -0xe0c3030e00, -0x183c0838300, -0x60c3810c00, -0xc081830c00, -0x60c3811e00, -0xc081010200, -0xc380c18e00, -0x6361819e00, -0x4143010100, -0xe3c1c10e18, -0x18383818f00, -0x60c1011e00, -0x63c3818e00, -0xf0e3c99e10, -0xc041010c00, -0x1c181810e00, -0x206081070c00, -0xe243810e08, -0x6041020400, -0x1c1830204, -0xe041020c00, -0xc3c3878618, -0xe1c3818f08, -0x1c0c1878e00, -0x1e1e3819e00, -0x60c3060e00, -0x7141811e00, -0x1e0c1819e00, -0x7063831c00, -0x60c3419e00, -0xe243819f00, 
-0xf1e3c99e00, -0xe2c1848600, -0xc1c103020c, -0x1e2c1818f08, -0x40e0c71c00, -0xc3c1850204, -0xc081010300, -0xe363811e00, -0x60c3810e00, -0x61e3831e10, -0x60c1811c00, -0x63c3c08318, -0x60c1011e00, -0xc081810600, -0xe1c3811e00, -0xe041811c00, -0xe1c1c99f00, -0xe0c1810e00, -0x6141811e00, -0x1c083818700, -0xe1c3810e00, -0xf041811e10, -0x1e0e1c99e00, -0xe0c3819e10, -0xc083010e00, -0xe0c1811c30, -0x70408d0c00, -0x60c1811c00, -0xe043810600, -0xe041811c00, -0xe0c3818e00, -0xe041810e00, -0x60c0831c10, -0x61c3891c00, -0x60c3811c00, -0xe0c1819e00, -0x7161011c00, -0xe043830e18, -0x1c287808708, -0xe1c1818e00, -0xc083810600, -0xc083808e00, -0x61c1030600, -0x6041811c00, -0x6041830c00, -0xf0e18b1c10, -0x1c043810e08, -0xe3c3858608, -0x6081810e00, -0xe0c1830c00, -0xe0c3030700, -0x1e3c3c48f00, -0xe163871e00, -0x61c0cf0e00, -0x41c3820408, -0x1f1c3c19e00, -0x2060c1830c00, -0xc1c1870204, -0xc0c1810e00, -0xc041010600, -0xe043811e00, -0xe1c3810410, -0xe1c1819e00, -0xe043850e00, -0x18383838700, -0x4083810e00, -0x1c180808e00, -0x60c1830c00, -0xc343858718, -0xc083810600, -0xc1c1810e00, -0xe041810e00, -0xe041811e08, -0xc0c1830c00, -0xe1c2810e00, -0x1e0c3838e00, -0x71e3899e00, -0x30c1891c00, -0x1c283c08700, -0xe1c3879e00, -0x4081000400, -0xe0c3c18e00, -0xe083810e00, -0xc1c1811e00, -0xe0c1810e00, -0xe0c1818e00, -0x21c3810c10, -0xe0c3810e00, -0x31e3079e00, -0x2043010e00, -0xe1c3c99e18, -0x18081810200, -0xe0c3010e08, -0x1c0c3838e00, -0xe143813c20, -0x6041820400, -0xe1e3c99e00, -0xe1c3870e00, -0xc243808f08, -0x60e1851c00, -0xe041810218, -0x61c1831c00, -0x1e043819338, -0x1e0c1c09e00, -0xe083819e00, -0x1c0c3818704, -0x6043c1be00, -0x7063831c00, -0xc0c3010600, -0x60c1030e00, -0x60c7c19e00, -0xe1c3818f00, -0xe0c3030e00, -0x1e0c3c18e10, -0xe3c7818f04, -0x1e1e3c11e10, -0x1e041810d04, -0xe0c7870e00, -0xe0c3889e00, -0xc0c3870c10, -0xc081810400, -0xc043810400, -0x61c1848e08, -0xe0c3850e00, -0xe0c3810e10, -0xe041030e00, -0xf1e38f9e08, -0x40c1810e00, -0xe043811c00, -0x6081011c00, -0xe0c3811e00, 
-0x60c34d9e00, -0x60c1891e00, -0xe0c3cd9e00, -0xe081850e00, -0xc0c3810600, -0x63618e0608, -0xe0c3819e00, -0xe343858e00, -0xe063819e00, -0xe1c1c58e00, -0xe0c3818e00, -0xc043030600, -0x60c1811e00, -0xe1c3c58e00, -0x7061890e00, -0x61c1020e18, -0x3e3870410, -0x61c3011e10, -0x1c041810e00, -0x4081810e00, -0x71c3810c00, -0xe061851e10, -0xc083810600, -0x1c0c1818f00, -0x61c1810e00, -0x6080810c00, -0xe0c3811e00, -0x1c083808600, -0xc081010600, -0xe080811c00, -0xe0c3818e00, -0xe0c3819e10, -0x1e1c3810e08, -0xe043010e00, -0x61c1890e00, -0xe0418f1c00, -0x6041030c00, -0xe081c19e00, -0x1e2c3899f00, -0xe043819e00, -0x60c1891e00, -0x1c081818600, -0xe041818e10, -0xe0c7808e00, -0x1c1c0c88f00, -0xc0c3870e00, -0xe3c1c19e10, -0xe1c3858e00, -0x1f0c3809e00, -0xf063811e00, -0xe0c1010e00, -0xe0c1819e00, -0xe1c1010e08, -0xe081038f00, -0xc043810e10, -0xc3c3c08600, -0xe0c3818e00, -0xe1c3030e00, -0xe083810e00, -0xc0c0850e08, -0xe0c3819e00, -0x1e1c7c08700, -0x1c043808700, -0x6041810600, -0xe143030410, -0xe1c3850e00, -0xe0c3810e00, -0x61c3c58e00, -0x6061831c20, -0x206041891e00, -0x1c081848600, -0x181e7c78e10, -0x6041831c00, -0x1c0c3818e08, -0x18080838600, -0x7043060e00, -0xc141030608, -0x7063891e00, -0xc081810a00, -0xe243858618, -0xc041810e00, -0xe0c1818e00, -0x30c0891c00, -0xc0c1810618, -0xe041810e00, -0x7041011c20, -0xc041010e00, -0xc1c3858e00, -0xe061811c20, -0xf061cc9e00, -0xc1c3830e00, -0xc0c3810e00, -0x20e0e14f9e00, -0x1c283c08f00, -0xc1c2810600, -0xe0c3810e08, -0xc083818e00, -0x1e0c3809e08, -0xc1c1070608, -0x1c0c3808b00, -0x7040911c00, -0x6043011c00, -0x7041891c00, -0xe043811e10, -0xc0c7818e00, -0xe0e1811e10, -0x60c1810c00, -0xe1c1850e00, -0xe041819e08, -0x70e38f1c00, -0x30e3811e00, -0xe0c1870e00, -0x6041810610, -0xc043010e00, -0xc081810e00, -0xc081810e00, -0xf0c3811e10, -0x20e0618b9e00, -0x63c1838e00, -0xc081898e00, -0x4183810600, -0x1e043818e00, -0x61e3031c00, -0x60c38d0e00, -0xc381808600, -0xe0c3809e00, -0xe0c3c08e00, -0xe0c3830c10, -0xe1c38f9e00, -0xc081810600, -0x60e1811c00, 
-0xe0c3818e00, -0x70618f0e00, -0x6061831c00, -0xe1c0811c10, -0xc0c3818e08, -0xc0c1030608, -0x18383808708, -0xe347810708, -0xe0c3858e00, -0x73e3810e18, -0xe0c0810e00, -0x70e1819e00, -0xf021811e00, -0xe143811e00, -0x71e1820410, -0x6081010400, -0x40c1010e00, -0xe0c18b1c00, -0xc1c1808e08, -0xe1c3870e00, -0xe0c3819b00, -0xc041810a08, -0xe0c3819e00, -0xc083010600, -0x6043010e00, -0xe141818e08, -0xe0c1010e00, -0x60c1011e00, -0x60c0890e00, -0xe161811e00, -0x4080810c00, -0xe043810e00, -0xc0c3030e10, -0x70618f1c00, -0xe041891e00, -0xe0c3811e10, -0xe0c3810e00, -0xc081010208, -0xe041850e00, -0x60c3891c00, -0xe0c3819e00, -0xe041810e08, -0xe1c3c18e00, -0xc0e3810e00, -0x6041810c00, -0xe043811e00, -0x60c1830c00, -0xc0c3818e00, -0x1e0418f0204, -0xe0c0409e08, -0x70c3871c00, -0xc1c0408e08, -0x61c3811e00, -0xe081010e00, -0x1c0c3830700, -0x1c083c08600, -0xc081810e00, -0xe0c3c19e00, -0xe1e3c99e00, -0xe1c3818f08, -0xe1c1811e00, -0x60c1890e00, -0xc041030e00, -0x18381c28300, -0xe1c1810e08, -0xf161851c00, -0x70e18e1c00, -0x10183458400, -0x6043811c00, -0xc141030618, -0xf0c3811e00, -0x206083870e00, -0x1e043819f00, -0xc0c3808e00, -0xe1c3cf8e00, -0xc080800e00, -0x1e043818e08, -0xe043811e10, -0xe041810e00, -0x63c3c19e10, -0x4081810600, -0xe0c1810e00, -0x4041030400, -0xe0c3810c00, -0xc081810e00, -0x4081830c00, -0xe161cf9e00, -0x61c4820608, -0x60c1810e00, -0x381038200, -0xf0e1891e00, -0xe041811e00, -0x61c3c59e00, -0xc0c3010600, -0xe3e0818e18, -0x6141831c00, -0x1e083818f00, -0xc0c3810c00, -0x1c083808f00, -0xe043808e00, -0x6041821c00, -0xc043810e00, -0x6063811c00, -0x6141810e00, -0x6043870e00, -0xe341858f00, -0x4083010e00, -0x63c3878608, -0xc0c1030600, -0x1c3c3818608, -0x6041891c00, -0xe1e3851e00, -0xe083810c00, -0xf1e3899e00, -0xe041820c00, -0xe0c3810c00, -0x8181830200, -0x1c0c3808f08, -0xf0c3011e00, -0xe043818e00, -0xe1c3811e10, -0xe0c3858238, -0x70618b1c00, -0x4183c18e00, -0xc1c3858e08, -0xc183818e00, -0x1e2c1818f08, -0xe041811e10, -0x60c1890e00, -0x71e3850e00, -0x1e3c3830308, -0x1c083818e00, 
-0xe120870c00, -0x60c3811e00, -0x4141830e08, -0xe0c1899e00, -0xe080810e00, -0x1c3c3c78700, -0x6041010010, -0x6041010c00, -0x6041810c00, -0x1e1c0c89e00, -0x1c043809f00, -0x20e0c1c78e00, -0x1e081010e08, -0x6081010c00, -0x8383808600, -0xc083808e10, -0xe043810e00, -0x60c1810c00, -0x6080810c00, -0xe0c3010e00, -0xe0c1811e00, -0xe041818e00, -0xe041811e10, -0x4081810c00, -0xc1c1020208, -0xf0e1811e10, -0x30e18f1800, -0x60c3811c00, -0xe0c1831c00, -0xc081010200, -0xe081810e00, -0xc0c3810600, -0xe0c3870c00, -0x60c3830c00, -0x6043819e00, -0x6041050c00, -0x70c38f1c00, -0xe041810e10, -0x61c3870c00, -0xe0c3810e00, -0xe0c7c09e00, -0x1e043870e08, -0xe083810e00, -0xe1e1819f10, -0xc1c1030408, -0xc181010600, -0x1e043c08f00, -0xc1c3808e08, -0x1c2c3c08700, -0x2061c3cf9c00, -0xc1c1c70200, -0xf041c99c00, -0xe0c3818e00, -0xc183810608, -0x1c0c3418e00, -0x60e7819c00, -0x30e38f1e00, -0x1e0c1810e10, -0x60c18b1c00, -0xc0c3810e10, -0xf1e3891e10, -0xe0c1010e00, -0xe043819e00, -0x6181810410, -0xe341810e00, -0x6041810c00, -0xe163831e00, -0x1c0c3c40e08, -0xf361871c10, -0x1e041891e00, -0x1e1c3c08f00, -0x60c3030410, -0x7061831c00, -0xe1c1830e00, -0xe0c3810e00, -0x1030c3871c00, -0xf061811c00, -0xe0c3819e00, -0xc3c1810608, -0xe1c1c19e00, -0x70e3831c00, -0x60c1821c00, -0x60c1830c00, -0x1c041810e00, -0x60e1c90e00, -0x6083810e00, -0xe3c3810e00, -0xe041831e10, -0xc0c1030e00, -0xc183810e00, -0xe283c08f08, -0x7041891c00, -0x1e0c1810e10, -0x4081810c00, -0xe243888e00, -0x1e0c3818e08, -0xe0c3810e00, -0x3c3808f00, -0xe0c1891e00, -0xe1c3818e08, -0x60c1030e08, -0x1c081010e00, -0xf1c3871c00, -0xc083810c00, -0xc0c3850e00, -0xe043810e00, -0x7043811c00, -0x60c2810c00, -0xf0c3c49e00, -0x1e061899e00, -0xc081010e00, -0xc083808e00, -0xe043811e08, -0xe1c1830c10, -0x40c3010208, -0x3040891c00, -0xe0c1818e00, -0x6001850c00, -0x61c1811c10, -0xc141830600, -0x61c1820c00, -0x6141811e00, -0xc1c1830e00, -0xe341819e18, -0x61c3831c00, -0xe1c24c8e00, -0x3e3831c00, -0xe1e3819e00, -0xe0c1810e00, -0xe043819e00, -0xf1c3811e10, 
-0xe161821c10, -0xf3e3879e08, -0xe281858e00, -0xe1c38d8e00, -0x30e3990e00, -0x1c0c3cc8e00, -0x1c1c1878f00, -0x1e083038e00, -0xe163811e10, -0xe241018f08, -0xc181818e00, -0xe1c3810e00, -0xe0c3038c00, -0xe0c7818e00, -0xe181c78e00, -0x60c1811c00, -0xe243010a08, -0x4041020400, -0x7021911c00, -0xe1c3830e08, -0x1e081811e00, -0x6081810c00, -0x31c1811c00, -0x21e18b1c00, -0x1c0c3810e00, -0x30e18f1c00, -0xe0418b1e00, -0x1c041810208, -0xe061809e00, -0xc3c1870208, -0x60c3813c00, -0x1c3c3848f00, -0x7063811c00, -0xe0c3810c00, -0xe041811e00, -0xc043818e08, -0xc043010200, -0xc081810e00, -0xf0c3011e00, -0x61c3818e00, -0xc0c1810e00, -0xe043810e00, -0x60c0811c00, -0xe241810e00, -0x30278f0400, -0xe0c2808e00, -0x4181810600, -0xe1e1860608, -0xe3c3c19e10, -0x1c2c3808f00, -0xc0c3810e00, -0xf143819e18, -0x70c3831c00, -0xc180818600, -0x60c1830c00, -0xe0c1810e00, -0xe041858e00, -0x1c381838f00, -0x1e0c3810e08, -0xc083818f00, -0x60c1830c00, -0xc0c1030e00, -0x6041811c00, -0xe0c1850e00, -0xe0c3599e00, -0x1e1c0cccf00, -0x60c1810c00, -0x7021819e00, -0xc041030e00, -0xc043830e00, -0xc081810604, -0x70c1911c00, -0xc0c1830c00, -0x43c1838108, -0x61e3830e08, -0xe1c3c89e00, -0x1f3e3839f00, -0x4043810e00, -0x60c1811c00, -0xe043818e00, -0xe141810e10, -0x60c3811e00, -0xf0e1c89e00, -0xf0c3811e00, -0xc081810e00, -0xe0c1030e08, -0x206081811c00, -0xc081808e00, -0xc043870410, -0xe043810e00, -0x6081810e00, -0x60e3031200, -0xe143858e08, -0x8101810400, -0xe0c1830e00, -0x70e1891e00, -0x40c1830c00, -0x70c1891c00, -0xe0c1818e00, -0xe1c3818f08, -0x6061830c00, -0xe1c3838e00, -0x1e063811e10, -0x60c3c78e00, -0xc043810e00, -0xe183808e00, -0xe043810c10, -0x60c1811c10, -0xe1c1030618, -0xe0c1810e00, -0xf0e7819f00, -0x63c103020c, -0x60c1811c00, -0x1e261c1be00, -0xf1c3879f00, -0x7041030e00, -0xc0c3cf8e00, -0x20c0891c00, -0x60c3811c00, -0xc083818e00, -0xc0c3c78e00, -0x1c0c7808300, -0x1c1c3c58e00, -0x60c3858e00, -0xc081010e08, -0x6063811e00, -0x1c3c383870c, -0x61c3c10610, -0xe043810e00, -0x60c0810e00, -0xe341819f18, -0x1e041070e08, 
-0x70c0d19e00, -0x6041810e00, -0xe1c3811e00, -0xe0c3819e00, -0xe0c3819e00, -0xe3c1810e00, -0x60c1020c00, -0x60e48d0e00, -0x60c1819e00, -0x7323811e08, -0x1e2c3c18f08, -0xe0c0810e10, -0x1e181818f08, -0x1c1c3c08e00, -0xc081810e00, -0xe283c00700, -0xe0c3810e00, -0x1c083808700, -0x1e3e1819f08, -0xc043808e10, -0xe3c1830e08, -0x4181830400, -0x60c3810e00, -0xc041010e00, -0x1c1c3c78e00, -0xe083810e00, -0xe0c3810608, -0xf3c1809718, -0xc083810608, -0xe0c3cf1c00, -0xc2c1030708, -0xe3c3c08e18, -0xe0c3818e00, -0xe0c1818e00, -0xe0c3879e00, -0x70e38f1c00, -0x61c1830e00, -0x70e3831c00, -0xe183808e00, -0xc081818e00, -0x18183818700, -0x30e3891c00, -0x1e0c3888f00, -0xe043078e00, -0xe043890e00, -0x6043820408, -0x4181010600, -0xe0c1811e00, -0xc083010e00, -0x40c1020400, -0xe1c1870c00, -0xe141810e00, -0x1c083818200, -0x70e3811c10, -0xe0c1030600, -0x20e1821c00, -0x70e1811c00, -0xc081818e00, -0xe1c1810e00, -0x1c0c3c08e00, -0xe161811e00, -0x31e1831c00, -0xe1c1891e00, -0xe0c3011e00, -0xe081850600, -0x81810400, -0xf3e19f9e00, -0xe0c1810c00, -0xc283810608, -0xc081810c00, -0x60c3810e00, -0xc081010600, -0x40c1810e00, -0x7041891c00, -0xe0c3811e00, -0xe0c1810400, -0xe1c0878e00, -0x71e3c91c00, -0x60c2c70c00, -0xe1c7811e10, -0xe1c3810e08, -0xe3c1810238, -0xe1c3830e00, -0xe081850e00, -0xc043888e00, -0x6061831c00, -0x6141811e00, -0xe021c88e00, -0xe0c3808b08, -0x1c0c1030e00, -0xc1c1020408, -0x18383878600, -0x6341030608, -0xe0c0810e00, -0xe1e1831e08, -0xe0c3870c10, -0x60c3811e00, -0x6043810c00, -0xe041010e00, -0x7041811c00, -0x20e1871e00, -0xe0c3819e10, -0x1e1c7c18e10, -0xe1e3831e10, -0xe0c1800e00, -0x60c1891c00, -0x70a1859e00, -0xc043810e00, -0xe0c3811c00, -0xc043810a00, -0xc001010e00, -0xc0c3810e00, -0x60c3831c00, -0xc0c1010c00, -0xf0e3819e10, -0x1e043818f08, -0x1c083808700, -0xe0c3810e00, -0xe1c3819e00, -0xc043810e00, -0x1e0c1818f00, -0x63e3c19e00, -0x60c1830c00, -0x60c1850c00, -0xe1c1818e08, -0xc081808e00, -0xe041810e10, -0x60c3831c00, -0xc181810e00, -0x70c1813e00, -0x6043810410, -0xe1c2498e00, 
-0x60c3811c00, -0x1c081808704, -0x61c1810e00, -0xc083810e00, -0xe1c3c58e00, -0x70e5999e00, -0x1e061c89f08, -0x71a1811c00, -0x61c3811c00, -0xe0e3870e00, -0x60c1830c00, -0xe043810c00, -0xe0c3810e00, -0x1c083808e08, -0xc0c3c08e00, -0xe0c3818e00, -0xe141810618, -0xe0c3848e00, -0x4081810e00, -0x61c3811c00, -0x4043010c00, -0x1c083808600, -0x60c1810c00, -0xe041810e00, -0xe043818e10, -0x1c041010e00, -0xc083810410, -0xe0c38d0e00, -0x1e041848f08, -0x3e3839c00, -0x40c3060400, -0xc0c1818e00, -0x6081810e00, -0x71c3819e00, -0x1e3c3838f00, -0xf1e1811e10, -0x60c1cf0e00, -0x30e7891c00, -0x1e383818e00, -0x60c1850e00, -0xe0c3010e00, -0xe043831e00, -0xe0c1859e00, -0x1c1c3818700, -0x40c3810e00, -0xe041030e00, -0x60c1821c10, -0xe041808e00, -0xe043830e00, -0x61c3810c10, -0xc081810e00, -0xe261811e00, -0x2041050c00, -0x1c383c18e00, -0xc0c1810e00, -0x1e2c3c58e00, -0xc1c3810610, -0x6041030410, -0x4081030400, -0xc083810e00, -0x41c1808600, -0xe141810e00, -0xe0c1850e00, -0x60e3819e00, -0x61c1810e00, -0xe0c1810e00, -0xe041811e00, -0x6143810e10, -0xe041810e00, -0x60c1030e00, -0x60c1821800, -0xc0c1810600, -0xc0c1810e00, -0xe0c1810e00, -0x181c1ce8300, -0xe0c1810e00, -0x60c3830c00, -0x3e7c08f00, -0x6041011c00, -0xe1e3030e18, -0x61e1c31c10, -0xf161871e10, -0xe1c3ce9e00, -0xc081810600, -0xe0c3890e08, -0xe081810610, -0x30638f0c00, -0x60c1070608, -0xe141810e00, -0x6143811e00, -0xc081818e00, -0x1e0c3850f08, -0xe041031c00, -0xc043810200, -0xe343c19e00, -0x60e38f1c00, -0xf043851e00, -0x7143851e00, -0x2060c1810c00, -0x7043811e00, -0x30618b1800, -0x63c1809e00, -0xe041831c00, -0x30c3811c00, -0x6041810c00, -0xe1c3010608, -0x1e0c3819f08, -0x6143810400, -0x183c3010700, -0x4081010600, -0xc245c08e00, -0xc3c3c08f00, -0x6143810e00, -0xe1c3898e00, -0x6043810c00, -0xe3c3810f00, -0x1e041009e00, -0xc083808f00, -0xc181808f00, -0xc183808e00, -0xe0c3811e00, -0x6041010410, -0x43c1808e00, -0xe0c3c19c00, -0x60c3811c00, -0x4043891e00, -0xe183889e00, -0x71c1891c00, -0x1e243c09e00, -0xe0c3810e00, -0xf0c3811e00, -0xc081810e00, 
-0x1c1c3818708, -0x4041030c00, -0x7061831c00, -0xe043819e00, -0xf163831e20, -0xc083c08e00, -0x1e0c3818f00, -0xe0c1870e00, -0x70c1833c00, -0xe241c48f00, -0x70e1891c00, -0x1c0c3808f08, -0x61c1830e00, -0x61e1890e00, -0x6041810c00, -0x1c1c383870c, -0xe0e3819e10, -0x1e1c3818f08, -0xe143810e08, -0x1e3861000, -0xe0c1811e00, -0xe0c3850e00, -0x1c081808104, -0xc3c1818708, -0x1e063831e20, -0xc1c3808e00, -0xe0c3811e00, -0x60c3850c10, -0xe0c7c08e00, -0x1e0c1808f00, -0xc1c3818e00, -0xc3c3819e00, -0x6041010c00, -0xe061c31e00, -0x61c3891e00, -0xc0c1810e00, -0x1c081010608, -0x60618a0c00, -0x6041831800, -0xc041811e00, -0xe041810e00, -0x1e0c3030e00, -0x8103818600, -0x70e3861c18, -0xc0c3030600, -0xe1c3c39e00, -0x1e18f1000, -0x60c0819e00, -0xe0c3811e10, -0xe041811e00, -0xc081010e00, -0xe0c3811c00, -0xc240810208, -0xe0c1819f08, -0xc181808e00, -0x7021891e00, -0xe1c3810600, -0x6143810e00, -0x70c3831c00, -0x20c0831c00, -0x1c080808e00, -0xe0c1811e18, -0xc1c3038304, -0xc080810e00, -0x4343808e00, -0xe081000e00, -0xe141811e08, -0x7063811c00, -0x40c1020e00, -0x1e043808f08, -0xe041810e00, -0x3c1818600, -0xe0c1811e00, -0x70c3811e00, -0x1e023819c00, -0x63c38c8218, -0xe041811c20, -0x61e3899e00, -0x60c3030c00, -0xe1c3809f00, -0x1c081030e00, -0x60c1850c00, -0xc1c1810c00, -0x8183018300, -0xc083818e00, -0x30c3cd0c00, -0x60c3810e00, -0xe1c3819e18, -0x1c08101810c, -0x70e1811c00, -0x60c1010c00, -0xe041050e00, -0xe043810e08, -0xe243810e08, -0xc083010400, -0x71e1831c00, -0xe0c1010e08, -0xe3c3819e00, -0x70618f1c00, -0x60c1830c00, -0x61618b1c00, -0xe1c3858e00, -0x60c3858e00, -0x180c3848700, -0xe081818a08, -0x61c3030c30, -0xc001030e00, -0x1c083808f00, -0xe043850e00, -0xe0c3d19e00, -0x1e083c08e00, -0xe143870600, -0x60c3010e00, -0x7063891e00, -0xe341818f00, -0x61c1c10c20, -0x60c3811e00, -0x21c1810e10, -0xe0c3030610, -0xe041810c10, -0x1e063809f18, -0xe041030c00, -0x1c0c3030f04, -0xe1c0858e00, -0xe043810e00, -0xe043031e00, -0xf3c1811e00, -0x60e3831c00, -0x7061821c00, -0xe1e3858e00, -0xe1c3839e18, -0xe041010e00, 
-0x7063c19e00, -0xe041890e00, -0xe041889e00, -0xe043878e00, -0x6041810c00, -0xe041830e00, -0xc081810e00, -0x71a1889e00, -0xe041830c00, -0x6041010c00, -0x182c0408700, -0xc381c0870c, -0xf1e1cf1c00, -0x6143870e00, -0xc041810408, -0xc1c3818e08, -0x41c3830618, -0xe0e3811c00, -0xe1c3c99e00, -0x1e043c09e00, -0x70e18b1c00, -0x6043891c00, -0x1c2c1808600, -0xe061c91e00, -0x7041811c00, -0xc081810e00, -0x3c3010204, -0x1c083030208, -0x60c1830c00, -0x60c0891e00, diff --git a/samples/digitrec/digitrec/data/training_set_4.dat b/samples/digitrec/digitrec/data/training_set_4.dat deleted file mode 100644 index 4183b1fcd..000000000 --- a/samples/digitrec/digitrec/data/training_set_4.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0x8122870400, -0x267c78000, -0x1167870400, -0x13274f8100, -0x60c3830200, -0x9327cf0200, -0x2343830608, -0x2247c98300, -0x81c3870200, -0x51e2870c00, -0x11e7cf0600, -0x4183c70600, -0x2143c30400, -0x11e78b0c00, -0x41a78f0408, -0x10e78f0c00, -0x1127830200, -0x2143830600, -0x51e3830c00, -0xa143c78100, -0x21c3c70400, -0x61c2870200, -0x51a3cf0400, -0x20c6870200, -0x10228e0400, -0x2347c70400, -0x10267c78100, -0x20c3c70600, -0x20c3cf0600, -0x60c3cf1400, -0x167c20c00, -0x20c3830400, -0x143870200, -0x107c20000, -0x2143c70600, -0x1127c10408, -0x3147870400, -0x8143810600, -0x1122870400, -0x1278f0608, -0xe143878200, -0x4163870800, -0x2143810200, -0x1327810200, -0xa347830408, -0x247810204, -0x1367c70608, -0x2183820400, -0x1c3c30200, -0x8123c30400, -0x10c3860400, -0x8142830200, -0x4183810200, -0x2146c70600, -0x81c3830600, -0x2042870000, -0x1163820400, -0x20c3860400, -0x1143830400, -0xa142830200, -0x1143820400, -0x3143860400, -0x91248f8200, -0x91468f8200, -0x11e3cf0400, -0xc167c18300, -0x5167c70c10, -0xa142870b00, -0xc3830400, -0x10a2820800, -0x2143c60408, -0x61c38f0200, -0x11a7c21800, -0xa142870200, -0xc1c7c70600, -0x10a3820800, -0x1163820800, -0x61c3870200, -0x4162870400, -0x8143830400, -0x2042870200, -0x18438f0800, -0x41c3870200, -0x51e7c70400, -0x243810200, -0x8103830200, 
-0x40a3820400, -0x4122870400, -0x4143c50200, -0x10243810100, -0x10e38a0c00, -0x10e38e0400, -0xa143830200, -0x8143830200, -0x2043870400, -0x1163830400, -0x2142870400, -0x147c30608, -0x8143830600, -0x1c1810204, -0x8347850204, -0x142870200, -0x4183830400, -0x8326470200, -0x2147c70400, -0x4183c20400, -0x127c10400, -0x10c3870400, -0x20c3cf0408, -0xc1c3870600, -0x8143c20400, -0x20c3c70400, -0x20c3830400, -0x9127cf0200, -0x9367cf0204, -0x143030408, -0x10345cf8300, -0x41c7c78400, -0x4183830c00, -0x1167870400, -0x1067860800, -0x1142870600, -0x41c3cf8200, -0x264870200, -0x4083838400, -0x63c7c30600, -0x11a6870400, -0x51a7c30c00, -0x143820408, -0x11678e0400, -0x207c88100, -0x8143810204, -0x11c3860c00, -0x10247878100, -0x40a3820400, -0x2143870200, -0x41e3870408, -0x8367860c10, -0x51e3c20c10, -0x9a7870c00, -0x83c7ef8200, -0x9167830408, -0x2448f0200, -0x1c2cf8400, -0x21c3820400, -0xc3c7810204, -0x10c3860800, -0x31c78f8400, -0x60c2870600, -0x1a7c78400, -0x50e3860810, -0xa143870200, -0xa347830608, -0xa167cd0200, -0x2147890200, -0x12244870100, -0x5163830400, -0x8103c30400, -0x3143870400, -0x11e3830c10, -0x2142830600, -0x4102830400, -0x1e2860c10, -0x1c3c30400, -0x2147810200, -0x2247c18300, -0x8143830200, -0x2143870400, -0x3147c10200, -0x6142870200, -0xa147850200, -0x81448f0200, -0x41c3830600, -0x1143870400, -0x3143830c00, -0x43820408, -0x21c7870400, -0x9163870400, -0x4182870200, -0x81c3810200, -0x2143830200, -0x41468f0200, -0x8143830300, -0x40c2870400, -0x2347cf8200, -0x2147830400, -0x40c2cf0200, -0x4082870600, -0x9122cf8200, -0x2143870600, -0x146870408, -0x22c7830200, -0x3246870400, -0xa146870400, -0x8143870408, -0x2143870400, -0x61c3878200, -0x8143850204, -0x2142870200, -0x41a3c70400, -0x4127810400, -0x23468f9a00, -0x125850400, -0x2143c70400, -0x4383c30604, -0x2043860800, -0x163830408, -0xa147870600, -0x41a3830400, -0x2183020400, -0x3146870600, -0x2142830200, -0x367c30600, -0x2147c70400, -0x11478f8800, -0x2143070400, -0x2042870400, -0x1142860800, -0x9324cf0200, -0xa1478d0200, 
-0x147810200, -0x2146878200, -0x8364878200, -0x2143820400, -0x243810200, -0x2347838300, -0x167cf8400, -0x8143810204, -0x1127850400, -0x2143870400, -0xc7810200, -0x41b7c70400, -0x10e38f8800, -0x8103c38200, -0x1047850200, -0x8126870400, -0x27820800, -0x8163830400, -0x10e3860c00, -0x2142c70200, -0x9143820400, -0x5123820c00, -0xc1428f8600, -0x20c3820400, -0x20c28f0400, -0x40c2870600, -0x11628f0400, -0x30c3cf0400, -0x20c3870200, -0x143810200, -0x3147c70600, -0x4103898200, -0x10438e0c00, -0x11428f8400, -0x2143860400, -0x4102810200, -0x2043870200, -0x9367c30400, -0x8103810204, -0x91638f0600, -0x127830600, -0x2142830200, -0xb147c70400, -0x11e7cb0400, -0x2247870204, -0x2147c28400, -0x1124cf0600, -0x2142830600, -0x11c3860400, -0x8127c98102, -0x11470e0800, -0x4102878400, -0x1167c20400, -0x11a7cf0600, -0xa143870200, -0x247810600, -0x1c3830604, -0x21c3830408, -0x8122830200, -0x1163820800, -0x20c38e0c10, -0x2143c20410, -0x8143810200, -0x20c3ce0400, -0x2143870400, -0x21c3cf0400, -0x1043820800, -0x4082830200, -0x50e3820c00, -0x146830200, -0x40c38f8200, -0x143830600, -0x2147870200, -0x41e3830400, -0x2147860400, -0x51e3c70600, -0xe3860800, -0x4082870200, -0x1327cf8200, -0x61c2870200, -0x41c3c70600, -0x81c3850200, -0x10a38f0410, -0x21e1c20c10, -0x2247850200, -0xc7ce0800, -0xa143810200, -0x23c7830304, -0x13267cf8100, -0x1163830400, -0x1127c70c00, -0x11a7cf0c00, -0x20c3820400, -0x11e28f0400, -0x1167cf0c00, -0xc3870400, -0x2143820400, -0x8143870400, -0x4122860c10, -0x10a28f0400, -0x42448e8100, -0x2142870600, -0x49a7860800, -0x1147870400, -0x40c3870600, -0x9266c70200, -0x60c3850200, -0x143820408, -0x2142070400, -0x3042870400, -0x2143870200, -0x11c7878000, -0x143810200, -0x61c38f0600, -0x167cf0400, -0x8143870200, -0x51e7c70c00, -0x143830200, -0x10c38f0400, -0x1142870400, -0x5163870400, -0x2047870200, -0x11638f0400, -0x143830408, -0x4143870200, -0x20c3820400, -0x8142870200, -0x143830600, -0x11e3820800, -0x23c3830200, -0x20c38f0600, -0x1043cf0400, -0x1a7c70c10, -0x2142870400, -0xa1c3878600, 
-0x1a267c70300, -0x51a6c70c10, -0x1047c60800, -0x4103830200, -0x8142c70200, -0xa142878600, -0x2043c70400, -0x31c7cf8400, -0x11c3820400, -0xa3870800, -0x9364cf8300, -0xa143830400, -0x6147870200, -0x1043860400, -0x41c3830600, -0x9147cf9200, -0x51e3860c00, -0x2143830600, -0x61a3c30400, -0x1227c10200, -0x2143830400, -0x8167c10600, -0x1146cf0400, -0x143810600, -0x9167830600, -0x11648e0400, -0x227c08100, -0xa3448f0300, -0x143830608, -0x2147870600, -0x50c2870400, -0x9162cf0600, -0x61c3860c00, -0x1123870400, -0x4143860400, -0x2147ca0400, -0xa147830600, -0x50c3870400, -0x10e38e0800, -0x20c38f0400, -0x27820000, -0x1143870c00, -0x8127cd0300, -0x143810200, -0x1c3820400, -0x10a7860800, -0x1c3810200, -0x4183030200, -0x81c3830600, -0x10c3870800, -0x8307c18300, -0x2267830604, -0x143810400, -0x2143830200, -0x81c2cf8300, -0x2147870600, -0x4182820400, -0x31478f0600, -0x8367c78100, -0x11e3870400, -0x5167820c00, -0x20c2870200, -0x4122830408, -0x2147870300, -0x41c3870800, -0x2147850204, -0x2143810204, -0x342830200, -0x21478f0204, -0x21c3c60400, -0x126870400, -0x1167c30400, -0x1127830400, -0x41c3830600, -0x2143830400, -0x143c20410, -0x10c3860400, -0x127810200, -0x11e6cf8200, -0x41e3cf0608, -0x1123870400, -0x8323c18200, -0x2142830200, -0x51c3860400, -0x8163830400, -0x41c7cf0200, -0x11678b0400, -0x1167cb8400, -0x41c3c78200, -0x2142870204, -0x142860400, -0x21c38f0400, -0x2143830200, -0x19678e0c00, -0x1142870400, -0x20c3870400, -0x4167810300, -0xa3ce0c00, -0x11678f0600, -0x2043820400, -0x2143830600, -0x2043820400, -0x1c3870200, -0x4124cf8200, -0x21c3870600, -0x10e38e0c00, -0x41c3870600, -0x20c3820408, -0x127810400, -0x11648f8200, -0x2344878100, -0x1127c70400, -0x21c68f0600, -0xc3830408, -0x3147860400, -0x21c3860400, -0x21c3830408, -0x143870400, -0x1143830800, -0x30c38f0400, -0x142860400, -0x23c7c70600, -0x4143870400, -0x2143830600, -0x8143830600, -0x2142870400, -0x1062860400, -0x163830408, -0x143810400, -0x143830200, -0x8327cf8300, -0x6146870400, -0x4183870400, -0x183810200, -0x2043810200, 
-0x143800600, -0x61c2cf0200, -0xe3860810, -0x11a2870c00, -0x2247810204, -0x12247870300, -0x18367c70600, -0x2143870400, -0x2143860400, -0x1143860400, -0x8a3870800, -0x21c3830608, -0xe3861800, -0x102c7c10300, -0x2042870600, -0x10e3860800, -0x2347c70400, -0x4182870200, -0x12347c58300, -0x2042878400, -0xc143830600, -0x61c3cf0600, -0x4102838400, -0x51e38f8400, -0x2143870400, -0x3147870400, -0x147810400, -0x1042878400, -0xa1438f0200, -0x2143870200, -0x2143820400, -0x2247810200, -0x8143820400, -0x8326470200, -0x21c3830c00, -0xa1c3870600, -0x21c7810200, -0x21c3820c00, -0x8367cd8200, -0x10c3820400, -0x8103810200, -0x12343870300, -0x2144870200, -0xa7c30800, -0x2147c70200, -0x141810200, -0xa147810600, -0x1167870400, -0x1c3830400, -0xc142870200, -0x8143810200, -0x11e7c30c00, -0x1163830c00, -0x8143870200, -0x8143810600, -0x1c3870400, -0x21c7870600, -0x83830408, -0x8143830200, -0x2246c78100, -0xa343830600, -0x40e38a0410, -0x1143c70400, -0x91244f0200, -0xc103c78300, -0x50c3860400, -0xe3861800, -0xa143820408, -0x11e78f8c00, -0x10247818100, -0x11478f0400, -0x1127870600, -0x2247830200, -0x10247c18100, -0x2143830400, -0x10c3820800, -0x8143810300, -0x41c3c20400, -0x10e3870800, -0x9147850200, -0x8143810200, -0xa143830200, -0x186878200, -0xe166870200, -0x61c3870400, -0xa3820410, -0x31c3870408, -0xa143870400, -0x1163820810, -0x83020000, -0x41c3810200, -0x2147850600, -0x2143870c00, -0x8146878600, -0x10c3cb0400, -0x2247c70200, -0x10240878100, -0x10c3860400, -0x3147cf0400, -0x1042860800, -0x20c3870600, -0x2147870400, -0x107810000, -0xc1c3878200, -0x1226870600, -0x143c50000, -0x2267cf8000, -0x51c7e78c00, -0x10a38a0c00, -0x8107810600, -0x1c3810400, -0x11228f0200, -0x2343870200, -0x20c3870400, -0x1c3820400, -0xa28f0000, -0x3147870400, -0x40c1870200, -0x1346870400, -0xc3830600, -0x40a38f0200, -0x1127890600, -0x243c30200, -0x11e7830c00, -0x51e68f8c00, -0x41a2870400, -0x20c28f8200, -0x11a2860c00, -0x126878400, -0x70e38f0200, -0x367c78300, -0x2043830200, -0x1143820810, -0x81a7cf0600, -0xa3830c00, 
-0xa142870200, -0x4187850204, -0xa166870200, -0x8163810200, -0x23478f0200, -0xe142870200, -0x367870400, -0x30a3830c00, -0xc3810200, -0x8143838600, -0x21478f0600, -0x2143810200, -0x11438a0400, -0x2147830400, -0x142820400, -0x21c3820400, -0x1142870200, -0xe1c3870400, -0x1063860400, -0x246830200, -0x41c3830600, -0x41c3c20400, -0x8143820400, -0x4123860810, -0x41438d0200, -0x146830200, -0x1e7870400, -0x21438f0400, -0x8103810200, -0x43870400, -0xc2870200, -0x10246c68000, -0x20c3820800, -0x8167870600, -0x4143860400, -0x1163810600, -0x40c7830200, -0x343810204, -0x243810200, -0x21c3ce0400, -0x142830600, -0x2343870200, -0x41c7838600, -0x2143820400, -0x11e6870c00, -0x143830600, -0xa143820400, -0x2142870600, -0x1c3870c00, -0x27820c00, -0x1167860800, -0x1c3850200, -0x8224c70200, -0x167810408, -0x1124cf0200, -0x42820400, -0xa147c70600, -0xc3870400, -0x43820000, -0x4122870200, -0x4183830400, -0x21438b0200, -0x4163830400, -0x1167878000, -0x2143c60400, -0x4082860400, -0x11678f0400, -0x1123cf0400, -0x1143830400, -0xa142830200, -0x21c7838200, -0x41c38f8400, -0x2347870200, -0x2142870200, -0xc103c78300, -0x1042870400, -0x10c38e0400, -0x2347c50204, -0xc3020408, -0xa343810200, -0x1143860408, -0x2143830600, -0x142870400, -0x21c3c70800, -0x20c3ef8000, -0x20c3840800, -0x2143cf8400, -0x1126870400, -0x11a7830408, -0x1143870c00, -0x143c60408, -0x11a38f0400, -0x8143810200, -0xc3830400, -0x143830200, -0x4082870400, -0x343c20400, -0x12266478100, -0x20c3870400, -0x2142870200, -0x20c38f0400, -0xa1428f0100, -0x60c2870200, -0x247830400, -0x10a3820400, -0x2143870400, -0x1127c30200, -0x20c2860000, -0x1c7820c00, -0x143830400, -0xa143870200, -0x2143870400, -0x1163870408, -0x143850204, -0x1367870608, -0xa142870600, -0x10e3870800, -0x367c70200, -0x2347810200, -0x2367cf8304, -0x3142870600, -0x8143830600, -0x8346878200, -0x2143820400, -0x12343830200, -0x41a7830c00, -0x2143830204, -0x23c7c78600, -0x40a3cf0400, -0x2147830600, -0xa3c78f8a00, -0x61c3870200, -0xa143830200, -0x1163870400, -0x60c38f0200, 
-0x3166c70400, -0x1164cf0200, -0x61e3cf8300, -0x1143070800, -0x5163c70400, -0x4127830400, -0x341810204, -0x1043860400, -0x146870400, -0x4143870400, -0x2146870200, -0x2347c70600, -0x8367870300, -0x2347c10200, -0x20428f8400, -0x20c2870600, -0x1167870608, -0x8143810200, -0x123830c10, -0xb164cf0200, -0x20c78f0400, -0x367c30200, -0x2143850600, -0x8143830200, -0x143810400, -0x2143870200, -0x8143c58300, -0x2143870408, -0x2143830400, -0x163850200, -0x2347830204, -0x1164878400, -0x41e78f0408, -0xc1c3830200, -0x51e38f0400, -0x30c3860c00, -0xa1c3870600, -0x1b3c70c00, -0x20c3870200, -0x142870200, -0x2147c90200, -0x1a7c60800, -0x327810600, -0x2347870300, -0x2346870200, -0x1147870600, -0x18383810302, -0x51c3860400, -0x4143820400, -0x12243838100, -0x4083830400, -0xc3860400, -0x4142878200, -0x4083810400, -0x1163830c00, -0x127c90200, -0x1040860000, -0x10a3870810, -0x1c3830400, -0x2043820400, -0x2147c30608, -0xa147c70400, -0xa344cf8300, -0x1127870400, -0x21c3c70400, -0xc3820400, -0x1167870400, -0x142870400, -0x12448f0200, -0x1147820400, -0x10e3820c00, -0x1043860400, -0x5127870400, -0x2247810200, -0xa143830600, -0xc3c60800, -0x4081030200, -0x20c3860c00, -0x4143810200, -0x4083820400, -0xa143810400, -0x61c38f8400, -0x8343c78300, -0x51c7c30c00, -0x20e28f8200, -0x1043860800, -0x2047830200, -0x21c28f0400, -0x122870800, -0x20c28f0200, -0xc1e7cf8300, -0x1147c60400, -0x20c3820400, -0x10678f0600, -0x3082860600, -0x1127870600, -0x2147830600, -0x11678f0400, -0xa347830200, -0x4122870400, -0x2042860000, -0x147e70400, -0x127820800, -0x42860400, -0x41a2c60400, -0x2143820400, -0x2143820400, -0x2146870200, -0x2143830200, -0x11c3870400, -0x5147c38c00, -0x126830400, -0x8143810200, -0x2343810300, -0x2143860800, -0x4183c70600, -0x20c28f0400, -0x10c3860400, -0x4183830600, -0x147810200, -0x8143870400, -0xc082830200, -0x143830600, -0x1144870200, -0x41a7c70c00, -0x21c3870c00, -0x2146870600, -0xa3c7c70200, -0x8143830400, -0x61c3820408, -0x143ee0400, -0x1142830400, -0x1147c20c00, -0x5167830400, -0x8163c78600, 
-0x41a3c70400, -0x2247870200, -0x2142830400, -0x40c3030400, -0x8143830600, -0xc1820800, -0x20c3820400, -0x31c3820400, -0x41a78b0400, -0x1127c70200, -0x9163cf0400, -0x2042860400, -0x21c3870200, -0x30c7878c00, -0x1147870c00, -0x11e3820410, -0x1142870200, -0x1e3830400, -0x20c3830204, -0xa3820c00, -0x4083850200, -0x11e3c20800, -0x30c38f8c00, -0x4923870c00, -0x4103830600, -0x8347830200, -0x1124cf0600, -0x3c7810302, -0x41c2870200, -0x163830600, -0x143cd1200, -0x8143810200, -0x2146870200, -0x143820c00, -0x41c3878600, -0x6142c70200, -0xc1820800, -0x11428f8400, -0x6147870200, -0x3142860400, -0x8146870200, -0x11e7cf0408, -0x9167850600, -0x8142830600, -0x4122c70400, -0x2147870608, -0xa143e70200, -0x5163830400, -0x1c7810204, -0x8143810200, -0x143c60400, -0x2047830200, -0x20c3860810, -0x1162870800, -0x1123830400, -0xc1e6cf0200, -0x2143870400, -0x41a7830c00, -0x146870408, -0x2142870400, -0x8143c10400, -0x147c20410, -0x2142870000, -0x2247858100, -0x1146860400, -0xc3830400, -0x1042870800, -0x8143820400, -0x2142870200, -0x8142870200, -0x13678f0204, -0x40c2870200, -0x8143830200, -0x61e3830c00, -0x9367870400, -0x12247870200, -0x2142870200, -0x2246878300, -0x2142870200, -0x11478f0400, -0x2247810300, -0x6142870200, -0x91668f0700, -0x1a7c70400, -0xc183810200, -0x2143830600, -0x2143cd0200, -0x1147c78800, -0x963820c00, -0x41e3830408, -0x41c3870400, -0x143810400, -0x1167830408, -0x11a68f8400, -0x8142870200, -0x1167830800, -0x1c6830200, -0x11e3890400, -0x10e3830410, -0x9123cd9100, -0x143810600, -0xd163830408, -0x18265c48100, -0x4083820400, -0x4162870600, -0x2143020800, -0x1167ca0400, -0x51e78f0400, -0x2143830400, -0xa1478f8200, -0x4183810200, -0x41c3870200, -0x8143810200, -0x41c3810200, -0x143830604, -0x41c3830400, -0x1163870400, -0xc122cf8200, -0x8324c70200, -0x4141878400, -0x1127870400, -0x1142830400, -0xc3870600, -0x102830400, -0xa2860400, -0x822860800, -0x12247878100, -0x4083810200, -0x2147810200, -0x2143870200, -0x1628f0200, -0x8347c10600, -0x2144878100, -0x11e3830800, -0x1a78f0400, 
-0xa143870400, -0x2143830600, -0x1142860400, -0x2143830200, -0x4182870200, -0x11678f0600, -0x8143810600, -0x1123860c00, -0x143020408, -0x143830400, -0x2246870200, -0x4083030000, -0x10628f0400, -0x10326478300, -0x8147870204, -0x8143810204, -0x41c3810400, -0x2083820400, -0x8224c78200, -0x2342870200, -0x2142870600, -0x9324cf0600, -0x11c68f0200, -0xc3830200, -0x13366c78300, -0x40c3820400, -0x2043820400, -0x2142870200, -0x9344cf0600, -0x1143820c00, -0xa143c48100, -0x21c7830204, -0x20c3c60800, -0x41e3870608, -0x30478f0c00, -0x9367830408, -0x10c38e0800, -0x1167870608, -0x51e7870400, -0x243850202, -0x2142878200, -0x21c3870200, -0x2346870200, -0x50e2870400, -0x1123820c00, -0x51e7cb0400, -0x2043860400, -0x1a7830400, -0x264870200, -0x10438f0c00, -0x1162870600, -0x1163860400, -0x147838608, -0x12267c98300, -0x2147870400, -0x1127c90200, -0xa346c78300, -0x127cf0200, -0x4182870200, -0x51a28f0200, -0x1122860400, -0x4183850200, -0x2343870200, -0x247cc8100, -0x2143830600, -0x143c30408, -0x10c3860810, -0x10a3860400, -0x1e3820410, -0x8145850200, -0x61c3878600, -0x2247810200, -0x50c28f0400, -0x11e78e0e00, -0x11e3820c10, -0x81810400, -0x41b7870400, -0x11e68f0c00, -0x20c68f8600, -0x8122870204, -0x4083850200, -0x8143810204, -0x8343830200, -0x1c3830600, -0x2143830408, -0x12267810200, -0x123478f8300, -0x5127c70600, -0x2143870200, -0x2343810200, -0x142830400, -0x1122c70400, -0x238f0400, -0x10e3820800, -0x4143830400, -0x9126c70400, -0x2143810600, -0x2347850200, -0xa1468f8600, -0x1367cf8300, -0x4126c30400, -0x22478f0200, -0x1c3820c10, -0x41a7c20400, -0x51a6cf8400, -0x247830408, -0x123c30408, -0x1042870400, -0x2143810200, -0x11e7878c10, -0x143830c10, -0x2143860000, -0x30c38e0c00, -0x1142830000, -0x8103810200, -0x2143870600, -0x11668f0000, -0xc3820400, -0x8147870600, -0x2143830400, -0x41a7c70c10, -0x2247c90200, -0x21c3870408, -0x2343810400, -0x51e7870408, -0x20c7830400, -0x20c3820c00, -0x9126870400, -0x4167870608, -0x18143810300, -0x2143870400, -0x8327ce8102, -0x13678f0600, -0x8167c70408, 
-0x5163820400, -0x11638e0c00, -0x247c68100, -0x123820000, -0x810448f8000, -0x147830408, -0x164cf0200, -0x20c3870600, -0x6347cf8400, -0x2143870800, -0x143830604, -0x41c3860400, -0x1143860400, -0x21c3c60c00, -0x2146870200, -0x40c28f0200, -0x143820408, -0x1043860000, -0xb1628f8600, -0x181850200, -0x8162c70200, -0x8162870200, -0x1147c70800, -0x41e3830408, -0x41e2870400, -0x10c3820c10, -0x30c28f8400, -0x183820400, -0x1c3830204, -0x21468b0200, -0x127810200, -0x40c3870400, -0x4082870200, -0x2043820400, -0x143820800, -0x41c3830400, -0x1c3830600, -0x2143820400, -0x1124cf0600, -0x2083830400, -0x2043860800, -0x4083070400, -0x10e3c60800, -0x9366cf0200, -0x10243810200, -0x8143830608, -0x2147cf0200, -0x8143870604, -0x1143820800, -0x5143c70408, -0x204083870000, -0x8143c78100, -0x4147c28c00, -0x9167c78608, -0x8367c70600, -0x1a7878400, -0x8143810204, -0x8147830400, -0x247c08102, -0x4143820400, -0x10c2870400, -0x8146878200, -0xa144cf0200, -0xa147c78300, -0x1327cf8000, -0x2247830200, -0xa142870200, -0xc2870400, -0x20c7cf0400, -0x10e78f0400, -0x143820400, -0x143830400, -0x1a3890200, -0x9a7870400, -0x3167870600, -0x8107810200, -0x1c3830604, -0x2143870600, -0x1c3820c00, -0xa347c78c00, -0x40c3870408, -0x2347c60400, -0x4142870400, -0x11a38f0400, -0x1167c20400, -0x41c2870200, -0xc3820400, -0x12347870304, -0x1c6cf0000, -0x20c28f0200, -0x2342870200, -0x8367c50300, -0x2143830200, -0x2147c70608, -0x47810200, -0x50c3860c00, -0x2147890200, -0x21c6830200, -0x4122860c10, -0x8162860600, -0xa347cf8300, -0x2143c60400, -0x2143c30400, -0x2147870200, -0x10c3860800, -0x21c7ce8400, -0xd167cf0400, -0x143810200, -0x41a7860c00, -0x2143820400, -0x147810200, -0x83658f0200, -0x8183c30600, -0x2143870200, -0x4143c70400, -0x2147830600, -0x1143c60400, -0x8143810200, -0x2142870200, -0x142820400, -0x41a3870600, -0x1126cf0c00, -0x8127c08100, -0x10a2820c00, -0x40c2820400, -0x11678f0800, -0x1146830400, -0x1126870400, -0x183820400, -0x10e3820810, -0x2267810600, -0x1a4860400, -0x8142830200, -0x3147830400, -0x1167830400, 
-0x2147810300, -0x4183c30600, -0x2142870400, -0xa147810204, -0x8144878400, -0x8327c70600, -0x4142870200, -0x2142870400, -0x10267810200, -0x1142860400, -0x4143830200, -0x2347cf0200, -0x8143810200, -0x1c3830408, -0x1042870400, -0x3043870400, -0x1176fc38300, -0x2147cf0400, -0x107810200, -0x143820400, -0x143830400, -0x143870400, -0x2144870200, -0xa347870300, -0x3c3830200, -0x10c3820400, -0x1043070800, -0x2448f8100, -0x2147c70600, -0x8147c78200, -0x2143820400, -0x147cf0400, -0x10e7870800, -0x2147870400, -0x41a3870410, -0x1167820c00, -0x163810400, -0x21c7830600, -0x41e3860c00, -0x143810400, -0x10e38f0800, -0x8127cd0204, -0x1147c60400, -0x20c3870200, -0xc364cf8300, -0x1678f8800, -0x43c3830200, -0x2143870200, -0x41a2870400, -0x143830200, -0x40c3870c00, -0x167870608, -0x2142870400, -0x61c7cf0200, -0x21c3870400, -0x21c3830200, -0xe1418f0200, -0x41e3810408, -0x10c7830400, -0x2043c70800, -0x1143830400, -0x40c7870200, -0x9143820600, -0x2143c60400, -0x2143830400, -0x143810200, -0x41e3870408, -0xc3870400, -0x10e38f0c00, -0x1167890200, -0x20c68f0408, -0x4183830200, -0x41c3820800, -0x1162870800, -0x60c38f0200, -0x1163820400, -0xc2870400, -0xc3830408, -0x1163870800, -0x11448f0200, -0x8123c70600, -0x21c7c78400, -0x827870800, -0x2243830200, -0x83870400, -0xe2878c00, -0x1043860800, -0x2081020400, -0x2143cf0400, -0x4083830400, -0x11268b0200, -0x1327810200, -0x146870400, -0x41c3838c00, -0x10a3870400, -0x2183870200, -0x2147870200, -0x40c2870200, -0x4123820c10, -0x2142870400, -0x1144870400, -0x41e7830c10, -0x2147810600, -0x11628f8400, -0x142870100, -0x4123830408, -0x8143810200, -0x1043860800, -0x40838f0200, -0x2246c78300, -0x8143810204, -0xa1c3c30608, -0x51e2cf0400, -0x143820400, -0x11e2830400, -0x1126cf0200, -0x21c3c70400, -0x2143810200, -0x10c3860400, -0x143820400, -0x247810200, -0xc2820400, -0x163870608, -0xa147870600, -0x4143810200, -0x8367c10608, -0x61c3cf0600, -0x1127830400, -0x2143820400, -0x41c3870608, -0x8143850200, -0x2082870400, -0x22830000, -0x142870400, -0x20c3870400, 
-0x9167870600, -0x30c7ce0800, -0xa346878300, -0xa142870200, -0x143860400, -0x2147c70400, -0xc0c38d8b00, -0xa366870200, -0x10c2860400, -0x1147850200, -0x21c7870408, -0x21c3830400, -0x2147810200, -0x1e38b0408, -0x81268f8200, -0x11678f0400, -0x9267c70200, -0x20c3030400, -0x8307c68100, -0x31c7cf0600, -0x1147830c00, -0x2147870200, -0xe3820400, -0x31c38f0400, -0xe142870400, -0xa3448f0200, -0x8143830608, -0x167c30410, -0x3147878400, -0x10c3070800, -0x8123c50200, -0x2247878300, -0x1143830400, -0x10c3870400, -0x8327c10200, -0x41a7c70600, -0x2147830600, -0x20c3830400, -0x4347830400, -0x1167870400, -0x367870400, -0x41c7cf0000, -0x2147830400, -0x2143830400, -0x41a3c20c10, -0x41c3830400, -0xc3820400, -0x341810600, -0x2047810200, -0x8347c70204, -0xc3860400, -0x41c2870400, -0x143810400, -0x2167850600, -0x2247870600, -0x143820400, -0x4123c20c10, -0x2267870204, -0x1143860400, -0x40c3870800, -0x2143870200, -0x1142860400, -0x1143860400, -0x1c3870200, -0x1c3860c10, -0x2143c70400, -0x1163860400, -0x61c68f8400, -0x2142870400, -0x5166cf0600, -0x2264c70200, -0x2143820408, -0x2043820800, -0x8226c70600, -0x143830400, -0xc142870200, -0x4383030200, -0x8103850200, -0x1167c78c00, -0x10327cc8100, -0xa143830600, -0x8147870400, -0x10c3860400, -0x10c2870400, -0x41c2cf8600, -0x1163861800, -0xc28f8200, -0x10a3870800, -0x20e28f0c00, -0xa1c6878600, -0x2043830200, -0xa142860400, -0x147c38400, -0xa366cf8300, -0x8127850204, -0x2247878300, -0x2147870400, -0x2143820400, -0x11227c10200, -0x11e2870c00, -0x9a38f0800, -0x51a3850400, -0x9127cf0400, -0x2143820400, -0x11e2cf8400, -0x51a7820c10, -0x51e7cf0400, -0x8127c10408, -0x1c3820408, -0xa3820800, -0x8103810200, -0x127c30408, -0x10c3870800, -0xc3cf0200, -0x10c3820800, -0xa143870604, -0x3147870400, -0x9167890600, -0x8224cc8100, -0x2146830200, -0x1065860400, -0x61c7870200, -0x127820800, -0x1063820800, -0x5163820c10, -0x23c7c30600, -0x21e0830820, -0x142870400, -0x143820400, -0x2143c70200, -0xd367830c00, -0x4383010200, -0x6143830800, -0xa7cf0800, -0x8126870600, 
-0x2243810200, -0x8103830200, -0x33678f0600, -0x2061e28f8300, -0x2147810200, -0x11e68f0e00, -0xa347870200, -0x4083870400, -0x31c28f0400, -0x147830600, -0x183870400, -0xa143870200, -0x2143810200, -0x50e3860800, -0x6143820400, -0x27820000, -0x2042860400, -0x40c2cf0200, -0x2142830400, -0x143820400, -0x1167ce0400, -0x83e7830408, -0x1142830400, -0x20c38f0200, -0x10e3820800, -0x143850200, -0x4183870600, -0x10c3060800, -0x2147830200, -0x61c78f8600, -0x166830408, -0xa1c3820400, -0x8143850200, -0x143810200, -0x10e3820800, -0x91678b0400, -0x143820400, -0x3c7c30408, -0x61c3820400, -0x41c3820c00, -0xa347870200, -0x2143860400, -0x1142860400, -0xa346870200, -0x63cf0000, -0x11678f8c00, -0x23c7870200, -0x8347850300, -0x1127830400, -0xc102870200, -0x2247830600, -0x2147830408, -0x1123830410, -0x40c2870200, -0x9167830600, -0x2143870200, -0x127c70400, -0x8143870200, -0x1325cf8000, -0x20c78b0c00, -0x3142870400, -0x1e3c70408, -0x103810200, -0x143850200, -0x162870400, -0x1163860400, -0x10a3c60800, -0x167810200, -0x2143860400, -0x4125c70204, -0x2347870204, -0x1438d0204, -0x10c3830c00, -0x1043860800, -0x2143860400, -0x41c3878400, -0x123810408, -0x1163870600, -0x2142860400, -0x11e38f0c00, -0x2143870200, -0x41c3830400, -0x8142870200, -0x147850200, -0x147c60c00, -0x142860408, -0x10c28f0400, -0x21c6830200, -0x9127c70400, -0xc3cd0000, -0x8141810600, -0x20c3830400, -0x1e3861800, -0x21c7c70c08, -0xa143830600, -0x11e7cf0408, -0x30438f0800, -0x8343c10204, -0x40c3c60800, -0x1163820400, -0x41c7c10200, -0x8124870200, -0xc102830200, -0x20c3830400, -0x11668f8400, -0xc7c60000, -0x8143850100, -0x1c3820200, -0x1162870200, -0x2143830408, -0x11c3830400, -0x3043878c00, -0x9143830400, -0x19367cf8200, -0x4146870300, -0x9167870600, -0x4147810200, -0x10c3860400, -0x8147850200, -0x19324cf8100, -0x41c3850608, -0x1a2c70c10, -0x20c3820400, -0x2143830400, -0x2042860400, -0x127830400, -0x20c3820400, -0x1147c78800, -0x8102c78300, -0x2347870200, -0x41c3820408, -0x2047810200, -0x10226478300, -0x30c3870c00, -0x8163c20400, 
-0x20c3860c00, -0x20c78f0200, -0x1228f0200, -0x143830400, -0x142830200, -0x2142870200, -0x2243830200, -0x4083820400, -0x2043870400, -0x143810204, -0xe2820800, -0x1c3830408, -0x10a3820800, -0x81c3c30600, -0x8103810200, -0x41e3830c00, -0xa147e70600, -0x4141820800, -0x1163870400, -0x8102830200, -0x2146870200, -0x142870200, -0xe3830c10, -0x40c38f8200, -0x2143830400, -0x2142870600, -0x20c3870800, -0x143870400, -0x9367810600, -0xa143870200, -0x10e3830400, -0x12247870300, -0x20c28f8200, -0x61c3870200, -0x21c3830600, -0x8126830200, -0x2247c78300, -0x21e7cf0c00, -0xc143870200, -0x1146870400, -0x2143830600, -0x143830400, -0x51e3820c10, -0x10244c70200, -0xc3020400, -0x147810200, -0x10c3860400, -0x125c70400, -0x9367c78400, -0x41e3870810, -0x11278b0400, -0x144870200, -0x2147cf0400, -0x1067860400, -0x8143820400, -0x123820400, -0x8143810200, -0x21c7870408, -0x11668f0400, -0x21c2870200, -0x92244f8100, -0x167cf0200, -0x1127cf0400, -0x20c3810200, -0x21c3870600, -0x2143870400, -0x3166c70200, -0x11a68f0400, -0x10c7c61800, -0xa147870600, -0x9163830608, -0x143820400, -0x8167cf8300, -0x8107810300, -0x60c6cf0200, -0x41e7c78c00, -0x10c3820400, -0x1123830400, -0x2142870204, -0x4082870000, -0x8147830400, -0x2143870600, -0x4103810600, -0x4163850200, -0x2143820400, -0x8126c70200, -0x11c2870400, -0x21c3830600, -0x60c3cf0200, -0x2347830604, -0x4167830600, -0x10e28f0400, -0x4083820400, -0x2147810300, -0x147c30408, -0xc3c20400, diff --git a/samples/digitrec/digitrec/data/training_set_5.dat b/samples/digitrec/digitrec/data/training_set_5.dat deleted file mode 100644 index 02ff008fc..000000000 --- a/samples/digitrec/digitrec/data/training_set_5.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0x7183899e00, -0x1f2020c00, -0x71818f1e00, -0xe3021800, -0x10e30e0000, -0x10c3030410, -0x19c3030c00, -0xe103010e00, -0xe2060000, -0x6181010600, -0x20c1021c00, -0x7103818e08, -0xf1c3819e00, -0x2181030400, -0x2083811c00, -0xe180811e00, -0x71c3810e00, -0x7187810e00, -0x7103010e00, -0x61c2819e00, -0x7181030c00, -0x11e3031c00, 
-0x39e6060608, -0xc183c08e00, -0xf3021c00, -0x3183890e00, -0xf38e0800, -0xf3060000, -0xc1c3810e00, -0x3081811c00, -0xf102038f08, -0x31e3070418, -0x7183c99e00, -0x1e3e3078f0c, -0x6103810e00, -0x11e3011c00, -0xe3020800, -0xe1c1c99e00, -0x18e3060c00, -0xe3860800, -0x10e3071c00, -0x21f38d0e00, -0x3183810c00, -0xe3010c00, -0x71e3031e08, -0xe2040c00, -0x31e30b0c00, -0xe3c3c08f00, -0x61c1010618, -0xf1e2070608, -0xf2021800, -0xe1c2488f00, -0x7103810e08, -0x10c3021800, -0x4163010c00, -0x3182811c10, -0x1e3890c00, -0xe103818e00, -0xe1020800, -0x3183050608, -0x6181010400, -0x11c2020c10, -0xe2070c00, -0x70e3890e00, -0x6103899e00, -0xe307808700, -0x3183020e00, -0x70830f0e00, -0x4081030c00, -0x43c2408700, -0x61c3c10c00, -0x3083810400, -0x6083030400, -0x3947020c00, -0x3080080c00, -0x71c7811618, -0x1f3030c00, -0xe183808700, -0x6183010e00, -0xc1e3010e00, -0xe183850e00, -0x10e3021800, -0x6081830c00, -0x31e3031c00, -0x71c1031c00, -0x10e10e0c00, -0xf383010700, -0xe30f0000, -0x1e3090c00, -0x7183811e00, -0x6003000400, -0x7083070e00, -0x41c1020e00, -0x7103cb1c00, -0x1f2020c00, -0x50c3020e00, -0x6103010600, -0x6081810c00, -0x70c1871c00, -0x7103810c10, -0x610a0000, -0x71c3818e10, -0x7183819e00, -0xe1e7809e00, -0x30e3021c00, -0x4103819e00, -0x3903030c00, -0xb1e3038e00, -0x6183808f08, -0x71e1811e00, -0xe103c08e00, -0xe10e0000, -0x6083811410, -0x31c7810e08, -0x30e30f1c00, -0xe2020800, -0x61c3010208, -0x70c3070c00, -0x38810f0c00, -0xc203808e00, -0x7181090e00, -0x30c3811c00, -0xe143810e00, -0x187480200, -0xf3021c00, -0x11e3020c00, -0x71e3070c00, -0x4081810c00, -0xe207c08f00, -0x6186070300, -0x6183850e00, -0xf3e3030e08, -0xf107808f08, -0xf187800218, -0x10c10b0800, -0xc1c3010600, -0xe103010e00, -0x6183810e00, -0x1060c08f0c00, -0x61e3030e18, -0xc3060800, -0x1c3020208, -0x7183850e00, -0x41c3031e00, -0x11e2021410, -0x79c3878e00, -0xe30b1c00, -0x11c2030c00, -0x7183010e00, -0x6183810600, -0x71818b1c00, -0x6083810c00, -0x40c3810c00, -0xf1e3070e18, -0x10e1890c00, -0x6103808e00, -0x11e7810e00, 
-0x6081810e00, -0x60c3810e00, -0x7183818e00, -0x6183c58e00, -0x3183011e00, -0x2181030600, -0xf102070708, -0xe103810e00, -0x3183010400, -0x7183810e00, -0xe1c3c18e08, -0x31c3090e00, -0x6101030600, -0x10e3821c00, -0x41c3030604, -0x2183030204, -0xe183838300, -0xe143810e00, -0xe107818600, -0x4083810c00, -0x6181c10c00, -0x7183070e00, -0xe103810608, -0xf3020c00, -0x6181850e00, -0x7181831c00, -0x41e3020c10, -0xe1c3858e00, -0x31e3010e18, -0xe2040800, -0x31f7020e08, -0xe10e0800, -0x31c3070400, -0xc103810e00, -0xc301808f00, -0x610e0800, -0xe30a0c00, -0xe103818e00, -0xe103810e00, -0x60e2020c00, -0xf1c3c89f00, -0x63c781830c, -0x6181811e08, -0x10c3021c00, -0x6083010410, -0x65060000, -0xe101010e00, -0x71c3030c10, -0x3103030c00, -0x6181810e00, -0x4183010200, -0x1f3070c00, -0xc3030410, -0x638e0000, -0x6182020600, -0x6081010c00, -0x6183810e00, -0x20e2031e00, -0x1c3c7c08f00, -0x7183030e00, -0x6103030600, -0xe303810e00, -0xf3c1010e08, -0x71c3810608, -0xe103018e00, -0xe10e0800, -0x71c3031c00, -0xe2020800, -0x78c2030c00, -0x40c3060200, -0x71c206020c, -0x39c38d1e10, -0xc3c3c08e00, -0x7183050e00, -0x7387c08f08, -0x18e1021c00, -0xe307c08f00, -0xf1e781020c, -0xf103818e00, -0xf307848f00, -0xe38f0000, -0x61e1810e08, -0xf3060c00, -0x6103808610, -0x6081810c00, -0x6183818e00, -0x3982020c00, -0x11f7070c00, -0x7183010e00, -0xf103818e00, -0x3083090c00, -0x7183c39c00, -0x7083030c00, -0xf3e7070e18, -0x70c2060c00, -0x10e0890c00, -0x3083031c00, -0x1e3811c00, -0x71c3858e08, -0xf1c3c50e00, -0xf103c08f00, -0x7182070c10, -0x7183891c00, -0x71c2070e00, -0x10e18f0800, -0x7181811c00, -0x7183810e00, -0x7103808e00, -0x21e6020608, -0xf3070800, -0x30c38f0800, -0x10c3020408, -0x1e2021800, -0x19c2060c00, -0x21c6000200, -0x1f2020c00, -0x3081810c00, -0x6101010c00, -0x6102070204, -0x6183810c00, -0x30c3010e00, -0xf103810e00, -0xe181030208, -0x71c3811e10, -0x30c7810e00, -0x10c3821c00, -0x31c3891e00, -0x6183810c00, -0x7183891e00, -0x3083050a00, -0x7183c19c00, -0x7183030600, -0x7183030e08, -0xe1060c00, -0xf3040c10, 
-0x7183011e00, -0xc100808e00, -0xe100888f00, -0x1f20a0c00, -0x6103878e00, -0x6103810c00, -0xf303c19e00, -0x10c3040c00, -0x1e3010c00, -0x10c10e0c00, -0x7983030c10, -0x7183889e00, -0x61e2020e08, -0x1c3cc8000, -0x7183808e00, -0x71c2020418, -0xe183818e08, -0x6183018e00, -0xe1c3818e00, -0x7083870c00, -0x4101810200, -0x3183010e00, -0x1e385020c, -0x11c38b0c00, -0x6183810c18, -0xe183038e00, -0x39c2020e00, -0xe30e0000, -0x3983890e00, -0x7103c19e00, -0xe3020c00, -0x7183030c00, -0xe18f0400, -0x7183810600, -0x1f3020e00, -0xf3070410, -0x3183890c00, -0x30e3811e00, -0x7183891e00, -0xf1e3831e10, -0x6180810e00, -0xe1c6409f00, -0x71c1030c00, -0x10e3811c00, -0xe1c3878e08, -0x10e3070c00, -0xe1c7810e18, -0x3387818700, -0x30c1031c00, -0x70c30f1e00, -0xf183810e00, -0xc181c78f00, -0x31c3890e00, -0x7103818e08, -0xe103010600, -0x20e38f1c00, -0x30838f0400, -0x1081050c00, -0x2102070100, -0x7103810e00, -0xe3030400, -0x7182031e00, -0x71c3851e00, -0xf103810e00, -0x11c3811c00, -0x7103810e00, -0xe30e0800, -0xf1c3870e00, -0x6103819608, -0xf183c08e00, -0xf187c08f08, -0xe181818e00, -0x70c3021c00, -0x7083850a08, -0x60c10a0c00, -0x1e303808f00, -0xc103810e00, -0x2081870c00, -0x6183858e00, -0x6181850e00, -0x63020c00, -0x1e3c0c08e00, -0xf183050e08, -0x71c3831e00, -0x7183890e00, -0x71c3071e00, -0x18830e0800, -0x61c3c19e18, -0xf3020c00, -0xf101898f00, -0xe3010c00, -0x6183810e08, -0xe08f0800, -0xf2060c00, -0x7183811e18, -0xe081810e00, -0x71e7818e08, -0x7187850c00, -0x31e3850e10, -0x1e1021c00, -0x7183810e00, -0xc183c4810a, -0xe103c08e08, -0x7102020410, -0x7103030e00, -0x30818b1c00, -0xf102070700, -0xe307c08608, -0xf1e3859e08, -0x1e3c2488f00, -0xe2040800, -0x2183050200, -0x7081811e00, -0x7883031e00, -0x60e3070608, -0x7181811e00, -0xe3890c00, -0x6183850e00, -0xf103819e00, -0x71c3810e00, -0xc107c38700, -0x6103818200, -0xe183810608, -0x6103809e00, -0x2081810c00, -0x70c3811e10, -0x6101010600, -0xe307cc8700, -0x7143011e00, -0x6183810e00, -0x31e3811e00, -0x3387819e00, -0xc143810e00, -0x1f2020c00, -0xf183011e00, 
-0x43e7818700, -0x3083870c00, -0x71838c8e00, -0xe182808f00, -0x7081031e00, -0xe103848e00, -0x73e3030e18, -0x6180898e00, -0x31f2060c10, -0xf303850e00, -0x2181010208, -0x30c1831800, -0xe1c0489e00, -0x70818d0e00, -0x11e2031e00, -0x6183070304, -0x7181010c10, -0x6081890e00, -0x3081020410, -0xf1c3c48f0c, -0x30c3010e00, -0x11e3031c00, -0xf30e0c00, -0xf183810e10, -0xe3090800, -0xe1c3810e08, -0xf143819e00, -0x60c3030e00, -0x6183810e00, -0x71c3010618, -0x6181010600, -0x41e2070200, -0xf30a0c00, -0x71c3878e00, -0xc103808e00, -0xb101819e00, -0x1f3010e00, -0x2081030c00, -0x4103810600, -0x31c3810e08, -0x2183810e00, -0x1e7810c00, -0x30c18e1c00, -0xe1021c00, -0xe103848f00, -0xe1021c00, -0x63c7c58f08, -0xe3c3c58608, -0x30c10d0e00, -0x30c38b0c00, -0x7081031e00, -0xe10e0800, -0x1e2020c00, -0x71830b0c00, -0xf101030e08, -0xf3e3039e18, -0x4343808b08, -0xe103818f00, -0x10c3020c00, -0x40c2060200, -0x61e2060608, -0x40c1030c00, -0x6103810e00, -0x6181811e00, -0x11c2010a08, -0x3883011c00, -0xf3021c00, -0xf103819e00, -0x7183811c00, -0xe103818e00, -0xe103850200, -0x11e2020c00, -0x10c3031c00, -0x3083831c00, -0x30e30e1800, -0xe30e0000, -0xe2020c00, -0x79c202060c, -0x11c20f0700, -0x71c7070e00, -0x7183811c00, -0x3083090e00, -0xe3070000, -0x3183020410, -0xe103c08e00, -0x60e1011e10, -0xe103810600, -0x7183810e00, -0x18387c08700, -0x7183890e00, -0x71c3831e00, -0x71838b0e08, -0x1f2030410, -0xe3c3c89f00, -0x6103810e00, -0x7103810e08, -0x1e2020c00, -0xe100810a00, -0xe30e0800, -0x70c1c11c00, -0xc180810600, -0x60c0810e00, -0xf107809e10, -0xf3020800, -0x7183890c00, -0x7187811c10, -0x7183810600, -0x7183810618, -0xe183808f00, -0xe1831c00, -0xf1c3850e00, -0x1e1011c00, -0x30c3831c00, -0x73c7818e00, -0x11e38f0c00, -0x71c3c19f00, -0x7183818e00, -0x10e2020410, -0x1e7010218, -0xe1c2c89310, -0xe3c3c48f00, -0x618e0000, -0x7183818e00, -0x61c7c18e00, -0xe1e3010e08, -0x1c4040604, -0x61f3030e00, -0x7183850400, -0x13c6060608, -0x7183031e00, -0xe3020800, -0x79c1031c00, -0x7183811c00, -0x1e3831c00, -0x7103810e00, 
-0x6103010c00, -0x6103810e00, -0x6083810c10, -0x1060c7870c00, -0xc103808700, -0xf303808b0c, -0xe3c3810600, -0x7103810e10, -0xe2c6488e00, -0x7983030e00, -0xe30f0000, -0xf103810e00, -0xe2020c00, -0xc1c260cf00, -0x7183010e00, -0xe181810e00, -0x7183810e00, -0xe107850604, -0xe287c48700, -0x7183070e10, -0xe103010e00, -0xe103010e00, -0x11e3870c00, -0x10c0040c00, -0x31e3811c00, -0xf307808f00, -0xf20a0000, -0xe103818e00, -0x20c38b0800, -0x61080800, -0x3183030600, -0xc3c7c48e00, -0x11e2060c00, -0x71f3030e00, -0x71c3011e10, -0xe307818f08, -0x7101890e00, -0x6183858e00, -0x6183818e00, -0xe1e3409e00, -0x6183030600, -0xc101810600, -0xe2020c00, -0xe103818e00, -0x31e3890e00, -0x10c3011c00, -0x41e3810c18, -0xe10a0c00, -0xe107808700, -0x31c3030c00, -0x3183898e00, -0x6183810608, -0xe2041800, -0xe30e0800, -0xf1e3010e00, -0x1e303808f00, -0xe1e3031e00, -0x1e2020c00, -0x62060800, -0x7183850c10, -0xf183030600, -0xe2060800, -0x71e3071e00, -0xf3e3859f08, -0x7183810e00, -0xe187810e08, -0xf3870800, -0xe3021800, -0xe207c08f00, -0x71e3031e10, -0xe180899e00, -0x7182030e10, -0xf1c384811c, -0xe103818e00, -0x1e207808f00, -0x3083011c00, -0x60e3870c00, -0x7183810e00, -0xe103010e00, -0x31c3811e00, -0x21c3030608, -0xe38f0000, -0x71e3811e00, -0x3183020c00, -0x31c3010c10, -0x41c2030e00, -0xe183858e00, -0x7081020c00, -0x71c3899f00, -0xe2021c00, -0x71e3011e00, -0xe103818f00, -0x6183870e00, -0xe103810e00, -0x1e7810e00, -0x1e3871c00, -0x70c3831c00, -0x6183810e00, -0x71c1811c00, -0x1e303808700, -0x1e3831c00, -0x6183808e00, -0xe30e0000, -0xe1c2818e00, -0x83181011c00, -0xf183899e00, -0x10c2020410, -0x7083811c00, -0xe7870c00, -0xe307c08e00, -0xf183c48e08, -0x31c2030c00, -0x10e3020c00, -0x3083810e00, -0xe1c303060c, -0x7183850e00, -0x7183810c10, -0x1f207818f08, -0xe103810e00, -0xe103828600, -0x6183818e00, -0xc183878600, -0xe103810208, -0x3e2060608, -0x18c2070e00, -0x6183810e08, -0x71e3890c00, -0x10c3021c00, -0xc307038700, -0xf103848e00, -0x31c7818a18, -0x61c3849e00, -0x6183818e00, -0xc103808600, -0xe107ed9e00, 
-0x7183831c00, -0x6183810c00, -0xe3070800, -0x70830e1c00, -0x6183810c00, -0x1c2040400, -0x7187810630, -0x6183878608, -0x61c3810e00, -0x4181818e00, -0x71e3031c00, -0x20e1021c00, -0x6307c08e00, -0x10e3830800, -0x71c3011e00, -0x60c1020c00, -0x61f2060c00, -0x30c38b0c00, -0xf103c09f00, -0xf101011f00, -0x7183031e00, -0x7183848e00, -0x71c3c99e00, -0xf1e7818e08, -0xf30b0c00, -0x7163810e00, -0x79e3071c00, -0x7381890e00, -0x1e3031c00, -0xf182870e00, -0x6183010208, -0x18c1020c00, -0xe181898f00, -0xc143870600, -0x71e3831e10, -0x7181c18e00, -0x71e3011c00, -0xf38f0c00, -0x6181810e00, -0x610e0800, -0x71e3091e00, -0x6183818600, -0x20e30b0e00, -0xe181c88f00, -0xe3c7c88f0c, -0x3183811c00, -0x71c2030608, -0xe38f0000, -0x61c2418e00, -0x6103810400, -0x7383c88f00, -0xe307c18e00, -0xf1c3858e00, -0x1e3031c00, -0xc181850e00, -0x7182070e00, -0x6183030608, -0x6103810e00, -0xe3c7c18e08, -0x6003010600, -0xe3871c00, -0x70818d1e00, -0x3183891e00, -0x7104070700, -0xc103808600, -0xe103808104, -0xe2030800, -0x71e3031e00, -0x30c3811e00, -0xe103810608, -0x70e10f0c00, -0x41e3030410, -0xe307818700, -0xb902010e08, -0xe180810c00, -0x70c30f1c00, -0xe183810608, -0x61c3818e00, -0x11e3870c00, -0x6103070e00, -0xe3890c00, -0x7081850c00, -0xe18e0000, -0x71e3010618, -0x19c2020c10, -0x6081830c00, -0xe30b0c00, -0x61021800, -0x7103810e00, -0x71e3010e08, -0x1e7030c00, -0x21c3c09f00, -0x7183810e00, -0xc101810c00, -0xf107818e08, -0x30418e0c00, -0x7a07818e00, -0xe107818e00, -0xe1c3858e00, -0x1e3c88200, -0x3083811c00, -0xe307818f00, -0x7183070600, -0xe20a0c00, -0x4081010c00, -0x40e3030c00, -0x7107810e08, -0xe183810e18, -0x6103810e00, -0xc103010600, -0x78e10e0600, -0xe38e0800, -0x6181810e00, -0xe30e0400, -0xe307c08704, -0x7103808610, -0xe28f0400, -0x40c2020208, -0x11e1890e00, -0x78c1071c00, -0x31e2020c00, -0xe1020800, -0x7983811c00, -0x6103809e00, -0x6183818e00, -0xf3070c00, -0x7183811e00, -0x7183010c00, -0x41e3070e00, -0x6103010e00, -0x11f3810c00, -0xc103818e00, -0x7183011e00, -0xf3070800, -0x10e10e0c00, -0xf3030c00, 
-0x1e30b0c00, -0x10e3020c00, -0xc102030e00, -0x71e30b0e08, -0xe101818e00, -0xf103050e00, -0x6183010600, -0x1e7010600, -0x10e30e0800, -0xe143838708, -0x10e3021800, -0x61e2020c00, -0x1e3030c00, -0x71c3091e00, -0x7183810e00, -0x3182020408, -0x7183898e00, -0x10c3020c00, -0xf20a0800, -0xf103808e08, -0x7083810e00, -0x7183811c00, -0x20e3890e00, -0xe3020410, -0xf1c3830e00, -0x7183070600, -0x7107809f00, -0x30c3821800, -0xe3c3808f04, -0x7082020c00, -0x30c1010c00, -0xc101810600, -0x7083030e00, -0x7183011b00, -0x31e3070c00, -0x11e2020418, -0xe3060c00, -0x7107810e00, -0x40e38f0c00, -0xf106078300, -0xe183810e00, -0xc182818f00, -0x40e3020410, -0x3083810c00, -0xe3060400, -0x11c10b0c00, -0x6081020600, -0x6081010400, -0x1e38f0c00, -0x3183c19e10, -0x30838f0c00, -0xf181810e20, -0x6183810e00, -0x6103810600, -0xc1c0810c00, -0xf1e1810e08, -0x6083810e00, -0x71838f0e00, -0x630e0000, -0xe1c0c11e00, -0xf103878e00, -0x71c0891e00, -0x7103010e00, -0xc183810102, -0x33e7030e08, -0x6183810e00, -0x31c2060608, -0xe103010e00, -0x61c3851e00, -0x11e3830c00, -0x7183850c00, -0x2083010c00, -0x71e3810e00, -0xf103018f00, -0x71c3020410, -0x6083070600, -0x41c0810e00, -0x11f30f0c00, -0x71c3810e08, -0xf102838c00, -0xc101898e00, -0x61c3070e00, -0xf103070e00, -0xe383818e00, -0xe3030800, -0x3083810e00, -0xe101810e08, -0x6000000e00, -0x7103010e00, -0x7103810e00, -0x6103010a00, -0xf183811e18, -0x6101810e00, -0x6183010e00, -0xe1e3818f00, -0xe307c08f00, -0xe3020800, -0xe38f0c00, -0x71c38f1e00, -0xe3020c00, -0xf183c09f10, -0x71e30f0c00, -0x618f0000, -0x71c3031e00, -0xe10a0800, -0x6183c48e00, -0x61c2810e00, -0x30e38f0c00, -0xe101810208, -0x3983030c10, -0x11c3050e00, -0x3082060208, -0xe183838e00, -0x61c3cf8e00, -0x6102878e00, -0x7081020c00, -0xe1a1811e00, -0xe3c440890c, -0xe3c3818e00, -0xf301810e00, -0x60c3810c10, -0x7103010610, -0x3083020c00, -0xe30e0400, -0xe180810600, -0x79e3030e00, -0xf1e7c0811c, -0x6103c58e00, -0x10630e0c00, -0x1c2420408, -0xf383c08f00, -0x71c18f1c00, -0x6183811e00, -0x710e0800, -0x3083020c00, 
-0xe30b0c00, -0x10c2020410, -0x1081c31c00, -0x7303c08e08, -0x6103010e00, -0xe207c08f04, -0x10c3870c00, -0x6182850e00, -0x71c3810e08, -0x71e6070c00, -0x7183890e00, -0x18301828700, -0x7183811e00, -0x41e3811e10, -0xe303808f00, -0x11c2020618, -0xf18040df00, -0xe2021800, -0x7102030e00, -0xe103c19e00, -0xf347878e00, -0x6083810e00, -0x3947810e00, -0x71e38f1e00, -0x71c3811e10, -0xe103818e00, -0xe2020c00, -0x7102030608, -0x7107010e00, -0x7181011e00, -0x4101810400, -0xe10a0c00, -0x3187810e10, -0x6162060e08, -0xf2020c00, -0x6103810200, -0x1f3020c00, -0x61c3030c18, -0x6183810e00, -0xe3811c00, -0xe103818e00, -0x410e0000, -0xe101010218, -0x6103818e00, -0x71c3020600, -0x71c3850e08, -0x1c3850204, -0x60c1810c00, -0x2083810c00, -0x3083811c00, -0x30e1021c00, -0x6183810e00, -0x31810f0c00, -0xe387c08f00, -0x20c1000400, -0x4103810e00, -0x1f3070c00, -0xe1c0818e08, -0x6180810e00, -0xe38f0c00, -0x7183811e08, -0x7183c09e00, -0x10e3061c00, -0x6183810e10, -0x6103818e00, -0xe3cf8000, -0x30c3031e00, -0xc1c2408e00, -0x7083891e00, -0x11e3021c00, -0x39e3071e08, -0x61c0919e00, -0xf107c19e00, -0x39e3031c00, -0x7083891c00, -0x6183810e00, -0x60e3831c00, -0x79c2020e00, -0x6183010600, -0x1e1c3c08f08, -0xe3821c00, -0xe301808f00, -0x7183011c00, -0xf1078d8e08, -0xf1e7890e08, -0x7183810e00, -0x3081010c00, -0xe1020800, -0x6083810e00, -0x1e7808200, -0xe38e0800, -0x30c1090c00, -0xf3c3c18e08, -0x6181810e08, -0x73c7850e00, -0xf1c3010e00, -0x71e10a0c00, -0xe383838600, -0x18e3060c00, -0x18e3810c00, -0x71e78d8e00, -0x10e3811c00, -0x7183810c00, -0x31c7850e00, -0x7183811e00, -0xc203c08700, -0x60e30b0c00, -0x6183870e00, -0x7383818f08, -0x73c3030f00, -0xe183810e08, -0x1e7811e00, -0x1f3070c00, -0x3182020e00, -0x61060000, -0x61f2020418, -0x7183010e00, -0x70c3811e00, -0x4181010c00, -0xe30b1c00, -0xe103058700, -0x6103810600, -0x3982020c00, -0x11e30f0000, -0x6183810e00, -0x6103810e00, -0xf187819610, -0xe101010e00, -0x30c3870c00, -0xf1e3031e00, -0x73e3031e18, -0xe107848708, -0xc3c7878704, -0x6081020e00, -0xd9e3010a08, 
-0x10e30f0c00, -0xe3020c00, -0x6183810e00, -0xf187c08f08, -0x71c3851e00, -0x31e3021c10, -0xe3840800, -0x10e1090c00, -0x3083870800, -0x60c1020c00, -0x31e3831e10, -0x10e18f0000, -0x6183810e00, -0x61c3810e08, -0x11e3071c00, -0x7183090e00, -0xe181810e00, -0xe3060c00, -0x7183010200, -0x1e3030c00, -0xf107c19e08, -0x71c7810e08, -0x6183810e08, -0x30c38a1800, -0x7103010e00, -0xe183c48f00, -0xe183810a08, -0x21c2020608, -0xf101011e00, -0x7181810c00, -0xc103810608, -0x7183858f00, -0xe3811c00, -0x7103811c00, -0xe103808f00, -0x71e3070e00, -0x7083010e00, -0x23cc0000, -0x6103878700, -0x30c5cf0c00, -0x71c3030e10, -0x7183021c10, -0x4103818204, -0x7107808e00, -0x1e3c1808f04, -0x163020418, -0x10e30e0800, -0xe103808f00, -0x7081891c00, -0x6182818e00, -0xe103818e00, -0xe383808700, -0x10e1021c00, -0x1e2020410, -0x10c3030e00, -0x18180c08e00, -0x1e2060c00, -0xf183810e08, -0x7187810e00, -0x38c3811c00, -0x30c3071c00, -0x7183010220, -0x31e3850e10, -0x7183819e00, -0xf123810e08, -0xc101810600, -0xc1e3808f08, -0x1e3c3c48f00, -0x21e3010c00, -0xf183818e00, -0x7102030e08, -0x11e3899e00, -0xe38f0c00, -0xf3060c00, -0xf183c89e00, -0x2183010408, -0x71e3811e00, -0x6183838600, -0xe3c2848700, -0x70c38f1c00, -0x6183810e00, -0xe103810e00, -0xe1c3858e00, -0x7103010e00, -0x7081811c00, -0x31c3030210, -0xc183c18600, -0x6103810e00, -0xe10d0400, -0x6081030600, -0x7181810e00, -0xe183818e00, -0xe183858e00, -0xa1c0818e08, -0xc101810e00, -0x7183809e00, -0x1e181c88f08, -0xe1c3818f00, -0x6182030600, -0xe1c3808f08, -0x7103810e00, -0xe3811c00, -0x6182808e00, -0x31c38f0c00, -0x7183030e08, -0x7083811e00, -0xc101808f00, -0x71c1030c00, -0x31c3891e00, -0xe101810e00, -0xf1e3850e00, -0x1f2060c00, -0x10c7010400, -0x7183010e10, -0x6183810e00, -0xe3070c00, -0x70c3831c00, -0xe103810a08, -0x71e3871e00, -0xf181091e00, -0x61e3030e10, -0xe3821800, -0x7083811c00, -0xe3031c00, -0x1e303808f00, -0xf103819e00, -0xe183c08f00, -0x71c3011e00, -0x3083811e00, -0xe2021c00, -0xf101c98e00, -0x30818e0800, -0xf30f0800, -0x7183830e00, -0x30c2060608, 
-0xf1c3819e00, -0x31c3810e00, -0xe347488308, -0xe2020c00, -0x71818f0e00, -0xc183808f00, -0x1f2060408, -0xc30a0800, -0x7183050e00, -0x71c3811c00, -0x6183c18e00, -0xc103818600, -0x6103030218, -0xc247c48f00, -0x61e3070e08, -0x1e3810c00, -0xe38e0000, -0x6101810600, -0xf3021c00, -0xf081850e00, -0x70e3031c00, -0x7183810c00, -0x7103811e08, -0x71c3031e10, -0xc1e0488e00, -0x71c1850e00, -0x1e1031c00, -0xf3060c00, -0x71a7c18e00, -0xe3020800, -0x71e3050700, -0x6103030c00, -0xe103810c00, -0xe347818f04, -0x10e30e0c00, -0x30c3071c00, -0xc103808e00, -0x6083010400, -0x11e3890c00, -0x11e3060c10, -0xe3080c00, -0x1e3031c10, -0xf103030e08, -0xf181818e00, -0x6183030600, -0x61e3021c10, -0x71e3091e00, -0xe3c3c18e08, -0x6181030c00, -0x7183810e00, -0xe183010e00, -0x81f20f8100, -0xfbc7010e08, -0x71e3031e00, -0x11c3030600, -0xf30e0800, -0xf307818704, -0x6181850e00, -0x71e3811e00, -0x21e3810c10, -0x30c3811c00, -0x7022060408, -0xe3c2c78e00, -0x7081031c00, -0x30818d0c00, -0x31e3010e08, -0x61e3830c00, -0x7103010e00, -0x73c3c88f00, -0x6183810e00, -0xe1c2c18e00, -0x23840000, -0xe3021c00, -0xf102408f00, -0x30e38f0c00, -0x1e3810c00, -0x38c3071c00, -0x71020b0e10, -0x61c0409e00, -0x71e3811e00, -0x3081010400, -0x71c3020608, -0xe10f0400, -0xe103838e00, -0xc3c7848700, -0x6102070600, -0xc180810e00, -0x6083850c00, -0xe3890e00, -0xe1e38f8e00, -0x31e18f0400, -0xf1c3811e00, -0x7183810c00, -0xc207c08600, -0xf3060800, -0x30e18f1800, -0xe10a0c00, -0x3083031c00, -0xf103850e00, -0xe181c48e00, -0x31c2070610, -0x7183811e10, -0x1f2060608, -0x1e7010e00, -0x71e3810c10, -0xe103818e00, -0xe3020800, -0xe303848f00, -0x41c3810610, -0x19e30b0c00, -0x20a3020c00, -0x30c3810e00, -0x41e7810c30, -0x7101010e00, -0x11e3071c10, -0x3946060e00, -0x6181898e00, -0x6103808e00, -0xf982020e08, -0xf181cf9e00, -0x30c3070c00, -0x38e7010e00, -0x4303808600, -0xe307c08f00, -0x8b183871e00, -0x30a3070c00, -0xc3c780830c, -0x18c3011c00, -0x6181810c00, -0xf383c09f08, -0x7187811e00, -0x3983011e00, -0xe2010000, -0x6307c08e00, -0x79838f8e00, -0xe1811c00, 
-0x7183031c00, -0xe3020800, -0xf103850e00, -0xf103899f08, -0x6183810e00, -0x61e3050e08, -0x31c38f0c00, -0xe103858600, -0xe3030c00, -0x41c2070600, -0x7183030e08, -0x7081020c08, -0x51c1811e00, -0x31c3018e00, -0x71c2070618, -0xe183848a00, -0x3083c89e00, -0x10c1810c00, -0xe2021c00, -0xf387c48e08, -0x3082020c00, -0x7183818e00, -0x10c3870c00, -0x7180899e00, -0x6083030410, -0xf1061800, -0xe103818e10, -0xf3060800, -0x1e2020418, -0xc101810e00, -0xc1c3010104, -0xe30e0800, -0xc1c3030e00, -0x61e3818f00, -0x71c3871e00, -0xe1c3850e00, -0x7181050e00, -0x70c1821c00, -0x3083010c00, -0x6183808e00, -0x6183010e00, -0x6183850e00, -0x31c2020c00, -0xe38f0800, -0xc102838e00, -0xf103809e10, -0x7883811e00, -0x1e48f8700, -0x23841000, -0x4081810200, -0xc103c08600, -0x2083820c00, -0xe10e0000, -0x6183070e00, -0xc1020800, -0x71e38f1e00, -0x71030b1e00, -0x4101030600, -0x3183810c00, -0xe2020c00, -0x33c3819e00, -0x11e7070400, -0xe1c3418e00, -0xc1e3810e18, -0x70830f0e00, -0xe181898e00, -0x31e38b1e00, -0xf103810e00, -0xe183808e08, -0xc1e3848e00, -0xf203010f00, -0x38820f0c00, -0xe103808e08, -0x6083010e00, -0xe30e0c00, -0xe30e0000, -0xf103811e10, -0x6081810600, -0x3083090c00, -0xf303899f00, -0x71e3090e00, -0xe103810e08, -0xe3020800, -0xe163818e10, -0xe183810e00, -0xe18e0000, -0xe183808700, -0x31c3030e00, -0x71e3030e10, -0xe103810618, -0x6081810c00, -0x60c3c70e00, -0x630e0000, -0x71e3031e00, -0x7103811e00, -0x6103810e00, -0xe3091c00, -0x18e3020c00, -0x38818d0c00, -0x23c7878304, -0xe3830c00, -0x31e3011c00, -0x6183810e00, -0xe3070000, -0x1f3021c00, -0xe3060800, -0x7103858600, -0xe303808700, -0xe103808700, -0x1c7010204, -0x173020e08, -0x38c30a0c00, -0x6103818e00, -0x3f7070208, -0x61c3010600, -0x10c3031c00, -0x71e3078f00, -0x30e3001c00, -0x30c1021c00, -0x7983030410, -0xe3060000, -0x60c0810e10, -0x7082060410, -0x31c2060c00, -0xe183808f00, -0x1f3e7c18f08, -0x62040000, -0xe1c7c08e08, -0xc181810e00, -0xf3c3818f0c, -0x3083010c00, -0x1e3090e00, -0x70c50d0e00, -0x7103898e00, -0x6183010e00, -0x10818f0c00, 
-0xf183810e00, -0x11e3890e00, -0xf3c3878f00, -0x61c2810600, -0x7102070600, -0x7081850e00, -0x7103810e00, -0x100810400, -0xe1c781810c, -0x11e3011c00, -0xe183850600, -0x71e38d1e00, -0xe18f0800, -0x1f3021c00, -0x610e0000, -0x6183850600, -0x7107818e00, -0x1e7010e00, -0xe30e0800, -0xe2020c00, -0xe103810e00, -0x3883010c00, -0x11e30b0c00, -0xc103c08e00, -0x4000010c00, -0xe103810e00, -0x73040800, -0xf3060000, -0xc1c0c19e00, -0xc103810e00, -0x6181010e10, -0x61818d0e00, -0x1e7c89200, -0xe103810600, -0x6083010e00, -0x1e3050600, -0x71c2811c00, -0x7101010600, -0x338603030c, -0x7182810e00, -0xf3c3819e10, -0x1f3030e00, -0xc183808e00, -0xf383808338, -0x71c3811e00, -0x61061800, -0x6081810c00, -0xc182c08e00, -0xf303818f00, -0x3081030c00, -0x70e58f0600, -0x3183030e00, -0x6161010e00, -0x71a3810e00, -0x1163010c00, -0x38c2020408, -0x40e3811c00, -0x6183811e00, -0x7081031800, -0xe30e1800, -0x3182870600, -0x31e3811e00, -0x1c380808f00, -0x3983021c00, -0x3c7818100, -0x7003810e00, -0x1e3011c00, -0xf1e3031e1c, -0x4103810600, -0x79c3031e00, -0x1f3030c00, -0xf303899f00, -0x6143850600, -0x11e3030c00, -0x79c3070e00, -0xe183808f0c, -0xe180810e00, -0x1e3001e00, -0x7183030e00, -0x11e30f0c00, -0x73e3818f18, -0x7182031c00, -0x7103850e08, -0x20e3831c00, -0xe2060c00, -0x41e1830c00, -0x61c3c58f08, -0xe180848e00, -0x31e3891e00, -0x6103010400, -0x71e3850e10, -0x30c38b0c00, -0x31c2021c00, -0x81f6070600, -0x71c3811e00, -0x30e3031800, -0x71830b0e00, -0x61e3030e08, -0x71810e0e00, -0x41e6078600, -0x1e387838700, -0x10e3021c00, -0xe182810e00, -0x6081070600, -0x1f3831c00, -0xe0c0858e00, -0x4102030000, -0x6103810e00, -0xc183818e00, -0x8101848700, -0x11e3060c00, -0x71e78f0e00, -0x6101810600, -0xe1c3810618, -0x6103808b00, -0x3182021c00, -0xe307c08f00, -0xc103808208, -0x21e38f0c00, -0x40c2060c00, -0xf3060800, -0xe1020800, -0xe3020c00, -0x71c3810e00, -0xf1c3811e10, -0x1f7070e00, -0x7183810630, -0x6383818e00, -0xe3870000, -0x30c38b1c00, -0x7103010e00, -0x61e3030418, -0xe10e0800, -0x7183011e08, -0xf2020c00, -0x71838d1c00, 
-0xe3061c00, -0xe183878f00, -0x6182818e08, -0xe1c3810e00, -0x6103810e00, -0xf103010e00, -0xe101010e00, -0x6103808e00, -0x6183810200, -0x1e30b0c00, -0x30c3020c00, -0xe143810e08, -0xf303010e00, -0x71c1850c00, -0x6183850600, -0x10e3890c00, -0x3123031e00, -0xe3060000, -0xe147c08f08, -0xe163070e00, -0x6183810600, -0xe3020800, -0xf307808f00, -0x6183018104, -0x1e203848f00, -0x6183899e00, -0x30c38b1c00, -0x610e0800, -0x1f2020c00, -0x71e2020e18, -0x30c1021c00, -0x6103810e00, -0xe7090600, -0x6183810e00, -0xf103810e00, -0xf1a3010a18, -0x6083810c10, -0x1c303858700, -0x6081810c00, -0xe6030800, -0xe103810e00, -0x7107808f08, -0xe3c3818f08, -0x61e0810c10, -0xc101818e00, -0x30e2031e00, -0x2083810e00, -0x6083810e00, -0xe3080c00, -0x6103010800, -0x10e3071c00, -0x1e30a0800, -0x1e1810e00, -0x1e3021c00, -0xe20a1c00, -0x7083891c00, -0x6101011e00, -0x7103811c10, -0x7103811e00, -0x1e00f0000, -0x6103808608, -0x4101810600, -0x6183c39c00, -0x7183811e00, -0x70e30b1e00, -0x6180810e00, -0x70818d1e00, -0xf2020c00, -0x7103810c10, -0xc103808e00, -0x72020800, -0xe103818e00, -0x11e2070218, -0x11e3011c00, -0x30c3820c00, -0x10180818e00, -0xe1c3c78700, -0xe182808e08, -0x7187808e18, -0x3e3011e00, -0x30810b1c00, -0x2001010c00, -0x30c3891c00, -0xe30b0c00, -0xe103838608, -0xf1e3810e18, -0x7386070600, -0x71c3030e08, -0xf1c0cf9c00, -0x7103811c00, -0xe1e2070208, -0xf1e7810e10, -0x7307010708, -0xe3021c00, -0x19c3870400, -0xe103810600, -0x41e3010400, -0xe10e0000, -0xe101819e00, -0xc103848e00, -0x7183899e00, -0x6181010600, -0x870818f1c00, -0xe30b0c00, -0xe30e0c00, -0xe3020800, -0x10c1810c00, -0x10e3061c00, -0x3883030e00, -0x7083811e00, -0x31c18f1800, -0x7083091e00, -0x1c387c08700, -0x30c3810c00, -0xe103810600, -0x2081020408, -0x3183811e00, -0x61c2c99e00, -0x7182811c00, -0xc10381020c, -0x7182030e00, -0xe307c09f00, -0x31c3031e00, -0x1e3c7c18f08, -0x31e1020c10, -0x10e3021c00, -0x6101810208, -0xf183810e00, -0x2181010e08, -0xe180c09e00, -0x7383810618, -0x11e2060418, -0xe307c48700, -0x60e1020418, -0x61e3050218, 
-0xe387c08700, -0x71838d0e00, -0x7103070600, -0x7183030e08, -0x4101010208, -0x70818b1c00, -0xf2040800, -0x60e38b1c00, -0x6183818e00, -0xf1c7010608, -0x39e3070c00, -0x11e2020c10, -0x2081021c00, -0xe3c7c0cf00, -0x20e3011e00, -0x3083010c00, -0x10e30e0800, -0x7183030e08, -0xf181891e00, diff --git a/samples/digitrec/digitrec/data/training_set_6.dat b/samples/digitrec/digitrec/data/training_set_6.dat deleted file mode 100644 index aab6a00c3..000000000 --- a/samples/digitrec/digitrec/data/training_set_6.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0x6083850c00, -0x41c3c48e00, -0x204183c78c00, -0x6182c58e00, -0x6183c58e00, -0x106083870c00, -0x3083850c00, -0x20c103c78600, -0x3083cd8e00, -0x102083870400, -0x4083870400, -0x6081870c00, -0x106183870400, -0x1020c30f9d00, -0x20c107c78700, -0x6083870e00, -0x418367e78e00, -0x61c2cb9e00, -0x6182c78e00, -0x4102c58c00, -0x4103868600, -0x4102c70400, -0x104102830c00, -0x2083870c00, -0x4083870400, -0x4082850600, -0x2083c70c00, -0x20c103c78600, -0x6187cecf00, -0x1861838f8c00, -0x4103870400, -0x102083878400, -0x106083870c00, -0x3083cf9c00, -0x100083870c00, -0x20c102c78600, -0x106183c70e00, -0xe367cf8600, -0x6124cb8c00, -0x30c3c78e00, -0x204182c78e00, -0x4083e7478c00, -0x104102868600, -0x4082850400, -0x2081060400, -0x18267e78400, -0x1060820f0e00, -0x6083878c00, -0x204083870400, -0x204183870400, -0x2082878600, -0x6182878e00, -0x1020c3cf1c00, -0x4083870c00, -0x4102cb9c00, -0xc103c78600, -0x104183cf0800, -0x4183cf8e00, -0x106182c78600, -0x106107898e00, -0x4102878c00, -0x104083858c00, -0x4083870c00, -0x6183878600, -0x306183c78e00, -0x6186cf8c00, -0xc307c4c700, -0x104082870c00, -0xc102878600, -0x104082870c00, -0x30c78f8e00, -0x2083850c00, -0x204183c70600, -0x20c107c78c00, -0x106083870c00, -0x4102878600, -0x104083870400, -0x204103cf8c00, -0x204182c78400, -0x3c70000, -0x4183878400, -0x106182c70c00, -0x6183850c00, -0x6183cf8e00, -0xc1e74d8e00, -0x408326c78400, -0x102083870c00, -0x204183870400, -0x204102c78400, -0x2083858a00, -0x408122c78200, 
-0x8143468600, -0x8102c78600, -0x30c3870c00, -0x102001060c00, -0x61e3cf8e00, -0x204183c78400, -0x2083070a00, -0xc106e78000, -0x8143c70c00, -0x4103c78600, -0x1060838f0c00, -0x61c7858e00, -0x4182c78400, -0x204183cf8e00, -0x2041c3cf0c00, -0x6102868600, -0x8142c70c00, -0x4107c68600, -0x4102868600, -0x102083870c00, -0x3083070c00, -0x6082070600, -0x4083850e00, -0xc103c48600, -0x4102c78600, -0x6182070600, -0x106183c78600, -0xc167c78f00, -0x20c103c78600, -0x4102c78600, -0x4102078600, -0x204183878600, -0x3082870e00, -0x6183870c00, -0x8365c78600, -0x204102870600, -0x3083870c00, -0x6182070600, -0x2083070600, -0x6083070e00, -0x30e307ce8e00, -0xc1e3c78600, -0x104182c70c00, -0x104083c70400, -0xc126c98c00, -0x204183c78e00, -0x6082870400, -0x20c102c78600, -0x71c3cf9c00, -0x820868f8c00, -0x106183870c00, -0xc183c78600, -0x104103cf8e00, -0x20c143c68f00, -0x208102c78200, -0xc183c68600, -0x4102cd8e00, -0x208102c70400, -0x204083870400, -0x4103c70e00, -0x6183848700, -0x4183850600, -0x8326cf8600, -0x8366c70400, -0x106081070400, -0x408367e78e00, -0x4183070400, -0x204106c78400, -0x418307c7c200, -0x4083870400, -0x6183878600, -0x2041c3c70c00, -0x8343c68600, -0x204143c70400, -0x20c147c78c00, -0x6082040600, -0x20c106ce8e00, -0x102082078600, -0x106082c70400, -0x61c3850e00, -0x106083c78000, -0x20c103cf8600, -0x4081870c00, -0xf102868e00, -0x20c102878600, -0x208103c78600, -0x4083830400, -0xc347cf8600, -0x204104cf8400, -0x7103c70600, -0x31c3cf9e00, -0x4183c78600, -0x4183c78e00, -0x102083c70c00, -0x608267ef8400, -0x8102c68600, -0x8142c78600, -0x4083030000, -0x4103c78c00, -0x41c3070600, -0x6083cf8e00, -0x4107cf8e00, -0x8265c98e00, -0x60c3cf8c00, -0x4103870400, -0x4143c70c00, -0x8102870600, -0x10265ca8e00, -0x4083c70c00, -0x104183878600, -0x8126cf8e00, -0x3083078e00, -0x106083870400, -0x86186cf9e00, -0x2081850400, -0x4083860600, -0x104182478400, -0xc127ef8c00, -0x2083870c00, -0x104106cf8e00, -0x8176cf8c00, -0x2081870c00, -0x106183c78e00, -0x4183870c00, -0x4183870400, -0x6183848e00, -0x8224cf8000, 
-0x61878f8c00, -0x104183870400, -0x6183070600, -0x6083850c00, -0x20c103c78600, -0x2081070400, -0x204102870600, -0x60c3870600, -0x6182850e00, -0x6183850600, -0x106183cf8e00, -0x2082870e00, -0x106083870c00, -0x6082870400, -0x4082870400, -0x103870400, -0x4182c70600, -0x6187cd8e00, -0x4183848600, -0x106083cf0c00, -0x20c106c6c600, -0x408106c78e00, -0x2083870c00, -0x410205c78200, -0x208107c78600, -0x4002830400, -0x10224cf0000, -0x4103850600, -0x104103868600, -0x30c3050c00, -0x20c122c78e00, -0x4183c78600, -0x106083870400, -0x6183870600, -0x20c102878600, -0x41c3470400, -0x820c7cf8800, -0x4182078600, -0x6107cc8e00, -0x104182078600, -0x12082c78e00, -0x4103c70400, -0x106082878400, -0x204183870400, -0x204103878600, -0x6183070600, -0x106183878600, -0x4182cb0c00, -0x4083c70c00, -0x106183cf8c00, -0xc123c78e00, -0x41c3c70e00, -0x30c7cf9c00, -0x61c3878600, -0x70c38f8e00, -0x4183870400, -0x6083050c00, -0x820838f8c00, -0x6183870600, -0x30c38f0e00, -0x8142c48600, -0x4083c70800, -0x104103878600, -0x6083c70c00, -0x106083c78c00, -0xc103870600, -0x6182858e00, -0x106083c70c00, -0x8102c68600, -0x6183870e00, -0x40c3870800, -0x4143478c00, -0x61c7cd8e00, -0x6187cf8e00, -0x4081870400, -0x6081870600, -0x6083070400, -0x106083c70c00, -0x408102878600, -0x418367ef8c00, -0x82083870c00, -0x204103c78200, -0x6083030400, -0x6106c78e00, -0x104103878400, -0x6183850600, -0x8143c68600, -0x4083e7c78700, -0x4103878600, -0x6183878e00, -0x204103c78600, -0xc103830600, -0x2083870c00, -0xc107c78e00, -0x8103828300, -0x4102c68600, -0x20c103c78600, -0xc102c78e00, -0xe103878700, -0x204183878600, -0x82003840c00, -0x6083850600, -0x40c3c50e00, -0x6187cf8c00, -0x4102c70c00, -0x6083850c00, -0x6183878600, -0x6183c70c00, -0x4083850c00, -0x4083870800, -0x7102070600, -0x6186cf8c00, -0x41c3870400, -0x4083850600, -0x6183c78e00, -0x4183878c00, -0x104082c70c00, -0x82654b8c00, -0xc1c3c78600, -0x2081870800, -0x104182878600, -0x6183870600, -0x6083870600, -0x6083850600, -0x2083070e00, -0x6083870600, -0x2081070c00, -0x1061828f8e00, 
-0x86183c78400, -0x4142050400, -0x30c38d1c00, -0x6183c78e00, -0x106083c70c00, -0x30c30f0c00, -0x4103c78400, -0x82083c78e00, -0x104182870400, -0x30c3cf0c00, -0xa082c78c00, -0x4182878600, -0x208102c78600, -0x4081850400, -0x2083870800, -0x4162860400, -0x4183848e00, -0x6102870600, -0x204183870600, -0xc386efcf00, -0x81c3448e00, -0x3083060400, -0x106183c70c00, -0x6083870c00, -0x60c3c50c00, -0x204183030600, -0x82082cf8c00, -0x182083870c00, -0x3083850c00, -0xc103c68600, -0x4183850600, -0x104183870400, -0x6083c78e00, -0x106083870c00, -0x4082870c00, -0x408167470400, -0x106182050600, -0x106183878600, -0x2082cf8c00, -0x208103c70400, -0x8347ef8f00, -0x8103c78400, -0x4082c70400, -0x4083870600, -0x4102c70c00, -0x4183848600, -0x4102e7478600, -0x71820f8e00, -0x102083070400, -0x8143c48e00, -0x20c306c78e00, -0x102081070e00, -0x40c3c78e00, -0x106083870600, -0x83187cf8800, -0x2083c58e00, -0x6183c78c00, -0x204083870400, -0x204103870400, -0x4083070600, -0x8143468600, -0x8103870400, -0x8264c78400, -0x1061828f8e00, -0x204102870400, -0x20c3450c00, -0x106183cf8c00, -0x1061838f0c00, -0x106183cf8c00, -0x8143c78600, -0x206183870e00, -0x8102c78400, -0x4083030600, -0x6081870400, -0x6183870e00, -0x2081060400, -0x4105cf8000, -0x2083870c00, -0x20a3cf0000, -0x7186cf9c00, -0x4083870c00, -0x408102c78200, -0xc305cf8e00, -0x4183870600, -0x10e30e1c00, -0x20c102878600, -0x20c3c70c00, -0x2081850c00, -0x204103878600, -0x204102c70c00, -0x106187cf8e00, -0x4102870400, -0x20c107c78600, -0x124cf0000, -0x106082870c00, -0x6103858e00, -0x102082850400, -0xc304c78200, -0xc102c78400, -0x4182850e00, -0x2083870c00, -0x2082870000, -0x8103870400, -0x18343c78600, -0x4083870400, -0x20c102c78400, -0x204183c78e00, -0x204102c78600, -0x30c3870c00, -0x4083070600, -0x2082cf8c00, -0x208106464700, -0x7182870600, -0x300102c68000, -0x4103c68600, -0x820838f0c00, -0x6183858600, -0x20c103c78600, -0x4183c78600, -0x6083850c00, -0x21838f8e00, -0x418306c78600, -0x8107c78600, -0x8103c78200, -0x2082870c00, -0x30c3070e00, -0x8102c70400, 
-0x3083871c00, -0xc204eece00, -0x2083870c00, -0x6183848700, -0x20c143c68600, -0x20c102c78000, -0x81c3458600, -0x204143870400, -0x4182c78600, -0x2083050400, -0x4081850600, -0x208106c78600, -0x204146cf8e00, -0x87187cf8e00, -0x20c103c70400, -0x4103c78600, -0x82083878600, -0x10c103cf8400, -0x4183870600, -0x4103c78800, -0x8102830600, -0x4102850600, -0x7122070e00, -0x204182078700, -0x4083654f8c00, -0x4103850400, -0x6183c78e00, -0x1061c3cf8c00, -0x18c38f0c00, -0x2083c78c00, -0x4083c70400, -0x6083870400, -0x4083870400, -0x106107ce8e00, -0x6082c78e00, -0x204102878400, -0x204102c70400, -0x6081070600, -0x18367e68600, -0x4183870600, -0x3083060c00, -0x41c3c78600, -0xc102878600, -0x30c38f1800, -0x6183850e00, -0x104183c70c00, -0x106083870c00, -0x4183850600, -0x106182c78000, -0x204103870600, -0x20c102870600, -0x106082cf8c00, -0x4183830600, -0x104083870400, -0x104183c70c00, -0x20c102c78600, -0x2083850c00, -0x6187cf8c00, -0x104082868e00, -0x8143c70600, -0x8103830400, -0x71c7cf0e00, -0x106083c78400, -0x4083870600, -0x40c103870600, -0x820838f0c00, -0x204103c78600, -0x4183c70400, -0x4082070400, -0x4083850400, -0x106081070400, -0x6083070600, -0x821838c8f00, -0x106182c78600, -0x4082070400, -0x81e3470400, -0x4083870400, -0x60a3060c00, -0x18347c78600, -0x106102078400, -0x102083870c00, -0x8167ce8f00, -0x2083850c00, -0x20c3cf9c00, -0xc103878600, -0x4083850600, -0x6182870400, -0x2081850400, -0x1020838f0c00, -0x106183c68700, -0x104081850400, -0xc205c78400, -0x20c103c78600, -0x20c187cf8e00, -0x8103c78400, -0x4102858600, -0x6083cf8e00, -0x20c187cf8e00, -0x2082870000, -0x8183878600, -0x4083870400, -0x61c2870c00, -0x106182cf0c00, -0x20a3cf0c00, -0x6083870400, -0x4102ce8e00, -0x6183c78e00, -0x208104c68600, -0x71830f8e00, -0x204103878600, -0x20c106cd8e00, -0x30c3850c00, -0x204081870600, -0x204102c78600, -0x81654b8e00, -0x4183870600, -0x408122c78600, -0x204183c78c00, -0x2083870c00, -0x106182cf8c00, -0x20c106c78000, -0x204183870400, -0x83083cf0c00, -0x86183cf8600, -0xe102870600, -0x4082060600, 
-0x106083870000, -0x204183c78600, -0xc143870400, -0x6081030600, -0x6183850e00, -0x61828f8600, -0x102081870800, -0x2083870c00, -0x2083870c00, -0x2083870800, -0x408103c70400, -0x8107ce8600, -0x208307c78600, -0x106082050c00, -0x204103c78e00, -0x6083870c00, -0xc163c68600, -0x204083878600, -0xc102c70600, -0x1020e3cf0c00, -0x204103c78600, -0x4183c70400, -0x4083870600, -0x102082cf0c00, -0x104183c70c00, -0x106082cf8c00, -0x4183870400, -0x204102c78600, -0x4182870600, -0x8205c78400, -0x2081060400, -0x8102c70000, -0x204102c78600, -0xc325ef8400, -0x4083078600, -0x106183cf8e00, -0x6082070400, -0x204103c68600, -0x106083c78c00, -0x6103870600, -0x41c2c70c00, -0x6083070400, -0x6081050600, -0x4083850c00, -0x20c3c70c00, -0x408143468600, -0x6082870e00, -0x204103870600, -0x6183cf8e00, -0x106183878400, -0x8143870400, -0x2082850e00, -0x8143468600, -0x2041c3c70400, -0x2041c3c78c00, -0x4103858600, -0x106183870400, -0x2083c58e00, -0x6183878e00, -0xc162c78c00, -0x4183e7ef8e00, -0x106187cf8c00, -0x8143870400, -0x8363e78600, -0x106083870c00, -0x204103c78600, -0x20c103c78600, -0x6083c50400, -0x418307c78600, -0x4183870e00, -0x2081870400, -0x408103c78600, -0x4083870400, -0x6103870600, -0x408102c78600, -0x106083870c00, -0x6183858e00, -0x20c106c78400, -0x4081830000, -0x41c3c70600, -0x2083850e00, -0xe304cfcf00, -0x6183cf8e00, -0x6103870e00, -0x20c104c78600, -0x4102c70400, -0x2083850600, -0x104083870c00, -0x4083870400, -0x20c107c78600, -0x820c3870c00, -0x4102070600, -0x2081070e00, -0x20c183c78400, -0x20c102c70200, -0x4182cf0c00, -0x204103c78400, -0x208103870400, -0x6182cf8e00, -0x8122cf8e00, -0x4003850600, -0x204102c78c00, -0x10c3cf8800, -0x30830d0e00, -0x106083870400, -0x81e2448f00, -0x106182c78c00, -0x6083870600, -0x4083850e00, -0x6183c78400, -0x40c2c50c00, -0xc163c78e00, -0x20c3860c00, -0x4143870400, -0x6083850600, -0x3187cf0800, -0x2081870c00, -0x2083850c00, -0x204cb0000, -0x2041a3cf0c00, -0x6083870e00, -0x6182878e00, -0xc163c78c00, -0x106103878c00, -0x6183878e00, -0x408367c78600, -0x6082060600, 
-0x102083870c00, -0x31838f8e00, -0x204102878600, -0x8103c78600, -0x200103870400, -0x106083c70400, -0x3083870e00, -0x4103c70c00, -0x6183878e00, -0xc106cf8e00, -0x106083870c00, -0x830838f8c00, -0x6183078600, -0x18367c78600, -0x2083070e00, -0x4183c70c00, -0x820838f0c00, -0x104183870400, -0x4102860600, -0x6183c50c00, -0x4143c70e00, -0x104183870400, -0x3183cd9c00, -0x6183078700, -0x8163470600, -0x6183870e00, -0x2083070600, -0x104102868600, -0x1061e7ef8c00, -0x8264cb0c00, -0x4083870400, -0x104183cf8c00, -0x2082870800, -0x204103870400, -0x10142c70600, -0xc103c78600, -0x2082c60600, -0x82083870c00, -0x2041c3c70400, -0x408102c78400, -0x6082470400, -0xc207c7c600, -0x204183870600, -0x6082070a00, -0x20c1060c00, -0x2083c78e00, -0x106183878600, -0x4183850e00, -0x2041e2c70c00, -0x4143c78600, -0xc102c78600, -0x82083c70c00, -0x4103870600, -0x2083050c00, -0x6106858f00, -0xc1c7c78600, -0x4183870600, -0x8163478600, -0x6081030600, -0x106183878e00, -0x204102c60c00, -0x104104cf8f00, -0x2083870c00, -0x8103c68600, -0x8143c70600, -0x408103c78600, -0x6083c70c00, -0x104183870c00, -0xc307ce8f00, -0x20c103868600, -0x4183c78600, -0x4081030600, -0x820c3870c00, -0x4183870e00, -0x6083870c00, -0x408307c78600, -0x41c7cf8e00, -0x4083850c00, -0x7183848f00, -0x106183870600, -0x6083850400, -0x20e104c68e00, -0x106083c70c00, -0x2083870c00, -0x408103c78600, -0x41e3cf0c00, -0x204183c78600, -0x6183870e00, -0x204103cf0c00, -0x6147c48e00, -0x6183850c00, -0x8122c50e00, -0x3083050c00, -0x4103870600, -0x6083070600, -0x2083858600, -0x102083870c00, -0xc147cd8c00, -0x1061a7cf8c00, -0x71a2070e00, -0x206183cf8e00, -0x6083870600, -0x41c3c70400, -0x6183070600, -0xc183878600, -0x20c3050e00, -0x20c147c78e00, -0x408102c78400, -0x408367cf8c00, -0x82083870c00, -0x20c106c78600, -0x30c3cf1c00, -0x104107cf8600, -0x4083850400, -0x208306ef8400, -0xc103c58e00, -0x830c78f8e00, -0x4183870400, -0x20c123c78400, -0x104082870400, -0x6103850e00, -0x6182870600, -0x4082870c00, -0xc307c6c700, -0x830c38f1c00, -0x4103c48600, -0x8326cf8400, 
-0x8143468600, -0x30c3870c00, -0x4102c70c00, -0x4183c58e00, -0xc102878e00, -0x83083cc9c00, -0x3083850e00, -0x20838f0c00, -0x4182c70400, -0x408126cf8400, -0x4102c48600, -0x8102c70200, -0x408103c78600, -0x182c0478600, -0x82083c68e00, -0x2083858c00, -0x104081870400, -0x6183870c00, -0x204183878600, -0x408103c68600, -0x6082870600, -0x6081870c00, -0x6103868e00, -0x6183878600, -0x4183850600, -0xe187c7cf00, -0x8265068e00, -0x4183870e00, -0x2083870c00, -0x7083070600, -0x4103c68600, -0x6083850600, -0x8103c78c00, -0x1060c3c70c00, -0x82083870e00, -0x104106cf0c00, -0x4126ef8c00, -0x8166c78e00, -0x2083850400, -0x106182c78e00, -0x4183c48600, -0x2041c6cf0c00, -0x4103c68600, -0x61e3050704, -0x2082870400, -0x208103c78400, -0x8367cf8c00, -0x106187cf8c00, -0x4083870c00, -0x4102c70000, -0xc103c68700, -0x2083870c00, -0x106083870c00, -0x82083cf9c00, -0x106105cf9c00, -0x41c3c70c00, -0x4103868600, -0x830c38f9e00, -0x106083870c00, -0x4083870400, -0x6103870600, -0x6081070400, -0x8163c68700, -0x2081870c00, -0x2083848e00, -0x6083c58600, -0x20c3070e00, -0x6083850400, -0x2083870c00, -0x208345cf8e00, -0x20c3850c00, -0xc386c7c600, -0x4102870400, -0x104083878600, -0x4142c70400, -0x2083050c00, -0x4103850e00, -0x20c105c68600, -0x20c102c68600, -0x20c103c78600, -0x6183878600, -0x82083870c00, -0x1061c3c70c00, -0x6083870400, -0x6083050c00, -0x60c3c70c00, -0x20c147efce00, -0x8122c78400, -0x20c122cf8c00, -0x104102cf8c00, -0x4002070600, -0x20c103c78600, -0x81e3448e00, -0x4102c48600, -0x4183850e00, -0x2083078e00, -0x6182878600, -0x7187cf8e00, -0x4083870c00, -0x86182870c00, -0x3083870c00, -0x2083c78c00, -0x20c127cf8c00, -0x6183870e00, -0x4082010400, -0x4183030600, -0x4143870400, -0x2080840c00, -0x20c3870c00, -0x408245c78400, -0xc143870600, -0x6183070600, -0x20c3cf0800, -0x204103878e00, -0x104102c78c00, -0x8103c68600, -0x106083870c00, -0x2083c78c00, -0x2083870c00, -0x20c105cf8400, -0x104183c78600, -0x208162c58600, -0x20c102c68600, -0x4182078400, -0x4183850c00, -0x106103848e00, -0x204182c78e00, -0x7182cf8e00, 
-0x71a3850e00, -0x106083878400, -0x4083850c00, -0xc107c78600, -0x106082870400, -0x418302c78200, -0x30c3cf1c00, -0x20c106c78e00, -0x2041e3cf8c00, -0xc145cf8600, -0xc1c3c78e00, -0xc107c78e00, -0x4083870c00, -0x204102c68600, -0x4103870600, -0x6083878e00, -0x4083870400, -0x204082078600, -0x204083870400, -0x40c102c78600, -0x106183870400, -0x4183c78c00, -0x204102878600, -0x4183870600, -0x83654e8e00, -0x8127c68600, -0x4143c78c00, -0xc143c78600, -0x204083830600, -0x30c3060c00, -0x102081870400, -0x20838f0800, -0x20c107c78600, -0x106083c78c00, -0xc107ef8600, -0x30c38f0c00, -0x6082c70400, -0x4082870c00, -0x2081050400, -0x20c147c78600, -0x82183c70c00, -0x6183870c00, -0x4182878c00, -0x8104c78400, -0x4183850400, -0x102083870800, -0x6103070600, -0x6183c58e00, -0x208103870400, -0x8365cf0000, -0x4187cf8e00, -0x2083050400, -0x4183870600, -0x6183c70600, -0x6103878600, -0x4182c78400, -0x30c3870e00, -0x6183cf8e00, -0x408183c78600, -0x6182c70400, -0x20c38f0c00, -0x6183878600, -0x6183878e00, -0x104cf0000, -0x106083cf8e00, -0xc126cf8e00, -0x4083850400, -0x20838f0800, -0x2041c3c78600, -0x106083870c00, -0x106083c78c00, -0x1020c38f0e00, -0x6083070400, -0x204183c78600, -0x2041c3c78600, -0x204186cf8200, -0x6103848e00, -0x20c103c78e00, -0x6183c68e00, -0x20c103878600, -0x20c107c78e00, -0x18306c78600, -0x4143850600, -0x107104098f00, -0x6082070400, -0xc143c78c00, -0x204182cf8e00, -0x20c107878600, -0x4002850400, -0x6083870c00, -0x6082c70c00, -0x2083cf8c00, -0x6083070600, -0x41a2cf0e00, -0x6083870800, -0x4102870600, -0x41e3cf0c00, -0x2083870c00, -0x10c106cf8400, -0x8103c78600, -0x408143c78600, -0x20c104ca8e00, -0x2041024f8600, -0x3083070e00, -0x408167c78600, -0x106183c78600, -0x6081870c00, -0x30c107878600, -0x204102c68600, -0x41c3c78e00, -0x6083c78e00, -0xc102c78700, -0x104102c78c00, -0x4083c78c00, -0x104083c70c00, -0x418224c58600, -0x408367c78400, -0x4083870c00, -0x81c3c58600, -0x208304cf8e00, -0x8103c68600, -0x104107cc8e00, -0x6083870600, -0x4083870400, -0x3083050600, -0x4083850400, -0x4103850600, 
-0x6081870400, -0xc103c78600, -0x2081850c00, -0x3183cf8c00, -0x81c7478600, -0x2083cf8c00, -0x3083060c00, -0x102083c78c00, -0x2083870c00, -0x4183870c00, -0x204102c78400, -0x4183870600, -0x4183cf8c00, -0x8142c78600, -0x408367cf8600, -0x41c3870c00, -0x30c3870e00, -0x41c3870400, -0xc143c78600, -0x8124c98c00, -0x61828f0c00, -0x8143c70400, -0x4083c70000, -0x6083030400, -0x104186cf8e00, -0x125cf0000, -0x20c102ed8e00, -0x6183878600, -0xc103c68600, -0x4183850600, -0x6083870c00, -0x104082870600, -0x4103c70c00, -0x106083878c00, -0x106183878600, -0x2083070e00, -0x104143870400, -0x30c3070e00, -0x30c38f1c00, -0x104082070400, -0x4183c78600, -0x4103870400, -0x41c3c50c00, -0x4083050600, -0x10e107cf8600, -0x10c3060800, -0xc183078600, -0x6183cf8e00, -0x6103808e00, -0x2081860c00, -0x104103cf8c00, -0x2083870c00, -0x2083c70400, -0x102083870c00, -0xc103870600, -0x4083850400, -0x208103c78200, -0x4102c78c00, -0x20c38f0c00, -0x102082870c00, -0x204103878700, -0x81674f8c00, -0x204182870400, -0x2083060c00, -0x204182c78400, -0xc182c78e00, -0x40c3870c00, -0x104083850400, -0x408306e7c600, -0x3082060c00, -0x6083460c00, -0x2081850c00, -0x20c306c6c700, -0x6083070400, -0x20c103c78600, -0x20c103c78600, -0x7183050e00, -0x8102c78600, -0x4082030600, -0x104005848700, -0x4081850400, -0x6083060600, -0x2083850400, -0x871828b8c00, -0x4183878600, -0x6083870400, -0x204183ce8e00, -0x104183870c00, -0x2083870c00, -0x4082870400, -0x106083870000, -0x61a7ef8c00, -0x2082010400, -0x3083050600, -0x3083858e00, -0x6305cf8600, -0x6083c78c00, -0x8167cf8e00, -0x1060c3870c00, -0x6083850c00, -0x7102070b00, -0x41c3c70c00, -0x81c3870400, -0x20c102878600, -0x6083870e00, -0x106083870400, -0x106083cf8c00, -0xc347c78600, -0x8167c78e00, -0xc182478300, -0x6083850c00, -0x30c38f0c00, -0x106082870c00, -0x6183c78600, -0x2083070c00, -0x106083c78c00, -0x6083870600, -0x408224c78400, -0x104106cf8e00, -0x204103870400, -0x2083870c00, -0x8162c58600, -0x408307c7c000, -0xc103870600, -0x408325c78e00, -0x6081070c00, -0x2082070c00, -0x204102c70000, 
-0x6083870400, -0x106183cf8c00, -0x2081060400, -0x41c3c70600, -0x4082070400, -0x106183870800, -0x4083850400, -0x8307c7c200, -0x102083870400, -0x4083c78c00, -0x4183870600, -0x1248f0000, -0x4083850c00, -0x2083050e00, -0x6081030600, -0x204102c78400, -0x6103858e00, -0x104083870c00, -0x6182850e00, -0x8147c78e00, -0x6304cf8e00, -0x1061a2cf8c00, -0x20c106c78400, -0x4083860600, -0x104102c78400, -0x2083870400, -0x204183878600, -0x6082870600, -0x18103c78600, -0x61864f8e00, -0x6102878600, -0x106183850600, -0x2083870800, -0x2083c50c00, -0x30c1050c00, -0x4182c78600, -0x6103878e00, -0x6183cf8c00, -0x104102870400, -0x4143870400, -0x104182870400, -0x106083870c00, -0xc136cf0e00, -0x4103c70600, -0x82083870c00, -0x4103cf8e00, -0xc102c58600, -0x4183870600, -0x104103c48e00, -0x20c3cf0000, -0x8102c78600, -0x4083870c00, -0x2082850c00, -0x8102078600, -0x204103c78600, -0x204103878600, -0x6183070600, -0x6081060400, -0x61c3c78c00, -0x104083878400, -0x8106c78400, -0x4163c58600, -0x2082070600, -0x6081070600, -0x6103858600, -0x2081870400, -0x30c38f0c00, -0x2083870c00, -0x106083cf0c00, -0x2083cd8e00, -0xa041030400, -0xc103870400, -0x6081830400, -0xe107c78e00, -0x2082c78800, -0x4083870600, -0x4083850c00, -0x4083070600, -0x6183878600, -0xc1c7c78700, -0x20838f0c00, -0x106183870c00, -0x102082cf8c00, -0x81870400, -0x2083878c00, -0x10207c7c300, -0x104103c78600, -0x104082070400, -0x2083870e00, -0x2083c70c00, -0x20e103c78600, -0x4102c68600, -0x106083878600, -0x82083870c00, -0x8366268a00, -0x106083070600, -0x2081060c00, -0x2083870c00, -0x4083c70000, -0xc106878600, -0x104126cf8c00, -0xc183038200, -0x204102870600, -0x4104cf8e00, -0x4082038400, -0xc106c78600, -0x4000020c00, -0x4183c70c00, -0x106083c70c00, -0x104187cf8e00, -0x6083878e00, -0x61e3c78e00, -0x60c3c50e00, -0x8183c3c78600, -0x30c3070c00, -0x20c304c78600, -0x20838f0c00, -0x86186cf0c00, -0x41828f0c00, -0x4083870400, -0x4103c78e00, -0x106183c48e00, -0x106183870c00, -0x1060828f0c00, -0x4083830600, -0x2082070c00, -0x2082870c00, -0x6083078e00, 
-0x8102c68600, -0x8306c78600, -0x4082870c00, -0x106182cf0800, -0x4183870c00, -0x104187cf8e00, -0x106083870c00, -0x41c3870800, -0x4081870600, -0x8302c78400, -0x4103c48600, -0x4083870400, -0x204083030600, -0x820838f0000, -0x4102c70400, -0x204143c78c00, -0xc103c68600, -0x4183c78c00, -0x4182c78600, -0x408307c68600, -0x6082070600, -0x4182c70400, -0x106083870400, -0x6083870c00, -0x2082060400, -0x6103c78e00, -0x106083870400, -0x10264cd8400, -0x4103878600, -0x106183878600, -0x6083870400, -0x6183870600, -0x106183878600, -0x61c3c58e00, -0x2083c71c00, -0x106183c78e00, -0x6083858e00, -0x4083870600, -0x61c3cf0e00, -0x4103c70800, -0x4083050c00, -0xc325cf8000, -0x6182c70e00, -0x204183870400, -0x8102c78600, -0x30c3870c00, -0x20c307cf8e00, -0x6081870c00, -0x8365cf8400, -0x30c3078c00, -0x106183c78c00, -0x106083870c00, -0x4183878600, -0x2083870800, -0x107082870c00, -0x104082c70400, -0x4081c3c78600, -0xc307e7c300, -0x2083848e00, -0x106182c70400, -0x6083858600, -0x20c106c78e00, -0x104083870600, -0x8103c48700, -0x104083c78400, -0x6083870400, -0x20c143c78600, -0x6183070600, -0x4143870400, -0x104103c78c00, -0x81c3c70600, -0x4183868200, -0x20c1870c00, -0xc103870600, -0x61c3cf8400, -0x3083870c00, -0x82082870c00, -0x102083870c00, -0x4102c70c00, -0x106083c78400, -0x208306c78600, -0x4083870600, -0x41c3870400, -0x41c3c58e00, -0x6083870e00, -0x4183c78e00, -0x40c103c78400, -0x6083850400, -0x4083850e00, -0x4083870400, -0x7083070600, -0x418367e7c600, -0x106183cf0c00, -0x4102c70c00, -0x6183070600, -0x106083070c00, -0x204106eb8e00, -0x6102070600, -0x2041c7c78600, -0x106083070600, -0x30838f1c00, -0x182e54f8f00, -0x204182c78e00, -0x20c106c78600, -0x4102850600, -0x6081070800, -0x6083850c00, -0x82083cf8e00, -0x20c107c78600, -0x8102c68600, -0x4082870000, -0x6103c78e00, -0x6183878600, -0x820c3870c00, -0x104182470400, -0x41c3850c00, -0x4083870400, -0x6103850600, -0x204182078600, -0x4183870600, -0x4103870600, -0x408102c38000, -0x31c7cd9e00, -0x8103c68600, -0x4081870000, -0xc143c68700, -0x4183830800, 
-0x6082840e00, -0x20c103c78600, -0x4183070600, -0x106083070600, -0x4182c78400, -0x4183c78e00, -0x2083c48e00, -0x4183c78400, -0x4106c7c400, -0x204183878600, -0x106082c70c00, -0x104000870000, -0x82083cf8c00, -0x106183878e00, -0x104104cf8e00, -0x20c1c3c78700, -0x40c3c70c00, -0x4183c78400, -0x6083850c00, -0x208307c78600, -0x8167c78e00, -0x6083070400, -0x60c3870c00, -0x4182878600, -0xc2656e8600, -0x6083850c00, -0x8102c78600, -0x208106478000, -0x61c7cf8e00, -0x8167c78600, -0xc1654c8e00, -0x4103850e00, -0xc167078c00, -0x82082c70c00, -0x20c307cf8600, -0xe103878e00, -0x81810c00, -0x2081850c00, -0x4183870600, -0x204103850400, -0x4083cf0800, -0x6081870400, -0x4083070600, -0x41a3cf8c00, -0x208147c68600, -0x41e3478c00, -0x106187cf8e00, -0x408106cf8c00, -0x208103c78400, -0x7083858e00, -0x104102070600, -0x4081830000, -0x408103c78400, -0x204103870600, -0x104082070400, -0xc143c78600, -0x2083c70c00, -0x208123c70000, -0x6081030400, -0x204182c68e00, -0x204083850400, -0x30c3cd8e00, -0x4123c70800, -0x2083850e00, -0x20c143c70400, -0x4183c70600, -0x20c107c78600, -0x61c3cf9e00, -0x106083870c00, -0x204102858e00, -0x8143470400, -0x20c1070400, -0x3083850c00, -0x208102c78700, -0x4107cb8c00, -0x6183cf8c00, -0x6083070600, -0x20c124cf8c00, -0x4167cf8c00, -0xc103c70600, -0x30c3060c00, -0x104182870600, -0x82083c70800, -0xc143c78e00, -0x2082cf8c00, -0xc167cf1c00, -0x208103c68600, -0x4183c78600, -0x20c3cf0c00, -0x6083070400, -0x2083870c00, -0xc1674f0e00, -0x104083870600, -0x8342c78400, -0x4083050c00, -0x8167c78e00, -0x106082c70c00, -0x41c3470c00, -0x2081850c00, -0xc102c78e00, -0x4081850400, -0x20c103878200, -0x4083870600, -0xe305c68e00, -0x4103c70400, -0x6083070400, -0x4082850c00, -0x6083870600, -0x106083c70800, -0x106183cf8e00, -0x106187cf8e00, -0x6083c50c00, -0x106182c78e00, -0x204102c70000, -0x104183070600, -0x2083850c00, -0xc103c78600, -0xc183c78600, -0xc1e3468600, -0x204102870600, -0x6083870600, -0x408265ef8600, -0x6182070600, -0x4081070400, -0x104083c68a00, -0x102083870c00, -0x102082c70c00, 
-0x2083870c00, -0x4083850c00, -0x7183070e00, -0x20828f0800, -0x408363c78600, -0x418245cf8400, -0x2083ce8e00, -0x104103870600, -0x2083c70000, -0x4082cb0c00, -0x106187cf8e00, -0x4102c78400, -0xe187c78f00, -0x106182878e00, -0x3083060c00, -0x4183c50c00, -0xc307c7c600, -0x104103c60700, -0x6083870c00, -0x6081870c00, -0x618302c7c300, -0x60c3850e00, -0x4083850c00, -0x6083850c00, -0x4143c68e00, -0x8102c78400, -0x4081030400, -0x204103c78400, -0xe103848700, -0x4081030600, -0x4183878600, -0x2081060c00, -0x106183c70400, -0x106183c70c00, -0x20c3c70c00, -0x2081070400, -0x2083070c00, -0x204082878600, -0x4183870600, -0x106083878e00, -0x4183cf0c00, -0x6083878c00, -0x20c102c78600, -0x6081070c00, -0x4103c48600, -0x104106cf8c00, -0x4081050400, -0x408102870200, -0x4187cf8e00, -0x6083850e00, -0xc107ec4e00, -0x4081870400, -0x102083870400, -0xc163c70400, -0x6183c48c00, -0x4103850400, -0x106187cf8e00, -0x2083870c00, -0x2082070e00, -0x8325c98c00, -0x4182c70400, -0x8143c30400, -0x408102c78600, -0x2083c70c00, -0x2083070c00, -0x4083830600, -0x4083850c00, -0xc324cf8400, -0x4182c78400, -0x4083870400, -0x408142c78600, -0x6143878e00, -0x204183c78e00, -0x104083870400, -0x20c3070400, -0x8003830400, -0x408307ef8400, -0x30c38f0c00, -0x204104ce0c00, -0x6083870c00, -0x2083c70c00, -0x102083070e00, -0x204183c70c00, -0x204102870400, -0x20c102c78600, -0x1061828f0c00, -0x4082870400, -0x6183070e00, -0xc306ef8e00, -0x104183878e00, -0xc122c70600, -0x8165470c00, -0x2083850e00, -0x30c3cf8e00, -0x4102078600, -0x204183c78600, -0x204183870400, -0x41c3c70c00, -0x2083878c00, -0x4183878600, -0x61c3878600, -0x6083070600, -0x2083850c00, -0x20c103c78400, -0x8162c70c00, -0x4102850400, -0x208103c28400, -0xc143c78400, -0x4183830600, -0x102083870800, -0x2083850600, -0x104083c70c00, -0x204102c78c00, -0x4082868600, -0x61c3cf0c00, -0x106183878c00, -0x2081850c00, -0x104182870600, -0x10c107c68600, -0x4103c68600, -0x4183878600, -0x6182878e00, -0x4183c78600, -0x208265cc8e00, -0x8347e78200, -0x8103870000, -0x106183878c00, 
-0x6183c78e00, -0x2081050c00, -0x4183c68e00, -0x106083070600, -0x61e3cf0c00, -0x6081070400, -0x4082050600, -0x4103870400, -0x20e3cf0c00, -0x4182870400, -0x6083870c00, -0x20c103cf8600, -0x6083850e00, -0x20c3870c00, -0x6183870e00, -0x2083858e00, -0x2083850600, -0x102081870400, -0x4143c70600, -0xc167cf8e00, -0x106182cf8c00, -0x8163450400, -0x204083878600, -0x6081850400, -0x408367cf8600, -0x830c3070e00, -0xc186c78e00, -0x3083870400, -0x204103c70400, -0x204183878600, -0x104182c70c00, -0x208103c28600, -0x6082870400, -0x4183870400, -0x104106cf8c00, -0x204183c70400, -0xc083858600, -0x2081070400, -0x104002070600, -0x30c38b1c00, -0x408103878200, -0x2183cf8c00, -0x4183878600, -0x106082870c00, -0x6183878600, -0x10f305ce8f00, -0x8102c50600, -0x104102c78200, -0x4103848600, -0x4102068600, -0x106183c78e00, -0x106083870400, -0x4183870400, -0x104106cf8e00, -0xe143868700, diff --git a/samples/digitrec/digitrec/data/training_set_7.dat b/samples/digitrec/digitrec/data/training_set_7.dat deleted file mode 100644 index 56f54a602..000000000 --- a/samples/digitrec/digitrec/data/training_set_7.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0x23c3c70604, -0xc041020400, -0x61e3820c10, -0x3c4810304, -0x1e3e0830408, -0x60c1820400, -0x40e0820c10, -0x3e0830c18, -0x1c1820810, -0x41e6c10608, -0x3e2c30418, -0x1e2820c10, -0x51e6c30c10, -0x21c3820400, -0xe3c1838400, -0xc3c3c78600, -0x1e1820400, -0x81c0820c10, -0x21e2830c18, -0x23c0830600, -0x1c2820410, -0x3e0c30400, -0x43c0830608, -0x3c0810600, -0x1c0820400, -0x43c0810204, -0xc2810408, -0x60c1860800, -0x1c2830608, -0xe3c1820408, -0x43c0830408, -0x1e1820c18, -0x61c0810408, -0x6344810608, -0x83e6830618, -0x1c1820c10, -0x1e0820c10, -0x1e2830608, -0xf061871820, -0x3c0820408, -0x11e7c30c10, -0x3e4c10408, -0x10e2820410, -0x3e0830608, -0x3c0830400, -0x3e0c10418, -0x81c2810204, -0x21c1830410, -0x1c0820410, -0xe141860400, -0x1e2820c10, -0x1e0830c10, -0x3c0810304, -0x63e4830c10, -0x3e0c30408, -0x40c1820c00, -0x23c0830408, -0x1e4830410, -0x43c1838600, -0x4181830408, 
-0x1e6830410, -0x63e0c30c10, -0x1e7408200, -0x60c3860400, -0x60c0820410, -0x41c1830600, -0x1c1820810, -0x1e0c61800, -0x1e6c30c00, -0x3c6c10208, -0x1c1830408, -0x83c0810608, -0x6142820400, -0x3c0830408, -0x3e0870400, -0x43e4830608, -0x1c4820408, -0x103c0c18618, -0x4381030600, -0xe260830418, -0x61c0830410, -0x1e6820c10, -0x1c2810204, -0x7e0418204, -0x61e0c30c10, -0x1c1820408, -0x3c4890204, -0x23c4810408, -0xc040820c10, -0xc1c0830408, -0x1e0820410, -0x61c1c70400, -0x1e0820830, -0x1c0820408, -0x61c1820c18, -0x81c1820c10, -0x41c4810408, -0x1e0820c10, -0x21c1c60810, -0x1c1c0830408, -0x1c0810608, -0x3c0c30408, -0x61e0820810, -0x1e6c10608, -0x1c0820800, -0x61e3830c10, -0x43c4810200, -0x41e1820c10, -0x180c0830400, -0x1c0820408, -0x1c1c70600, -0xe3e0830c18, -0x61e2830408, -0x3e7c10204, -0xc3c1830604, -0x1c0820410, -0x3e0830c10, -0x1e0820410, -0x3e4c10608, -0x41c0830418, -0x20e3820410, -0x43c6c1820c, -0x21c1030408, -0xc3e0c61820, -0x41e2810410, -0x3c0810408, -0x3c0810608, -0x41e1860820, -0xc0820400, -0x3c0810204, -0x1c2830408, -0x7e0c10608, -0x1c0820408, -0x3e0830408, -0x1c0830c10, -0x3e4c30400, -0xc3c6830600, -0x23c1830408, -0xe3e0c30e10, -0x1e2820408, -0x3e0c30810, -0x3c4810204, -0x3ecc10204, -0x3e481060c, -0xc3820c10, -0x1e060830c10, -0x1c68b0408, -0x1c1820408, -0x41e2820c10, -0x7e0c10608, -0x23c0830408, -0x3c7c10204, -0x1c2810008, -0x3e0820410, -0x1c0820410, -0x3e0830408, -0x3c0810408, -0x3c0810408, -0x1e0820810, -0x1c2830408, -0x81c0820400, -0x1c0810208, -0x61c1070600, -0x23c3870408, -0x83c1810204, -0xc3c0820408, -0x3c1830608, -0xe6c30c30, -0x7c0c10200, -0xe083038400, -0x1e0830c10, -0x1e6810408, -0x1e287c600, -0x3e0c30c10, -0x3e4830204, -0x21c0820810, -0x23e0830410, -0x1e1820c10, -0x1c0830408, -0x41c0830c10, -0x61c0830c10, -0x1c0820408, -0xe2820810, -0x80c0820408, -0x3c1820408, -0x1c0830408, -0x3e0410608, -0x3e0830c10, -0xc1c0830408, -0x41c6cd020c, -0x21c0820410, -0x61e0810408, -0x3e0c10608, -0x61e0830400, -0x1e0830410, -0x4041820c00, -0x41e6890608, -0x3c0820408, 
-0x61e0c71800, -0x1e4830418, -0xe3c183060c, -0x3c0830608, -0x21c0830418, -0x3c0820408, -0x3c0830408, -0x41c0820408, -0x3c0810302, -0x21c1030600, -0x1e2820c10, -0x3c0810410, -0x1e6830408, -0x23e0c30c10, -0xc3c0810608, -0xb1e0830c10, -0xe1e0830c30, -0x3c0830408, -0x20c1820800, -0x3e0c10408, -0x1e0820c10, -0x6041861820, -0x3c4408208, -0x3c0830408, -0x1e0820830, -0x1c0810200, -0x83c3830608, -0x1e6c10608, -0x41c0830608, -0x3e0820408, -0x81e0820410, -0x41c0820408, -0x41c0820408, -0x103c0810204, -0x1c0820410, -0x3e4c50204, -0x41c2810204, -0x43c0820408, -0x1e3c20820, -0x81e3c30810, -0x43e0c30c10, -0x3c0810608, -0x4181038400, -0x3c0830408, -0x1c2820410, -0x1c2810400, -0xc1c0830408, -0xc183c60c08, -0x3e0c30408, -0x81e0c20810, -0xd1c3870c00, -0x11e4c10408, -0xc041870400, -0x3c0830408, -0x33e1820c00, -0x41e2820800, -0x4040800800, -0x3c3c20408, -0x7c1c30608, -0x21e0820800, -0x1e2820408, -0x3e2878408, -0x83c0830408, -0xc040820c10, -0x3e4490200, -0x23c4830408, -0x3c0830408, -0x1e1820820, -0x3e0820408, -0x3e0820810, -0x3e3c30400, -0x3e0830418, -0x1c0810408, -0x6040820810, -0x1e2820c10, -0x43c0810608, -0xc3c3c78604, -0x1c0820408, -0x41e0820c10, -0x1e1820800, -0x43c0830408, -0x3e0830408, -0x3e0c30408, -0x61c3c78810, -0x3e3c30c10, -0x41c2830604, -0x163c1878608, -0x41c1820408, -0x61c0820810, -0xe141830c08, -0x3e4410608, -0x7c2c10204, -0x1e68a0800, -0xe1861000, -0x1c0820408, -0x41c0820408, -0x183c0410408, -0x3e0830c10, -0x1c4830408, -0x3c7c10200, -0x6140820400, -0x70e18e0800, -0xe1c1820c10, -0xe1e0830c10, -0x3c0810608, -0x3c2830408, -0x3c0810204, -0x1c3c1c38604, -0x3c0810408, -0x41c2820410, -0x3c0810008, -0x3c0810208, -0x1c0830408, -0xc1c0820410, -0x1c0800408, -0x1c0830408, -0x3c1830608, -0x63e0830c10, -0x1c3820c10, -0x180c0810408, -0x3c0830608, -0x61e0820830, -0x1c0820408, -0xc083820400, -0x23e0830408, -0x1c0820408, -0x3c0810408, -0x21c0820408, -0x1e2830418, -0x1e6830400, -0x41e0830c30, -0xc1c0830c18, -0x3c0810408, -0x3c0820408, -0x61c3830c10, -0x1c0830408, -0x3c6830408, -0x3e0c30c10, 
-0x63e0c10410, -0x7e0810408, -0x43c383060c, -0x3e0810408, -0x3e0c30830, -0x1e2820c10, -0x1e2810408, -0x3e0c30408, -0x1c2810200, -0x23c1830408, -0xc2820408, -0x3c1c60400, -0x1c2810410, -0x3c1870400, -0x21e0820810, -0x3c0830408, -0x61e0820c10, -0xc0c1830408, -0x41c1820408, -0x41c1820c10, -0xc141830408, -0x80c1820408, -0x63e6c30608, -0x11c1830408, -0x103c20000, -0xc3c0830408, -0x60c1820818, -0x1c0820408, -0x4041020408, -0x1e0820c10, -0x41c2810408, -0x8182810204, -0x23e0830408, -0x41c0830408, -0x3e4c10608, -0x83e4830608, -0x21e0820c10, -0x21e1830408, -0x41c0830408, -0x81c0830400, -0x67c0830604, -0x7e0830408, -0x1e1860810, -0x1e2830410, -0xc081038c00, -0x1c2850204, -0x1c0e0830c10, -0x3c0810604, -0x83c0830408, -0x1c0820408, -0x6140870400, -0xc040820800, -0x1e0820810, -0x83c0c10408, -0x181c0810608, -0x1c2810408, -0x41c0810600, -0x1c0820400, -0x21c2830408, -0xc1e0830c10, -0x3e0830408, -0x1c3c0830408, -0x1e0830410, -0x3c0810204, -0x60c0820810, -0x61e2830c10, -0x1c0820408, -0x1c0810408, -0x3c0820408, -0xe1c0830608, -0x3e0430c10, -0x23c0830408, -0x6040870400, -0x7c3c10208, -0x60c1860810, -0x81c0830408, -0x1e6820410, -0x21c1820c10, -0x3e1820c08, -0x6040820410, -0xc3c0820408, -0x81e0830c10, -0x43c081060c, -0x41c2810408, -0x1c1820408, -0x61c2820c00, -0x1ecc10200, -0x1e0820810, -0x21e0820d10, -0x1c2830418, -0xe1e0c71810, -0x1e3820810, -0x1c0820410, -0x1c2830408, -0xc1c0c30c10, -0x23e0c30c10, -0xc3c0830408, -0x1c1830600, -0x1c0810204, -0x1e4830400, -0x1c0810408, -0x181c1870600, -0xc0c1870c00, -0x3c0c10208, -0xc040820400, -0x21c0820c10, -0x1e2830408, -0xc3c0830604, -0x3c0820408, -0x40c1820800, -0x6142830408, -0x181c0810400, -0x1e6cb0410, -0x10e0801800, -0x1c0820408, -0x3c0830408, -0x327c10608, -0x4040820800, -0x3e0c30c10, -0x1e2820400, -0x1c1c20400, -0x1e6830c30, -0x41c1830408, -0x143c0810408, -0x1e7878410, -0x1e0830410, -0x1c0810400, -0x1c6c10208, -0x1c0820410, -0x3e081060c, -0xc1c0830400, -0x3c4c10208, -0xe1e0820c10, -0x61c2830c10, -0x3e0830c10, -0x3c0810204, -0x1c0810200, 
-0xe1e6830418, -0xe3c0830c10, -0xc1c6850600, -0x123e0810418, -0x61c0820410, -0x3e1820408, -0x43c2c10608, -0x3c0810608, -0x3c0830410, -0x21c0820410, -0x183c2830702, -0x1e6830418, -0x41c0830408, -0xc041c20c00, -0x11c6830408, -0x1c0830408, -0xc1c0830408, -0x3c0810408, -0x21c6830408, -0xc1c0820400, -0x81c3c78c08, -0x61e0810410, -0x81c0830400, -0x1e240870608, -0x1e68b060c, -0x61c1870c10, -0x3c0810400, -0x3c1878400, -0x60c1870810, -0xe1c0820408, -0x1c2810204, -0x7e0830408, -0x6020861800, -0xc3c1820604, -0x3c0810204, -0x4141830400, -0x7c30400, -0x1c0870400, -0x3c0810408, -0x23c1830608, -0x4041820c10, -0xe1c1870c10, -0x81e2820c10, -0xc3e2830604, -0x63c0830408, -0x7c0c10204, -0x40e1820c08, -0x63c0c70408, -0x3c0830408, -0x1e0830410, -0x1e0820c10, -0x3c3830608, -0xc2820810, -0x41c2810408, -0x1e2820c10, -0xe2820410, -0x1c0810408, -0x71e0861820, -0x81c0820408, -0x1e2820810, -0x1c0820410, -0x21e6490400, -0xe3e1878c08, -0x3e4c30418, -0x1e6820418, -0x1c0820410, -0x1e2830410, -0x21e0820c10, -0x23c0830408, -0x43c4810608, -0x21c1820c10, -0x3e3c30608, -0x41c3820408, -0x81c0830400, -0x61e1820c10, -0x3c4890204, -0xe0e0c31820, -0x246810204, -0x1c0830410, -0xc140830408, -0x61c1820c10, -0xe3c0830408, -0x41e2830408, -0x1e7c30c00, -0xc1c2870408, -0x1c4810204, -0x3e0820810, -0x1e2830c10, -0xe0c1830400, -0xe1e0830c10, -0x1c3820408, -0xa3c1830418, -0x81c3820400, -0x3c0830408, -0x1e0820810, -0x3e0830408, -0x83e0830408, -0x41c0830408, -0x1c0820410, -0x61c1070400, -0x21c0820400, -0xc0c1820400, -0x1c0c0878408, -0x1e2820c10, -0xe1e0c71800, -0x1c4810208, -0x41e6c30410, -0x103c3c78604, -0x21c0820c10, -0x3a4c10600, -0x3e7c10204, -0x23e2830408, -0x81c1830200, -0x1e2810408, -0x3e0c10600, -0x1e2820c10, -0x63c1820408, -0x43c0810204, -0x1c0830410, -0x3e4438200, -0x41c2830400, -0x83c083060c, -0x3e0830400, -0x1c1820c10, -0x1c0830410, -0x43e0830408, -0x3e7c30608, -0x1c1c1c30600, -0x1c1820408, -0x1e3820408, -0x31e0820c10, -0x1363810600, -0x23e0830c10, -0x81c0820400, -0x103c1c18e00, -0x1c0830418, -0x7c0c10608, 
-0xc2820408, -0x3c0810204, -0x181c60000, -0xc1c6838302, -0x1c0820408, -0xc1020400, -0x1c0830418, -0x1c0810410, -0x3e0830410, -0xc041820408, -0x3e4830c18, -0xc2820c10, -0x3e0c30c18, -0x3e4830c00, -0x1e0810408, -0x43e0c30c18, -0x61c0820c10, -0x3c0810208, -0xc1c0830408, -0x1c0820408, -0x3c4878200, -0x3c6810604, -0x3e0830c00, -0x3e0810204, -0x1e6c90418, -0xf324878c00, -0x1e1820c10, -0x3c0810408, -0x1c7830408, -0x1c0830408, -0x33e0c30c18, -0x181030200, -0x3e0c10618, -0x23e4c30c10, -0xc0c1c60408, -0x6141030408, -0xe3c083060c, -0x1c2830400, -0x21c1830200, -0x3e481020c, -0x1e2830c10, -0x1e0830400, -0x21e1870418, -0x3c1878408, -0x1e0821820, -0x60c2820c10, -0x3c0830408, -0xe1c0830c10, -0x7c0c18204, -0x1e6c30410, -0x3e4c10608, -0x1c2810608, -0x3e0810408, -0x21e0820c30, -0x1e0830c10, -0x1c2830604, -0x3c0838410, -0x3e283060c, -0xc3c0810204, -0x1c2810204, -0xc1c0830418, -0x3e0c30c10, -0x6181070600, -0xc040820c10, -0x1c6810204, -0xe1c0830408, -0x43c1820408, -0x3c0810608, -0xc3830400, -0xe3e0830c10, -0x1c0830410, -0x1e4830408, -0x167c10408, -0x41c0830c18, -0x3c3c38c00, -0x10c2820c10, -0x3c1830408, -0xc0c3820c10, -0x1e68b0418, -0x7e6c10604, -0x1c0810608, -0xc3820400, -0x61f0c30c10, -0xc1c0820408, -0x6041820810, -0x1c6810204, -0x3c0830c18, -0x21c1078c00, -0xe041070800, -0x1c0820408, -0x3c0810608, -0x1c3820400, -0x43c0878200, -0x41e2c30830, -0xc2830408, -0x81e0830c10, -0xe0c1870c10, -0x1e0820c10, -0x41c1020400, -0x3c0810208, -0x6040820800, -0x3e0c30400, -0xe1e1860c10, -0x43c0830408, -0x3e0830408, -0x60c1820c10, -0xf1e1861800, -0x1e2c30c30, -0x7e0c10204, -0x61c1820c10, -0x43c0810204, -0x1e6820410, -0xe3c0830408, -0x1e4830410, -0x303c0c10204, -0x23e0830c10, -0x6041020c00, -0xc7e0c30408, -0x21c0830408, -0x21c0820408, -0x3c0810608, -0xf0e08f0c10, -0x43e0820c10, -0x63e3c30408, -0x43e0c10418, -0x101c0870400, -0x60c0820c10, -0xc0c1020400, -0x21e0830c10, -0x181810204, -0xc1c0830408, -0x43c0810204, -0x1e0810408, -0x1e0820c10, -0xe041820c00, -0x1e2810408, -0x23e4c10408, -0x3e3810208, 
-0x61c0830c10, -0x41c0820410, -0xe040820e00, -0x43e4c18204, -0x7c0810208, -0xc3e0810608, -0x3e0c60800, -0x3c0830c10, -0xc1820408, -0x21c6830408, -0x3c0810400, -0xc0c38f0600, -0x1c0820c10, -0xc1c0830408, -0x1c1820400, -0x13e4830400, -0xc3850400, -0x21e2820c10, -0xe1c0820810, -0x81e6830408, -0xc3c1870400, -0xc1c2830408, -0x3e0820c10, -0x61e1820c10, -0xe1c0c78408, -0x11c2820c10, -0xe0e0830c10, -0x23c0830408, -0x21c0820410, -0x71a6c90408, -0x43e0c30c10, -0x61c2820810, -0x8180810600, -0x183e0c18608, -0x41e0c30c20, -0x3e4830408, -0x1c2810608, -0xc2c0878408, -0x3140820c10, -0xe1c0830408, -0xe0c1820408, -0x23c0830410, -0x3c0810204, -0x3e1c78608, -0x1c0820408, -0x61e0830410, -0x63c4830408, -0xe1c1820c18, -0x3c4c10204, -0x3c0810204, -0x3e7c18204, -0x1c2830408, -0x80c3820400, -0x3c0830408, -0xc1c0878a00, -0x3c0810408, -0x4120820c10, -0xe043c60400, -0x1c2c30208, -0x7c0830604, -0x3e3c30c00, -0x1c0830608, -0x1c6810204, -0x3c0830608, -0x3c1810200, -0x3e4810604, -0x1e2830c10, -0x41e4c30c20, -0x1c2820410, -0xc0c1820408, -0xc3c1830604, -0x3e0c30410, -0x20c1820c08, -0x3c0810604, -0x61e2830c10, -0x6142878c00, -0x143c0810204, -0x61c1820408, -0x3c0830408, -0x20c0820410, -0x20e1861820, -0x3e281060c, -0x3c3830410, -0x11e2830400, -0x3c083060c, -0xe3e7c30608, -0x1e0820410, -0x1c2810408, -0xc3c1870600, -0x3c0810608, -0x3e0820410, -0x3e0830c10, -0x3e0830418, -0xe040870408, -0x3c4810204, -0x181c60400, -0x3e0830604, -0x381c60408, -0x63e3838c10, -0x3c0810604, -0x3c0810608, -0x1c0820408, -0x60e1861800, -0x1e3830400, -0x3c2810302, -0x3c0810204, -0x7e0c10608, -0x1c2820408, -0x60c1820c10, -0xc1e0c30c10, -0xc0c1860c08, -0x3e781060c, -0xe3c1830608, -0xc1c0c10608, -0x1e0830c10, -0x1c0830410, -0x21c2870c08, -0x1c2810204, -0x47e0c1020c, -0xc0c0830400, -0x41e0820410, -0x21c0830400, -0x41e0820c10, -0x3e3c78c18, -0x41c2870408, -0x1c0830408, -0x2000020800, -0x7020820820, -0x43c1c70408, -0x3e4410000, -0x3c1830604, -0xc1c0830c10, -0x1e2820410, -0x1e2820c10, -0x6141820c10, -0x3c0810608, -0x3e1830618, 
-0x80c0830400, -0xc081870400, -0x83c18f8400, -0x81c2810200, -0xc0c3c30800, -0x81c1820408, -0x1e3830c10, -0x103c0810204, -0x1c0820400, -0x183e0c30c10, -0x41c0820400, -0x81e6c10408, -0x43c1830608, -0x3c0810608, -0x1c0820400, -0x1c0820410, -0x1c0830408, -0x21c2820c00, -0x23e0830c08, -0x3e0830408, -0x41e3830c10, -0xc3c0830408, -0x3c0810208, -0x3c0830408, -0x21c0820410, -0x7e7c30600, -0x21e0820c10, -0x1e0820810, -0x3e2c10604, -0x21c2820c10, -0x3c0820408, -0x6140830400, -0x6141020408, -0x3c0830408, -0x41c1830204, -0x3e0830408, -0x20c0820810, -0x1e3820c10, -0x8240830408, -0x3c0810608, -0xa1e0820c10, -0x3e0830c18, -0xc141820400, -0xc1c0820410, -0xe1c1830c00, -0x3c0c10208, -0xe141820c10, -0x61c0820410, -0x1e2820410, -0x1c4810204, -0x51e2820c10, -0x40c1820810, -0x3c0c18204, -0xe1c0830410, -0x41e6828c10, -0x41c0820408, -0x3c0810608, -0x1c0820c10, -0xc1c1820400, -0x3c1830408, -0x3c3830608, -0x380810204, -0x43c0810204, -0xc081020400, -0x61e1820810, -0xe1c1c60c10, -0xe1e0830418, -0xc083830600, -0x1c2810408, -0x21e7890608, -0xe0c1020408, -0x1e0830408, -0x63e0830c10, -0x81e0820820, -0x61c1870c10, -0x61c1820c10, -0x8180810200, -0xc1c1820408, -0x6141820c00, -0x31e0820c10, -0xc0820410, -0x23c0830408, -0x3e0830408, -0x41c0830408, -0x3e7820408, -0x1c0810408, -0xc2800408, -0x1e0830418, -0x103c087060c, -0xc041020410, -0x60c1820400, -0x61c1820810, -0x3c6810604, -0xc0c1830608, -0x3c0810608, -0xe1e3830608, -0x2181030400, -0x43c0830410, -0xc140830408, -0x81c3c70408, -0x21e2810408, -0x23c0810204, -0x3c0830604, -0x7c0c18608, -0x1e2c10410, -0x41e0c30c10, -0x3c0810208, -0x3c0810408, -0xc1c0820c10, -0x23e4c10418, -0x61c1870800, -0x1c3c0838702, -0x3e0c30c10, -0x41c1830608, -0x40c1820c00, -0xc3c0810204, -0x1e0820c10, -0x1e0830410, -0x43e0830608, -0x3e0c10410, -0x21c2810608, -0x81c3860408, -0x1c1830608, -0x71c0820c10, -0x23c0830c10, -0x71e0820c10, -0x1e3820c10, -0x3c0820408, -0x6041860400, -0x7c081060c, -0x1c0830400, -0x4142830400, -0x3c1820408, -0x1c1870408, -0x1c0820800, -0x1c2820408, 
-0xc140830408, -0xc1c1878400, -0x1c2820410, -0x41c6810604, -0x81c0830400, -0x1c0820800, -0x41c1830410, -0x61c1820c00, -0xe041870408, -0x3e281020c, -0x60c1820c00, -0x1e6820410, -0x81c0830400, -0x41c2820408, -0x21c3820810, -0x3c0810204, -0x1e4830410, -0x1c0830200, -0x1c0810400, -0x3e0c30c30, -0xc3c0810608, -0x81c0820408, -0x3c0810608, -0x7c0810204, -0x3e0410410, -0x381c20400, -0x6041820c10, -0x3e0c30c10, -0x41c0830408, -0x3e4810608, -0x3e0830c10, -0x27c083060c, -0x103c3838418, -0x41c0830408, -0x1c0830400, -0x81c0810400, -0x1c2810608, -0x3c0810408, -0x20c3020408, -0x41c1c20408, -0x3e2c10200, -0x41c2830408, -0x103c0810608, -0x3e0820408, -0xe040820c10, -0x1c2810204, -0x71e3830c10, -0x81e2c10408, -0x3c0830408, -0x1e0830418, -0x3c1830204, -0x3c0830608, -0x23c2810608, -0xf3c18f0604, -0x3c0830408, -0x81e0830c10, -0x3c0810408, -0x11e6c30c08, -0x21c0820c10, -0x3e0820810, -0x1c0820410, -0x3c2810408, -0x3c0830408, -0x1c1830408, -0x21c2810408, -0x1c043cf0204, -0x63e0430408, -0x1c3820c10, -0xe1c0830e00, -0x61c1020410, -0x41e0820c10, -0x21c0820410, -0x3e0810408, -0x3e7830408, -0x3c0830408, -0x61e6c10608, -0x1e6830418, -0x3e7c30608, -0x7c0c18304, -0x43e4c10400, -0x3c3810418, -0x1e6c10400, -0x3c0830410, -0xc041820408, -0x43c0830408, -0xc0c1830c10, -0x1c1c1830600, -0x3e0830410, -0x1c0810604, -0xc0c1830408, -0x81c0810408, -0xc140830408, -0x83c0830608, -0xe0c1020c00, -0x1e4410200, -0x1c0830200, -0x1c0c0870608, -0x31a0820820, -0x3c6810204, -0x4142830400, -0x3c183060c, -0x4183c30600, -0xe1c1870c10, -0x3e0820410, -0x1e3820410, -0xe0418f0200, -0x6040830800, -0xc1e0c10c10, -0xc0c1860400, -0x43c0c10608, -0xc1c1c70c10, -0x1e0830c10, -0x1c0830400, -0xc0c0c60408, -0x61e2820810, -0x41c1820400, -0x21c2830408, -0xe3c1878c10, -0x7c0818304, -0x21c0820408, -0x3e7c98200, -0x3c0810204, -0xc040830410, -0x1c2830418, -0x1c2830408, -0x11e68b0400, -0x21e0820c10, -0x23e5c30c10, -0x3c4810204, -0x23c3878c10, -0x81e0830c10, -0x1e0820c10, -0x61c1820c10, -0x3e4410204, -0xc1c0830c10, -0x3c0830608, -0x3e0c30c30, 
-0x41c0830410, -0x10c0820000, -0x7e0810408, -0xc0820800, -0x1c3020408, -0x33e0c78c00, -0x43e0c30c10, -0x1c1830408, -0x63e0830c10, -0x11e68b0c10, -0x8381830408, -0x121c1870204, -0x3e4810204, -0x41c6838408, -0xc140830400, -0x1c1820408, -0x3c081020c, -0x43c0830408, -0x7c8c18304, -0x1c0820408, -0x1e6830608, -0x41c0810608, -0x3e0830408, -0xe2830410, -0x21e4810418, -0x21c2830418, -0x3c3810204, -0xc0820c10, -0x61c0830408, -0x20c3820810, -0x3c0810408, -0xc1c0870400, -0x1e4410410, -0x1e4810408, -0x1e2830410, -0x3e0830c18, -0x1e4820800, -0xc1c0830410, -0x3c4810608, -0xc1c0830608, -0x3efc10200, -0x6341830408, -0x61c1070408, -0x6041820810, -0x1c3c20400, -0x1e1820c10, -0x1c1820408, -0x61c1820c10, -0x1e7c98300, -0x3e0c10400, -0x3e2830410, -0x83c3830604, -0x4141020c00, -0x11e1820810, -0x6043820400, -0x21c0820400, -0x3e4830410, -0x1c1c0830408, -0x83e4810608, -0xc1c0830410, -0x21e0820830, -0x1e0830410, -0x3e4c1060c, -0x4140820400, -0x3e0810608, -0x41e0830c10, -0x3e1820c18, -0x3c081040c, -0x43c0810608, -0x61e2830c10, -0x10e10e0800, -0x3c6810204, -0x60e0820800, -0x3c0810608, -0x21c1820408, -0x67e0810608, -0xc1c1820404, -0x3c0810204, -0x81c3870400, -0x1e0820c10, -0x1c0820408, -0x83c0810608, -0xc181830604, -0xc3c7830608, -0xc1c1830408, -0xe3e1830408, -0xc0c1830600, -0x1c0830408, -0xc3c1870408, -0x43c0830604, -0x3c0c30408, -0x83e0c30c18, -0x61e2890410, -0x61c0820810, -0x3e0c10400, -0xc240830400, -0x21e1820c18, -0x3c1830604, -0x30e1860800, -0x1c0810408, -0xf360820c10, -0x1c1820408, -0x83c0810408, -0x21c0820c10, -0x40e1871c00, -0x63e4830408, -0x1c0820810, -0x73e0c10410, -0x3e0810408, -0x81c0810408, -0x43c0830408, -0x3c1820408, -0xc3c6810204, -0x41c0820408, -0x1c0020408, -0x6041860810, -0x41e4830c10, -0x23e4c10608, -0x3c4c18304, -0x1e2820c10, -0x63e4810204, -0x23c583060c, -0x21e0820c10, -0x1c181078400, -0x41e2820c10, -0x21c0820c10, -0xe3c0830c10, -0x1c0820410, -0x21e2830c10, -0x1c0820410, -0x1c1860800, -0x1c3830400, -0x1c6810608, -0x41c0830408, -0x47c1830604, -0x3c1878400, -0x1c2830408, 
-0x23c0820408, -0x6140820810, -0x1e2820820, -0x3c0810408, -0x3e0c10408, -0x83e0c30c10, -0x3c2810200, -0x1e6c10410, -0x81c0830408, -0x81c0830c10, -0xe1e1830c10, -0x60c3c60c08, -0xe240810600, -0x41c1820408, -0x31e3820c10, -0x1c0820410, -0x3c0c1060c, -0x41e0830c10, -0x21e0830c30, -0x3c0830408, -0x3e0c10618, -0x60e1861800, -0x1e6c30418, -0x3c0810608, -0x1c0820408, -0x1c0820800, -0xe3e0830c10, -0xc3c0810204, -0xe0e3860800, -0x83c0830408, -0x23c0830408, -0x1e0820810, -0x1c0820408, -0x41e0820820, -0x182810208, -0x7e0c30608, -0x11c1820800, -0x31e2830c10, -0xe160820c00, -0x4041020408, -0x1c1c3c70608, -0x43c0810208, -0x1c0830408, -0x1c2820418, -0x3e0c30c10, -0x2041020800, -0x61c2830418, -0x41c0820408, -0x23e0830410, -0x1e2820c10, -0x1c0830410, -0x43c0870408, -0x3c0810608, -0x3e0c30c00, -0x83c0810408, -0x3e0830408, -0x3e2450204, -0x3e2c30c10, -0x4140810408, -0x6140820400, -0xe1c1830c00, -0xe6c10400, -0x7c30400, -0x41c3820408, -0x3c0830408, -0x1c0830418, -0x140810408, -0x21c1820410, -0x41c0830408, -0x1e1830c10, -0x1c1810204, -0x1e6830418, -0x183c20400, -0x63c1820c18, -0x11e7820c10, -0xe0c0820c10, -0x3c0830608, -0x43c0830604, -0x1c3c0c10608, -0x60c3860800, -0xe141830400, -0x3e6c30400, -0x1c2810408, -0x3c0810200, -0xc1820c10, -0xe140820c10, -0x1e0820410, -0x3c0820408, -0x21c2820400, -0x3061861820, -0x3c0830408, -0x23e4810418, -0x1e7c10608, -0x1c2810200, -0x183c1c30c00, -0x380818204, -0xe0c3c20c00, -0xc1c183060c, -0x41c0820408, -0x63c0830408, -0x7e6410204, -0x1e4830410, -0x3c1820408, -0x80c0c30800, -0x61e0830c10, -0x81e0820c10, -0x83c083060c, -0x1e6810408, -0x3c1810204, -0x3e0c30c18, -0x23e6878204, -0x71e0820c18, -0x1e2820810, -0x61c1820c10, -0x43e0830410, -0x40c1820c10, -0x3c0830604, -0x83e0830608, -0x3e0830c18, -0x61c0830408, -0x1e0830410, -0x1c0820810, -0xe041870800, -0x43c4810200, -0x1c1830400, -0x3e0810408, -0x1c0830608, -0x1c2810408, -0x1c2820410, -0xe1c1820c08, -0x80c0810408, -0x61e0820c10, -0x60e1c70800, -0xc1020c00, -0x41c0820408, -0x41c0810400, -0x61c1830408, 
-0x41c0870408, -0xc041830600, -0x1e6c30c00, -0x3c3810204, -0x3c4810204, -0x3c081020c, -0x63e1830e10, -0x3e6830408, -0x3e0830c10, -0xe122c10400, -0x3c0820408, -0xc3c0818204, -0x3c0810408, -0xc1e0830c10, -0xc1c1830c10, -0x1c3820c10, -0x1c2830410, -0x1c2810200, -0x1e0830408, -0x21c0820410, -0x3e7810200, -0x81e0820c10, -0x7c0c10204, -0x41c0830c10, -0x3e0830410, -0x133e1820e00, -0xe040831c00, -0xc140830408, -0x3c0830408, -0x21c0820c10, -0x3e0830c10, -0x3e3c30c10, -0x31e2410c10, -0x1e0830400, -0x1c0820408, -0xc1c1830608, -0xc3c0870408, -0x81c0820400, -0x20c1820400, -0x3c0810204, -0x30a2820800, -0x1c2810400, -0x83c0810608, -0x3c0810204, -0x1c0810600, -0x3c4830608, -0xc041830400, -0x21c2820c10, -0x41020408, -0x1c1820810, -0x81c1830600, -0x1c2830408, -0x1e0830c10, -0x1c6810408, -0x1c0830408, -0x3c0810408, -0x23e0c30410, -0x1e0820820, -0x83c0c30418, -0x1c0c1870200, -0xc0c0820800, -0x7e0810408, -0x103c1830604, -0x1c1c1830608, -0x103c6c10204, -0xe3e4c1860c, -0x80c0820800, -0x61e6c10410, -0x3c0830408, -0x1e0830410, -0xc1e0820810, -0x31e6c30c10, -0x21c0820410, -0x1c2850204, -0x1e2830408, -0x60e1861800, -0x3c4830608, -0x3e0410410, -0x1c2850208, -0x81c0810408, -0x1c0c1878e00, -0x1c2810604, -0x101c0830400, -0x3e4830418, -0x1e3820c10, -0x23c0830408, -0x1c3c0810408, -0x21c0830408, -0x183c0810608, -0x1c2830408, -0x3c0830408, -0x1c4830408, -0x41c1870c10, -0x183c081060c, -0x380810204, -0x3c2830408, -0x41c0820408, -0x3e3c30200, -0x3c0810208, -0x1c0830408, -0x1e7810608, -0x21c0820418, -0x103e6c58204, -0x1c0810408, -0x1c2810204, -0x1c2830408, -0x1e2820c10, -0x1e6830410, -0x81c0830408, -0x23a0820810, -0x1e4830608, -0x1c2830418, -0x41e1820c10, -0xc160c30c10, -0x61c0830408, -0x61c1830408, -0x21e0830c30, -0xe3c1870408, -0x41c1870604, -0x43c0830408, -0x1e4830410, -0x41e0010c10, -0xc1c1830600, -0x61e2830410, -0x1e3830c10, -0xc0c0820408, -0x3e0830c10, -0x1c0810408, -0x61c3820c10, -0x3c0830604, -0x1f1e1830c18, -0x6140820800, -0x1c0810408, -0x23e4830408, -0x21e0820c10, -0x1e4810408, -0x61c5878408, 
-0x1e6830c10, -0x81c2830408, -0x20e2820c10, -0x3c2810204, -0x1c1820408, -0x3e4830418, -0x23e4810408, -0x1c0820408, -0x41c1820408, -0xe1c4830608, -0x3c0830408, -0x61c1820810, -0x6140820c10, -0x1c0c0810400, -0xc3c0830400, -0xc7e0c3060c, -0xe0861820, -0x7c0810304, -0xc1820810, -0x3c0830408, -0x3c0820408, -0x41c0820410, -0x1c0820c08, -0x41c1820408, -0x7c0810204, -0x1c2820408, -0xe041820408, -0xc1c0820408, -0x43c083060c, -0x3e0810408, -0x1c2820408, -0x81c2810608, -0x61e6830c10, -0x11c3820c10, -0x6120820810, -0x43c0810608, -0x3e0830c10, -0x41e6830400, -0x21c1020410, -0x1c0820408, -0x21c0870600, -0x3c6c58204, -0x4141860408, -0x1c0830400, -0x61c1879820, -0x3e4810608, -0x1c1820408, -0xc181030604, -0x1c1c0830408, -0xc3c0830408, -0x4041020400, -0x83c0830608, -0x60c1020400, -0x3e0c10608, -0x41c0820410, -0x1e0820418, -0x20c1820810, -0x3c2810608, -0x3e0830410, -0xe1c1870408, -0x3e0830408, -0x81e0830408, -0x81c0820400, -0x21c1820c10, -0xc0820418, -0x1c2830408, -0xc3c0830608, -0x3e0820c00, -0x6061860800, -0x1e0830c10, -0xc1c1c60c10, -0x43c0828500, -0x3e5830c18, -0xe3c1860c08, -0x63c1870600, -0xc040830c00, -0x3c0830408, -0x40c1020408, -0x3c0810418, -0x3e2c10608, -0x83e3c30608, -0x1c6830608, -0xc1c0830408, -0x3e3c30410, -0x41c2820408, -0xe1c3830608, -0x1c0820c10, -0x63c4830c10, -0x1c0830410, -0x4141860400, -0x1c2810410, -0x3c7810204, -0x1c0810408, -0x1c0810408, -0x63c0810608, -0x61c0830c10, -0x1c0810408, -0x1c0820400, -0x3e3830c00, -0x33e1820c10, -0x1e0820c00, -0x61c1020408, -0x21c1470400, -0xc1c3820408, -0x3c0810208, -0x83c0810608, -0x3e0830418, -0x1e4c30410, -0x3e4830604, -0x3c4830608, -0x21c1030400, -0x1c0820c10, -0x1e2820830, -0x1c040810410, -0x41c2810608, -0x8181830608, -0x1c0820400, -0xe3c0830608, -0x1e3c1870604, -0xe1e0820810, -0x3c0810608, -0xc3e0c30c10, -0x43c081020c, -0x11e6830c10, -0x43c0830408, -0xe2820800, -0x23c0830408, -0xc3c0830408, -0xc3e0830408, -0x41e6458608, -0xc1c4830408, -0x3c0810408, -0x1c0810204, -0x21e0820820, -0x21c1878c10, -0xe041020810, -0x1e7830418, 
-0x63c1830408, -0x3c4810204, -0x41e2830c10, -0xe0e0820810, -0x61c1860c00, -0x61e0831810, -0x4181070604, -0xc1e0830c10, -0x1e6c20820, -0x4041820800, -0x3c0830408, -0x1e3830c10, -0x4140820408, -0x1c2810204, -0x7c4c18204, -0xe0c0820408, -0x3c1c30400, -0x382838200, -0xc0c1820c00, -0x1c0810608, -0xc3c0830408, -0xc040820400, -0x11e0820c10, -0x23e0830418, -0x61c0830c10, -0x8080810400, -0xc3c0830408, -0x41c1820c10, -0xc140830408, -0x61c1820408, -0x1c6810204, -0x41c683060c, -0x61c1820408, -0x3e4830608, -0xe2820820, -0x3c0830408, -0x41c1c38410, -0x23c0810604, -0x61e0830c10, -0x3c0820410, -0x83c0810408, -0x1c7830408, -0x1e0c30c10, -0x63c1830408, -0x23c1830408, -0x7c0810204, -0x11e0820830, -0x3e3830400, -0xe141820820, -0x3c4810206, -0xe041820c10, -0x83e6830418, -0x1c3c3c78400, -0x381810204, -0x61c1870c00, -0x51e1861820, -0x1c040830408, -0x21c7830608, -0x1c0c1830400, -0x7c0810204, -0x80e1820c00, -0x1c0810208, -0x3c1830608, -0x1c0810408, -0x1c2810608, -0x43c0810204, -0x1c4810408, -0x61e0830c10, -0x3e0810410, -0x80c1820408, -0x1c1e0820c10, -0x23c1870600, -0x1c0830204, -0x3c0810604, -0x3e0410608, -0x1e1820c18, -0x1c2830408, -0xe240830408, -0x41c3830408, -0x1c0830408, -0x3e0830c10, -0x1c2870608, -0x1c0810604, -0x1e4c90410, -0x1c283060c, -0x3e0830408, -0x23c3830408, -0x3c0810608, -0x43e0830c10, -0xc3c0810608, -0x1e3e1c70608, -0xe1c1830408, -0x3c0820410, -0xc0c0820408, diff --git a/samples/digitrec/digitrec/data/training_set_8.dat b/samples/digitrec/digitrec/data/training_set_8.dat deleted file mode 100644 index 16b3330da..000000000 --- a/samples/digitrec/digitrec/data/training_set_8.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0x7163850e00, -0x6141030e00, -0xe1e1870e00, -0xe1c3858f00, -0x29f3860c10, -0xe1c1870e08, -0xc143858e00, -0x71e7860e18, -0x31c3850c00, -0xf1a3870c00, -0xf3e7c89f00, -0x71e3870e18, -0x60c1870c00, -0x7163860c00, -0x61e3830608, -0xc181c30600, -0xf1c60800, -0x60c1060c00, -0x60c1860c00, -0x71a1c71c00, -0x71e3830e00, -0x60c3050e00, -0x31e3861800, -0x4141820600, 
-0x71e3861e00, -0x30e30e0e00, -0x6147830c00, -0x31e38f1e00, -0x6143820408, -0x61c1820e00, -0x71e3870c00, -0x61e3870e10, -0x61e1c60c10, -0x60c1830600, -0x21b4c60c08, -0xe3c3c48f00, -0x20a2060c00, -0x6103e60c08, -0xc3e1858f0c, -0x71e3850e00, -0x60e18e1410, -0x61e1860c00, -0x4081830400, -0xc1c3870e00, -0xf367858e00, -0x7183c71c00, -0x71e1870c00, -0xe343878f00, -0x21e3860c10, -0x41e3070e08, -0x60c3830e08, -0xe141830e08, -0xf1638f0e00, -0x71e3861c10, -0x4181850e00, -0x61c1830e00, -0x61c1830e00, -0x4083840600, -0x61c307870c, -0x6183058f00, -0xc1c7830604, -0xf1e3870e08, -0x3161870e00, -0x11c3070e00, -0xf1e3cd9e00, -0x61f3870c00, -0xc1c3030200, -0x20c1860c00, -0x6163870c00, -0x71c1860c00, -0x61c1830c00, -0x30e3850e00, -0x61c3870e08, -0x6163870e00, -0x61e1860e00, -0xe3e3489f08, -0x61e3050e00, -0x30e1050c00, -0x61c3060e00, -0x60e1860c00, -0x60e1861c00, -0x7181870c00, -0xf303858e00, -0xc0c1850000, -0xc1c1830e00, -0x60c1850c10, -0x20e1861c00, -0xf323870e00, -0x31e7870e0c, -0xc163850e00, -0x6021860c00, -0x71e1060c00, -0x61c3870e00, -0x61e3870e00, -0x61c3060600, -0x7123861c00, -0x11e3820c00, -0x71e3860c00, -0x7123cd9300, -0x60c1030e00, -0x30e1060c10, -0x3e3c21c00, -0x61e3870e00, -0xc1e3070600, -0x61c1820c00, -0x6083c60c00, -0xa1c3058f00, -0x71238e1c00, -0xc141830600, -0x71e3060e18, -0x61c1870c00, -0x6143858e00, -0xe1c3030600, -0x21e3830c10, -0x60c1820c08, -0x60e3860c00, -0x3f3860c00, -0x6103c49100, -0x2081860c00, -0x6141870e08, -0x61e1820c00, -0xe243c8ce00, -0x6103848800, -0x61e7870e08, -0xc123870600, -0xe143860e00, -0x71e1860c00, -0x71b3c61c00, -0x31c3870c00, -0x61e3870c00, -0x30a1820c00, -0x71a3871a00, -0xf343869e00, -0xc1e2870600, -0x71e1871e10, -0xf103850e00, -0x61c1870c00, -0xc141870600, -0x50e1861c00, -0x70a1861c00, -0x71a38e1c00, -0xe1438c8f00, -0x61e3870e00, -0x6141060c00, -0x31a38f1c00, -0xf1a1870c10, -0x70e3061c00, -0x6081030400, -0x7081830c00, -0x61c3070600, -0x7163070c10, -0x6141850c00, -0xc141830600, -0xe263070f04, -0x7322c70c00, -0x60c1020c00, -0xc3c3820604, 
-0x61e1870e00, -0x1c1c1870e00, -0xd161830e00, -0xe343030600, -0xe343830600, -0xe1e1870e00, -0xc373c70e00, -0x61c1060e08, -0x61c1830e00, -0xc363878e00, -0x6143820c00, -0x71a3c99c00, -0x70b1861c00, -0x6146878608, -0x70c1851e00, -0xe3e3830600, -0xe1c1050e00, -0x61c3070e08, -0x6141820600, -0x6143878f00, -0xc141838600, -0x61c3060c00, -0x41c1830c00, -0xe1c3850e00, -0x6141030608, -0x8e38e1800, -0x68f1861c00, -0x30a38e1800, -0x30a1860c00, -0x61e3860c00, -0x31e3850e00, -0xe1e3830e08, -0x6041040a00, -0x70e3861c10, -0x71e1870c10, -0x6143060e00, -0xe3c3830e00, -0x60c3860c08, -0xe141870c00, -0x6123860c00, -0x4103830600, -0x61e1850e08, -0xe143858e08, -0xe1c3858f00, -0x6141870e00, -0xf3e3c70e00, -0x6141070c00, -0x6141030600, -0x6081820600, -0x50c1060c08, -0x11238e1c10, -0x60c3850c00, -0x30a1861c10, -0x60c1020400, -0x61e3830e08, -0x6143850c00, -0xe3e3838e00, -0xe0c1870e00, -0x71e3061e08, -0x61a18d1e00, -0x60c1050c00, -0xf323830e18, -0x71e3860c00, -0x70e1871c00, -0x61e1860c00, -0x6041020c00, -0x30e1870c00, -0x60e3860c00, -0xc183c28600, -0xe161870e08, -0xe121830c00, -0x61c3848e00, -0x7143820c00, -0x61c3860e00, -0x61e3c70c00, -0x61e1060c08, -0x61e30e0e00, -0xe1c3850e00, -0x60c1060c00, -0xc1c3830e00, -0x6081820c00, -0x61c1870e00, -0x61c1861c10, -0x60c1850e00, -0x612286060c, -0x61c1870e00, -0x4173851c00, -0x23e7860e00, -0xc1e3860e08, -0xf343078f00, -0x31e3861c10, -0x60c1850e00, -0x61c1070e00, -0xc3e3870e08, -0xc383c48f00, -0x1c2e7838f08, -0xe083070e00, -0x61e1871e10, -0xe1238d1a08, -0x61e3860c00, -0x61c1870e08, -0xb0a1870e00, -0x30a38a1c00, -0x71a2860c18, -0xf1e3870e08, -0x60e3870c00, -0x70c1870c00, -0xe143020c00, -0xf3c3078700, -0x60c3850e00, -0x7163860c00, -0x4143830600, -0x31c3870c00, -0x61c1020608, -0x7122820c00, -0xe1c1840a08, -0x71e3060c10, -0x30e1861c10, -0x6163820c10, -0x6021860c00, -0x61c3870e08, -0x71a1870c00, -0xc3c383060c, -0x6163860c08, -0x60e3860c00, -0x6163870e00, -0x6183850e00, -0x61e1820c00, -0xe123878e00, -0x1a3cf0800, -0x6181860c00, -0x70c1860c00, -0x91c1850c00, 
-0x20e3891c00, -0x71e1871e18, -0x50e3070e08, -0x60e38e0c00, -0x7121860c10, -0x31c1870c00, -0x61a3830c00, -0xf1e38f1e18, -0x30c1061c00, -0x61c3870600, -0x71e3860e00, -0x61c1870608, -0x73e3870e08, -0x61e1870c00, -0xe1861000, -0x30c3850e00, -0x71e1870c00, -0xc3e3878700, -0x4143020600, -0xe141870e04, -0x7161860c00, -0xc1a1870c00, -0x71e3860c10, -0x61e3860c08, -0xe183848f00, -0xe163850e00, -0x61c38c8e00, -0xc0c1830600, -0x71c1860c00, -0xc1e1870600, -0xe363870f08, -0x70e3870e00, -0x71e3870e00, -0xe3c3848f00, -0x71c1871c00, -0xb143030600, -0x1e321830e08, -0x61c3870e00, -0x6121850e00, -0x61c3850e00, -0x6062820e08, -0x6143850e08, -0x71f1861c00, -0x31e3871c00, -0x60e1860c00, -0x71e3850e08, -0xe161870e00, -0x71a3c49e10, -0x61a1870808, -0x2183c28e00, -0x70a3861800, -0x6123870a00, -0x60e1c60c00, -0x6163830600, -0x6141860c00, -0x60c1850e00, -0x61e1830e08, -0x6143850e00, -0x7101070600, -0x71e3870e00, -0xe1c3848e00, -0x7193c70e00, -0xf163858e08, -0x61e3c70c10, -0xc1c3850e00, -0xf323870e00, -0x71c3861c00, -0x6143448e00, -0x71e38f0e00, -0x7163830c10, -0x41c3830400, -0x71e3e71c10, -0x71e1870e08, -0x31e3860c00, -0xe1c3070e08, -0x6081c60c00, -0x61c1830e00, -0x61c1870c00, -0x6143850e00, -0xf121860c00, -0xe1e3870e08, -0x30e1871e00, -0x60c1870e00, -0x60c1871400, -0x7363870c00, -0x30e38e1c00, -0x71a1870c00, -0x60c1870c00, -0x30c1820c00, -0xe1c3848e00, -0xc14183060c, -0x3081ce1800, -0xe1c3830600, -0xf121850e00, -0xc1c3c48700, -0x71a3860c10, -0x60c1850e00, -0xe1c1070e00, -0xe3e3870e0c, -0x61c3070e00, -0xe141830e00, -0x6143070e08, -0xe3c3858e08, -0xe0c1870600, -0x6061870c00, -0x20c1870c00, -0x20e1870c18, -0x7143060c00, -0xe1e3870e00, -0x30e38e1800, -0x61e3ce0c00, -0x7143871e10, -0x4161860c00, -0x70e1861c10, -0x30e1870c00, -0x6143850e00, -0x60e3060e00, -0xe103c70e00, -0x40c1860400, -0xe141030608, -0x61c3070e00, -0x60e3060c00, -0x71e3860c00, -0x61c1871e00, -0x61c3820600, -0x1e363858f04, -0x60c1020c00, -0xe173870e08, -0x71e1860c00, -0x61c1870e00, -0x3041060c00, -0xe121870e00, -0x71e3870e08, 
-0x61c3060c00, -0x31e1070c18, -0x6181850e00, -0x1e38e0c00, -0x30c1060c10, -0xe143848e00, -0x71e3861c00, -0x6163850e00, -0xe247878300, -0xe247c48f00, -0xc3c1830e08, -0x60e1871c10, -0x41e1851c00, -0x70e1861c00, -0x30c1060c00, -0x61c3060c00, -0xe141870e00, -0x61c3070c00, -0x71e3ce1c00, -0x51e3070e08, -0x60e1820c08, -0xc143830600, -0x6163870e08, -0x60c1060c00, -0x70c1850e00, -0x1e1e1850e08, -0xe1c3850e08, -0x61e3870e08, -0x60c1860c00, -0x73e7860c10, -0xe1c3848e00, -0xc387c68600, -0xd1a3cf0c00, -0xc143838600, -0x40c1820400, -0xe141030600, -0x6141820c00, -0xe181050e00, -0x61c3870e00, -0x7163850e08, -0x60c1020400, -0x61c1050e00, -0xe123850e00, -0x61c3070e00, -0xe1c1870e08, -0x60c1830c00, -0x61c1860c00, -0x71c3870c10, -0x6141870e00, -0xe1c1850e00, -0x1f3870c00, -0x6123c60c00, -0x1f1c1878e00, -0x6181c70e00, -0x61c1820c00, -0x71e3860e08, -0xe143830e08, -0x61c1070c00, -0x61c3820400, -0x60c3860c18, -0x60c3020408, -0x41e1860c00, -0x60c1820c00, -0x1c321838f08, -0x71e1870c00, -0xe143830600, -0xc141870e00, -0x6101820600, -0x11e3871e00, -0x71e3c61c18, -0x60c1860c00, -0xe323c60c00, -0x1e3e7c78900, -0x61c1870e08, -0x6143060c00, -0x61c1030600, -0x30c1860c00, -0x71c1070e00, -0x31e3860e00, -0x61c3830c00, -0x61e3c70e18, -0x61e1860c00, -0xc1d3c70e00, -0x7163860c00, -0x61e3850e08, -0x71e3851e10, -0x6132c61c00, -0xe143c88f00, -0x71e1870c00, -0x1f3860c00, -0xc081010e00, -0x61c1870e08, -0x6101850e00, -0x41c3830a08, -0x10c1060c00, -0x6123c61c10, -0x71e3861c10, -0xf3e7cf8e00, -0x4143020600, -0x61c3870e00, -0x60e1861c10, -0xe1e1c51c00, -0x71a3860c00, -0xe3e3c70e08, -0xc141030600, -0x61c1830c00, -0x61c3030604, -0x61c3850e00, -0xe1c1870e08, -0x71e3860c00, -0x60c1870c00, -0x6143070e00, -0xc183030300, -0xe141870e00, -0x60c3850e00, -0x31e3858e00, -0x79a1861e10, -0x61c1850e00, -0x61e3860c00, -0x71e1871c00, -0x31e3870c00, -0xc141030a08, -0xc1c3030600, -0xf163870e08, -0x61a3860e00, -0xe143850e08, -0xf1338e1c00, -0xc0c1858e00, -0x60c1060c00, -0x71a3060e00, -0xc141830600, -0x61c1870c00, -0x61c1830e08, 
-0x63c3830e08, -0xf122870e08, -0x61e1830e00, -0x71e3860c10, -0xe263858700, -0xf3e7870f08, -0xd3e787870c, -0x6143820c00, -0xc1c1830600, -0x30c1870c00, -0xe3c7838704, -0x1f3c60c00, -0x7123850e00, -0x61e3870e00, -0x6141070a08, -0x6143850c00, -0x6101830e00, -0xd367870e00, -0xe3c3838700, -0x61c3070e00, -0x6923860c00, -0x70e1851e00, -0x4041820600, -0x71a3850c00, -0xc223878600, -0x60c1870e00, -0x30c1870c00, -0x71e1870e00, -0x6181030600, -0x6143851610, -0x7121860c10, -0x61c1050a08, -0x60c1060c00, -0x60c1030e00, -0x71a1861c10, -0xf1c1030c00, -0x71a14e1c00, -0xe163870e00, -0x71e1870c18, -0xe1e1850e00, -0x30e1860c00, -0x30c3020c00, -0xe102c30600, -0xe141030e08, -0x60c1020c00, -0x61c3870e00, -0x61e2c61800, -0x61c3870e00, -0x61c3830e08, -0xf1f3851e00, -0x60e1840c00, -0x61c3860c00, -0xe1c1850c00, -0x71e38f1e10, -0x40e1020c00, -0x31e3850e00, -0x31c3030a08, -0x61c3030600, -0x4141820600, -0xc1c3070600, -0x71e1871c00, -0x7163861c10, -0x60e1860c00, -0x6143070600, -0x61e3860e08, -0x71e3c61c30, -0xe141870e08, -0x71e2860c10, -0x70a1cd1e00, -0x71a3860e10, -0x71e3c71e00, -0xe043830e00, -0x1c2c3878700, -0x4163030c00, -0x6123861c10, -0x10c3861c00, -0x60c1830c00, -0x60c1820c00, -0x6161870c00, -0x7081860c00, -0x31c3030408, -0xf1e3850e08, -0x60a3ce1c00, -0xc3e7838704, -0x6161830e00, -0xf3c3070702, -0x71e3871c00, -0x30e1861800, -0xc141060e00, -0xc3c3030700, -0x60a3ca1c00, -0x60a1861c10, -0xe141030600, -0x6143850c00, -0xe141830600, -0x5163861c10, -0xe1e1870e08, -0x2081020c00, -0x71238f1e00, -0x61e3860c08, -0x61f38f1c00, -0x61c1870e00, -0xe1c1c70608, -0xe3438c8f00, -0x6161870e00, -0x60e1870e00, -0x61c3c68e08, -0x61e3c30430, -0x71e3850e00, -0xc143860e08, -0x61c1850e00, -0x61c1860e00, -0x61c1030e00, -0xc0c1830c00, -0xe143850e00, -0xe3e3850f0c, -0x63e7870608, -0x71818f1c00, -0xe163860e00, -0x60a1820c00, -0x61c3870600, -0x6373860e00, -0x30a1860c00, -0x61c1030600, -0xc183850700, -0x51c1820c00, -0xc363070e00, -0x2123850e00, -0x6163820c08, -0x61c3851e00, -0x60e1860c00, -0xf3c1830600, -0x71e1861c10, 
-0x41e3870e08, -0x6141850c00, -0x73c3830e00, -0xe1c3050e00, -0x7303c71c00, -0x6103030600, -0x71c3060c00, -0x70e38f1c10, -0x71e3871c10, -0x71e38d1c10, -0xc343830600, -0x61c3830600, -0x61c1870c00, -0xe123870e00, -0x61c1030600, -0xe161830e00, -0xc1c3870e00, -0xe1c3830600, -0x6143860e08, -0xc1e3c60c10, -0x60c3050e00, -0x60c1820c00, -0x61c3820c00, -0xe3e3870e08, -0x71b3c61c10, -0x71c1070c00, -0x61a3860c00, -0xe227c89f00, -0x6141070e00, -0x60e1860c00, -0xf1e3870e00, -0x41c1870c00, -0x3e7848f00, -0x6143830600, -0xe161870e00, -0x61e3850e00, -0xe1c1850e08, -0xe3e3c70e00, -0x71a1860c10, -0x30e18e1800, -0x6143050e00, -0x10e1061800, -0x70e1871c10, -0x1e3c61c00, -0x61c3870e00, -0x20c3830c00, -0xc1c1850e00, -0xe347878f00, -0x61c3030600, -0x6367838700, -0x6143820c00, -0xe343850e00, -0x6103830600, -0x30a3860c00, -0x23e7c78e00, -0x6163870e00, -0x33e1861410, -0x30e1861c10, -0x61c3810e00, -0x61c3c48f00, -0x60e1870c00, -0xe141830600, -0x61c3870e00, -0xe143870e00, -0xe161860c00, -0x31e3870e08, -0xe1c1850c10, -0x71e3cf9e00, -0xe363870f00, -0x61c1870e08, -0xf163870e08, -0x71e38f1c00, -0xe181850600, -0xc3c7c68700, -0x61c1860c00, -0x6161820e08, -0x183c3878700, -0x70a3860c00, -0x6141830e00, -0x4141830400, -0x40c1070600, -0xf123cc9e00, -0xe1038d0e00, -0x3e3c20c00, -0xe123830600, -0xc1c3830700, -0x6143870c00, -0x71e3860c00, -0x60c1070e00, -0x6163c70c00, -0xe1861800, -0x8180830600, -0x60c3850a08, -0x1e303830704, -0x61c3070e00, -0x6163820608, -0x61e3870e00, -0x4161860c00, -0x71b38e1c00, -0x71e3861c00, -0xe7ca1800, -0x71e3870e00, -0xe0e1870e00, -0x61e1870c10, -0x70c1060c00, -0x61e18f1c00, -0x63c3830608, -0x6141820c08, -0x60a3ca1800, -0xe1c1830600, -0x71a3860c10, -0xe163c60c00, -0x60c1870c00, -0x70c1860c10, -0x61c3070604, -0x20a3860c00, -0x8e3860800, -0xe0c3850d04, -0x4113850c00, -0x61c3850e00, -0xe161070a08, -0x21c1870e08, -0x60e1860c10, -0x2081850c00, -0x6343030608, -0xe141070e00, -0x6141870e08, -0x30a3860c10, -0xe163030e08, -0xe143828e00, -0x61c3860400, -0x7103820e00, -0x1c363858700, 
-0x4041070600, -0xe123830e08, -0x7141060c00, -0xe1c1870e08, -0x61c1870e00, -0x71c1870e00, -0xe223830e08, -0xe141870e00, -0x31e3860c00, -0xe143848e00, -0xc181070400, -0x21e3cf0800, -0x61e7860c00, -0x71e3861c10, -0x71e3871e10, -0x31e1871c00, -0x1e3e7cf9e00, -0x61c1870c00, -0xe1c3848e00, -0x71e3860e00, -0x71e1850e00, -0xe023860c00, -0x61e1820e00, -0x30a2820c00, -0xe247c89b08, -0x51e3060c00, -0xc141830e08, -0x71c1870c00, -0x71e1830e00, -0x7123861c00, -0x6081850c00, -0xe1c7cc8e00, -0x60c1860c00, -0x61c1030600, -0x6163860e00, -0x61c1830c00, -0x6143830e00, -0x6143820400, -0x71a1870e00, -0x61c3830e08, -0x7081850e00, -0x6143870e00, -0xd163850e00, -0x60c1860c00, -0xe143858e00, -0x1071c3879e00, -0xc3f3c51e00, -0x61c1830c00, -0x6143848f08, -0x6141830e00, -0x71a1861c10, -0xe103c60c00, -0x71e1060c00, -0x71e3870e00, -0x71a3851e10, -0x20c3020600, -0xc3c3830704, -0x6123830c00, -0xe1c1830e00, -0x50b3ce1800, -0xc161870e00, -0xc143c60608, -0x11c3070600, -0xe1e3070e0c, -0x43c3830208, -0x6163860e00, -0x30c1861c00, -0x49e3860e00, -0x61c1870e00, -0x70a1861c00, -0x61c3858e00, -0xc143850600, -0xe343808f00, -0x31e3861c00, -0x61c38d9e00, -0xe1c1870e00, -0xe103830600, -0xe363870e08, -0xe1c3c48e00, -0x61c3070e08, -0x61e3c70c00, -0x71e3850e10, -0xc0c1870c00, -0x71a18e1c00, -0x20e3840820, -0xe343870f08, -0x6141070e00, -0xc0c1050c00, -0xf1e1870e18, -0x61e3860a00, -0x20c1060c00, -0x7123cf0e00, -0xc143830600, -0x6143868f00, -0xe3c3870e00, -0x83b3c30608, -0x61c3070e00, -0x6143850e00, -0x71a1860c00, -0xe163870e00, -0x20c1060c00, -0x19e3060c00, -0x70e38d0e00, -0xe1c3870e08, -0x4143c30600, -0x30e1861c10, -0xf3e3c70e08, -0x71e3871e00, -0x61e3878e08, -0x6143848e00, -0xe143868f00, -0x61e1870c00, -0x30e1861410, -0x61c3870c00, -0x7123c61c10, -0x61c1850e00, -0xe143858700, -0x61c3c49e10, -0x71a3c58e00, -0x61c3850e00, -0x60c1870e00, -0xe1c3c48f00, -0xe081848e00, -0x41c1830e08, -0x61c1870e00, -0xf1e1860c00, -0x6141060e08, -0xe3e3870e08, -0x61e3070e00, -0x60c3870c00, -0x49e30f0c00, -0x6141850e08, -0x61c1830c00, 
-0xc183830600, -0xf367870f08, -0x60c1050e00, -0x61c1070e00, -0x7123860c00, -0x4141810600, -0x61c3830a08, -0x60c1060c00, -0xf327c58f08, -0x70a1870c00, -0x41c3830600, -0x61c3050600, -0x21a2860c00, -0x2143860c00, -0xe141850e00, -0x7081871c00, -0x6163c68e00, -0x61c3870c00, -0xe121060400, -0x70e1860c00, -0x1c3c7c78700, -0xc1c3820608, -0x7143870e00, -0x6143820e00, -0x71e3860c00, -0x61e1830c00, -0x71e3050c00, -0x60e1860c00, -0x61c383060c, -0x31e3861c00, -0x6181050c00, -0xe3870c00, -0xe383878704, -0xe143c48f08, -0x71e3871e08, -0x3081860c00, -0xe143c48e00, -0x60c1870c00, -0x61c6820e00, -0x60c1860c00, -0x30e1861c00, -0x6123860c00, -0x41c1860600, -0x41e3830608, -0xe3e3c78e00, -0xe3c3870e00, -0xe183070f00, -0x6081860c00, -0x51e3860e08, -0x6101060c00, -0x7143070e00, -0x41e7870e00, -0x60e1860c00, -0x6161830c00, -0x6163c78f00, -0x61c3070600, -0xe263870f04, -0x61e18e0c00, -0x20e3841010, -0x41e3870e08, -0x61c1870e00, -0xc163830600, -0xe247c48f00, -0x60c3870e00, -0x61c1830c00, -0xe3c1878600, -0x61e3870e00, -0x11e3860810, -0x43c3830e08, -0x61e3830c00, -0xf123cd1e00, -0xe143848e00, -0xf3e3871e08, -0xe3c3830600, -0xe1e1850e08, -0x71c1850e00, -0x60e3850c00, -0xe181030600, -0x31e3860c00, -0xe367c71900, -0x60a3861c00, -0x7081870c00, -0x61c3870e00, -0x71e3870e08, -0xc1e3070e00, -0x61c1870c00, -0x6143030600, -0x41e3860c00, -0xe1e3860e08, -0x40c1870400, -0x11e3871c10, -0x7143858e00, -0x60a1851c00, -0x71e1871c00, -0x70a1871c00, -0x71e2861c00, -0x70b1871c00, -0x60c1820c00, -0x4121860c10, -0xc3a1c70e08, -0xf163070e00, -0xc183030700, -0x61a3860c10, -0xe141870c00, -0xf123859e10, -0x60c1830e00, -0x30a38e1c00, -0x61e7cf0c00, -0x60a1c60c00, -0x60c1870c00, -0x30e38e1c00, -0x60e1870e00, -0x60e1861800, -0x6081820a00, -0x6143030700, -0x20c3850a00, -0x61e3860e08, -0xe1c1848f04, -0xf3e687060c, -0x70e3871600, -0xe3c3878e00, -0x71e1861c10, -0xe181830e00, -0x61c3070e00, -0x21c3030600, -0x60e3860c00, -0x21a5430c00, -0x6343c58e00, -0x71c3820c00, -0x60c3060e00, -0xe141050e00, -0x6141870c00, -0x6161870e08, 
-0x60e3860c10, -0xe1c1850600, -0x61c1870c00, -0x21c2830600, -0x60c1820400, -0x60c1070e00, -0x6141870e00, -0x49e1871c00, -0xe143870600, -0x71e3870c00, -0xe1c3830600, -0x61e3830e00, -0x60c1860c00, -0x31e3860c00, -0xc141830600, -0x7163871e00, -0xe143848e00, -0x6121860e08, -0x70e1870e00, -0x6143830e00, -0xe103c70e00, -0x71f3861c00, -0xe123840e00, -0xe1e3470c18, -0x61c2870600, -0x51a2ce0800, -0x4187830604, -0xe1e3830608, -0x63c7830e00, -0x6143870e00, -0x70e1860c10, -0x61e3cf1e00, -0x61c1850c00, -0x31a1870a10, -0x8141830600, -0x6143870e08, -0x31e3860c00, -0x60c1850e00, -0x61c1870e00, -0x6103860800, -0x71a3cd0e00, -0xe343cc9f00, -0x60e1860c18, -0x6161020c18, -0x60c1850e00, -0x61c3849200, -0x60c1020c00, -0x60c1020400, -0x71a1871e10, -0x61e1871c00, -0x6141820c00, -0xe1c1830608, -0x71e1871c00, -0xe181010600, -0xe123870e08, -0xe141c60c00, -0x6163870e00, -0x1e7c60800, -0x61c3850e00, -0x60c1850c00, -0xe143848e00, -0xf0e3870c00, -0x71a3871c00, -0xe141030600, -0x2243878e00, -0xe1c3c78f00, -0xe163870e08, -0x4143830600, -0xc143830600, -0xc043848f00, -0x4183830600, -0x61c3060e08, -0x6163820e08, -0x61e3859e10, -0x61c1870e00, -0x70e1060c00, -0x31a38f1e00, -0x61c3870600, -0xc1e3870e00, -0xe103c30f04, -0xe143428e08, -0x71a3871a00, -0x61c1830e00, -0x61e3870c00, -0x41c1850e00, -0x7143861c00, -0x31a3860c08, -0x6363850e00, -0x6141870e08, -0x41c1830400, -0x30a1861c10, -0x61c1830600, -0x6043860c00, -0xe323870f00, -0x71a3871c10, -0xe143870f00, -0x6141030e00, -0x21c1030c00, -0x60c1870c00, -0xc161870e08, -0xe143c50c00, -0xe022c60c00, -0x71a3860e08, -0xc143850e00, -0x61e3850c00, -0xc1c1830600, -0x141030c00, -0x1e3c61800, -0x70a1861c00, -0xc143848e00, -0x30e3871c00, -0x31e3860c00, -0x70e1861400, -0x21c3050e00, -0x71e3ce1c00, -0x61a1870e00, -0xe141830c08, -0x6141030600, -0x60e1871c10, -0xe147870a08, -0x70a38f0e00, -0xe1c3850f04, -0x6141030c00, -0xe143830600, -0xf1861800, -0x6143030a00, -0x71c1060e00, -0x60c3830400, -0x1c3c3878700, -0x6141070608, -0xe163c70e00, -0xe243858f04, -0xc343030600, 
-0x71c3870c00, -0xe3e7c78e00, -0x61c3878e00, -0x71c1860c00, -0x6163060e08, -0x60c3c89e00, -0x60e1070e00, -0xc347830600, -0xc1c183060c, -0x6081010400, -0x60e1860c00, -0x60c1870c00, -0x31e38e1800, -0x30e18e1800, -0x6183050e00, -0x41c1830600, -0x21e1c71e18, -0x21c3860e00, -0x70a3861c00, -0x20c1860c00, -0xe1c3858e00, -0xc1c1030600, -0x70a38e1c10, -0x7121860c00, -0xe133870e00, -0x6041020c00, -0x1c3c3c78704, -0x60c1850e00, -0x61c1820c00, -0x6141030c00, -0x71e1871c00, -0x6143868d00, -0xe0c1050e00, -0x40c1030600, -0x20c1020c00, -0x51c3060c00, -0xe163870600, -0x70a38e1c00, -0x2183830e00, -0x60c1060c00, -0x61c1830e00, -0x7061860c00, -0x61e1870e00, -0x61e3850e08, -0x71e3861c00, -0x60c1050e00, -0x61c3870c00, -0xf163c48f00, -0xe1c1850e00, -0x6143870e00, -0x61c3830e00, -0x31e3861c00, -0x7143060e00, -0x71c3860c00, -0x61c1830600, -0xf1e3851e00, -0x30c3060e00, -0x60c1050e00, -0xc143870600, -0x6161050c00, -0xe1860c10, -0xc1e3870e00, -0xc141830600, -0xe1c1858e08, -0x61e3870e08, -0xc1c3070600, -0x20c1070e00, -0x61c38f0e00, -0x60c1860c00, -0x60c1830e08, -0xe3e3830e18, -0x61c3050e00, -0x2083030600, -0x60e1061c10, -0x6143c60c10, -0xe1e1870e00, -0x60c1851e00, -0xe3e7c60e00, -0xe1a1c20c00, -0xc341828600, -0x61c1820e00, -0xe141830600, -0x4141010e00, -0x6163860c00, -0xc1c1870600, -0x71a3cc1800, -0xe141870c00, -0x30c1860c00, -0x61c3870e08, -0x7121871e10, -0xc163860c00, -0x71c1871c00, -0x7163861c10, -0xf1e3c70e18, -0x7141870e00, -0x7081850c00, -0x60c1860c00, -0xe367830e00, -0x71c3870e00, -0xe14383070c, -0x30e3861c00, -0x70c1060c00, -0x7141861e00, -0x60c1030c00, -0xc181850600, -0x71e1060c10, -0x30e1871c00, -0x6141820c00, -0x4143830408, -0x71c3051e00, -0xe1c1851e08, -0x63e7858f08, -0x61c1020c00, -0x31a1861400, -0x60c1870400, -0x71e3060c08, -0xe143870e00, -0x6132c61c00, -0x6103c78e00, -0xe1c3870e00, -0x6142020e00, -0xe163870e08, -0x6141830e08, -0x61c3860e00, -0xe101830e00, -0x6081020600, -0x6163860c00, -0x21c3070e00, -0x61c3070e00, -0x61e3c61c10, -0x6143870e00, -0x4143820e08, -0x60a38e0e08, 
-0x1f3c60c00, -0xa3c3830600, -0x70e1860c00, -0x30a1861800, -0x7123860c00, -0xf1e3c78e00, -0x6103860c00, -0xe3e1870e08, -0x4143830400, -0x41f1870e00, -0x71e1871c00, -0xe3e7cf8f08, -0xe123870e00, -0x60a3c61c10, -0x6143050a08, -0x6143860c00, -0xe143830608, -0x70a1870e00, -0x61c3870e08, -0xe1e38f1e00, -0xe3c3878f04, -0x60e3860c00, -0x61c1070e08, -0x30c1060c00, -0x6147870e00, -0xe163870a08, -0x71c3830e00, -0xe123870e18, -0x71e3870600, -0x71e38f1e10, -0xe143848e00, -0x20f1860c00, -0x61e3860c00, -0x61e3871e00, -0x30e38e1c00, -0x6143848f00, -0x31e3861c10, -0x6143850e00, -0xe103050600, -0xf1e3870e18, -0x61c3850e00, -0xe1e3878e00, -0xe123848f08, -0xe163860c00, -0x60c1060c00, -0x1e7870c00, -0x61c3830e00, -0x71e3860c00, -0xe1e3830e00, -0x61e3851e00, -0x6181870e00, -0xe363850e00, -0x70c1870c00, -0x6143870e00, -0x31c3060e00, -0x60c1850e00, -0x61c1830e00, -0x7163830e00, -0x60e1c20c10, -0x60e1860c00, -0x61c3870e00, -0x61c1870e08, -0x20e1860c18, -0xd3c383070c, -0x71e1851c00, -0xf1f3070c00, -0xe163870e08, -0x6143060c00, -0x1e3861800, -0xe1c1850e00, -0xe143870e00, -0x1e243c48f00, -0xe1e3070e08, -0x61c1050e00, -0x60e1860c00, -0x1e1c50c00, -0x61c3830e10, -0x71e1860c00, -0x61c1870e00, -0x60c1860c00, -0x71e1851e00, -0x61c1850c00, -0x71e1861c10, -0xa3c3830700, -0x40c3850c00, -0xe303c70c00, -0x61e3c68f08, -0xc3e383060c, -0xe123830e08, -0x4141070e00, -0x6323830608, -0x1e3c3830704, -0x60c3850e00, -0xe163870600, -0x60c1c70c00, -0x30e3861400, -0x61e1870c00, -0x63e7850e08, -0xe163858e08, -0x6143838e00, -0x71e3870e18, -0xe0c3850c00, -0x61c1020600, -0x71e38f0e00, -0xe3c1830608, -0xe0a38d1e10, -0x61c3870e00, -0x20a3861c00, -0x6163070e00, -0x61e1860c00, -0x73e3871e08, -0xf341830e08, -0x71f3c71e18, -0x60e1060c00, -0x39a3070c00, -0x6163860c00, -0x6163830e00, -0x71e38d0c00, -0x6183030600, -0x61c3820c00, -0x6141870e08, -0x60a3861800, -0x71e3870e00, -0x71c3861c00, -0x71c1870c00, -0x70a1c61c10, -0xc123050f00, -0x71e7c89e00, -0x70e38f1c00, -0x7123860e00, -0xc143030600, -0x61c3060c00, -0xe347848f08, 
-0x31e3871c10, -0x70c1870c00, -0x41e3820c08, -0xe143830e08, -0x6163870e00, -0x61a3870c00, -0xe1c3870e00, -0x7163871c00, -0x6143830e08, -0x41e3060c00, -0x2041020a00, -0x60c3851e10, -0xc143830700, -0xf363870e08, -0x6163870a00, -0x71e3050e00, -0x4143870400, -0x31a3861c10, -0xc181070700, -0x43e3870e08, -0xe141050a00, -0x20c3060c00, -0x60c1860c00, -0x71a18f1c00, -0xe1c1030600, -0xe143870e08, -0xc347c68f04, -0x60c1050e00, -0x60c1820c00, -0xe143858e00, -0x41e3870e00, -0x4143830600, -0xe143830e10, -0xe1c3850e00, -0xc1c1870e00, -0xe1c1850e00, -0x4163870c00, -0xe143848f00, -0x61e1870e00, -0xe3a7c48e08, -0xc1c1870600, -0x7123871e10, -0x61e1870e00, -0x30c1020c00, -0x60c1820c00, -0x70c1070e00, -0x21c3820608, -0x1c101878f00, -0x7163860e00, -0xc161850a00, -0x70a1c61c00, -0x61c1050e00, -0x70e1060c00, -0x61c3848e00, -0xc141030600, -0x70e1820c00, -0x11c3860c00, -0xe141870c00, -0x51e3860c00, -0x21c3860c08, -0xe1e3870e00, -0x61e3c70e00, -0xf3c3060e00, -0x6181850c00, -0xc3e3c58e00, -0x61e3870e00, -0xc1c1850a0c, -0x4143870e00, -0xe143830e00, -0x6383050700, -0xf1e3871e00, -0xe1c3c48f00, -0x71c3020c00, -0x1c5c60000, -0x1367838704, -0xe163cc8e00, -0x6081030600, -0x10e2861820, -0xe143820c00, -0x30e1861c00, -0x6161870e00, -0x7363851e10, -0xc343838700, -0xe141830e08, -0x6141020600, -0x60c1870c00, -0x6141030c00, -0x7141850e00, -0x20c1060c00, -0x61c3830e10, -0x71c3070e00, -0x70c1061c00, -0x70a3c61800, -0xe1e1860c00, -0xe103820600, -0x61e1860c00, -0x61e3870e00, -0x61c1870e08, -0x71a3870c00, -0x6163820600, -0x6081810400, -0x6081870c00, -0xc1e1870e08, -0x70e1060c00, -0x7163860c10, -0xf363830600, -0x6143830608, -0x60c1860c00, -0x60e1870c00, -0x61e38f1e00, -0xe30c0000, -0x60e1870c10, -0xf1e1871e10, -0x1e7870c00, -0xe283828600, -0x7143060e08, -0x21c3830e00, -0xe143830600, -0xe1e3c60c00, -0x61e3820c00, -0x71a1870c00, -0x71c3860c00, -0xc1e3870e08, -0x20e3061800, -0x6143820e08, -0xe163870e08, -0xf3e3870e00, -0x20e3c60c10, -0x7143c28e00, -0x60a1870c00, -0x60e1c61c00, -0xe1061800, -0x71c3870e08, 
-0xe163870e08, -0x71e1870e08, -0x30e1861c00, -0x31e3860c18, -0x6141830400, -0xe1c3050600, -0x60c1050e08, -0x31a3860c10, -0x1c363878700, -0x6141830400, -0x30e1860810, -0x7103860c10, -0xe243c88f00, -0xe0c1870e00, -0x71e3860e00, -0x71e3060c00, -0x4143870600, -0xe141870e00, -0x20c1860c00, -0x31c1070c00, -0x31a3860c00, -0x71e3870c00, -0x71a1870e08, -0x61e1870e08, -0x6143050a08, -0x60c1851e00, -0x61c3870e00, -0x31c3820c00, -0x6163c48e00, -0xe141870e00, -0xe343849318, -0xe1e18f1e00, -0xe1e1870e00, -0xc3c3c48f00, -0xe3e3870f00, -0x60c3891e00, -0x60c1030c00, -0x6143830e00, -0xe143870e00, -0x61e3c59c00, -0x30e18e1c10, -0x71a3861c00, -0x61e3860e08, -0xe1e1870e08, -0x41c1050a08, -0x6143850a04, -0x61c3850c00, -0x61e3c61c10, -0x61c1850e08, -0x51e1861c00, -0xe163c89e00, -0x60c1060c00, -0x8141860c00, -0xe363078d0c, -0x71e3861e10, -0xe3c3030604, -0x61e1871c10, -0x61e3060c00, -0x71a3870e00, -0x41e7838608, -0x60c3850e00, -0x61e3860c00, -0x61c3850e00, -0x4143830600, -0x61c3860e08, -0x5121870c00, -0x60e1060c08, -0x61e3c71e10, -0xe163870e00, -0x6161830e08, -0xc1c3830600, -0x71c1830c00, -0x60a3860c00, -0x61c1870e00, -0x40e1830c00, -0xe267838600, -0x71a3860e00, -0x31e1861800, -0x60c1020c18, -0x70c1871c00, -0x21e2860c00, -0x6161850a08, -0xc1e1830e08, -0x6163860c00, -0x7123820a14, -0x60c1070e00, -0x70e1860c00, -0x6141050e00, -0x61c3830e08, -0xe1c1830600, -0xc143050600, -0xe3e3878e00, -0x1e1ce1800, -0x71c1850e00, -0xe141030600, -0xf143870e00, -0x61e3ce0c00, -0x1f7870e00, -0x61c3870600, -0x6121860c00, -0x6143820600, -0xe343870e08, -0x61e3cf1e18, -0x41c1070600, -0xe1c1830e00, -0x30618e1c00, -0xf1c1850e08, -0xf1e3870e00, -0xe1861800, -0x7163870e00, -0xe3860800, -0x7163060c00, -0x71e3870c10, -0x21c3830400, -0x4143820600, -0x61e3850e08, -0x61e38d0e00, -0x6163860e08, -0x70a3ce1c00, -0x60a1861c00, -0xd1c1050e00, -0x61e3c70e00, -0x7081060c00, -0xe141870e00, -0x60c1060c00, -0x60c1060c00, -0x1a3860800, -0xe3e3830a0c, -0x40c1850e00, -0x61c1870e00, -0xe3e3870e08, -0x6141830c00, -0x31e2861c10, 
-0x61c3850e00, -0xe163830e08, -0xc1c0448e00, -0x61c1870e00, -0xe343830e00, -0x6141020608, -0x61e2860c10, -0x61f2cf1c18, -0xe1c3c78e00, -0x6143870e00, -0x31e3860c30, -0x6081820c00, -0xe163830600, -0x70c3871c10, -0x61e3870e00, -0xe3e3c70604, -0xe3c3878f08, -0x6163078e08, -0xe1e3cc8f0c, -0x6143830e08, -0x6143830608, -0x61c3830e00, -0x50c1860c00, -0xe1c1848f00, -0x71e3871e10, -0x40c1850e00, -0x11e3860c00, -0x1c343838700, -0xe3e3860e0c, -0x63e786060c, -0x71e1860c00, -0x6081c70c00, -0x6143870e00, -0x60c1070c10, -0x61c3030600, -0x61c1850e00, -0x6143820a08, -0xe163870e00, -0x61c1030600, -0x60c1060c00, -0xe141870e08, -0x61e3c60c08, -0x71e3861e10, -0x70c1050e10, -0x61c3870e00, -0xc103870600, -0x31a3860c00, diff --git a/samples/digitrec/digitrec/data/training_set_9.dat b/samples/digitrec/digitrec/data/training_set_9.dat deleted file mode 100644 index 344dc48d9..000000000 --- a/samples/digitrec/digitrec/data/training_set_9.dat +++ /dev/null @@ -1,1800 +0,0 @@ -0x1c7870408, -0x31e7860408, -0xe367c08e00, -0x40c3870600, -0x1e5cf0c10, -0x43c4870204, -0x63e4878204, -0x41c7870608, -0x41c3830408, -0x1e3820410, -0x61e7870408, -0x21c3830c10, -0x61e1820c10, -0x1c2870204, -0x21c7c20800, -0xc142850e00, -0x61c1810408, -0xc3820408, -0x146810200, -0x61e7830c10, -0x40c1830410, -0x41c3830408, -0x4143820408, -0x61c3820408, -0x1c2810408, -0x21e3830c10, -0x43c4830100, -0xe3820830, -0xe1c1848e00, -0x1e3830408, -0x61c6870204, -0x61c3830608, -0x21e3870408, -0x41c7c50608, -0xc3820410, -0x1c5c68080, -0x41c1820408, -0x4144850200, -0x21e7cd0408, -0x41c4870404, -0x41c3c30608, -0x21c3870408, -0x41c3870200, -0x1c3810604, -0x61e3870400, -0x20c3820810, -0x41c3830408, -0x41c2870204, -0x1c3850204, -0x1e7860820, -0x41c3820408, -0x83820408, -0x4143810408, -0x21c3820408, -0x2183810400, -0x1c2870600, -0x6163820408, -0x61c2870608, -0x61c1820400, -0x1c3030408, -0x1c1820810, -0x6125c70c10, -0x4102870200, -0x61e7870604, -0x21c7870608, -0x71e7c70418, -0x6143820408, -0x31e5870408, -0x41c3850200, -0xc146870200, 
-0x2183820c10, -0x4143020400, -0x4143810200, -0x4143820408, -0x6143830604, -0x41e5870208, -0x21c3830410, -0x6147870408, -0x20c3830400, -0xc3820400, -0x6147870408, -0x20e3830410, -0x41c7850200, -0x41c3830408, -0x41c3810204, -0x1c3820408, -0x1e3820408, -0xc3830204, -0x2143820810, -0xc3a7c18304, -0x4144850200, -0x83c3810204, -0x1c2870408, -0x43c7c70604, -0x1c3810408, -0x183810204, -0xe14287020c, -0xc3e6c78300, -0x6166870408, -0x21e7870410, -0x4145850204, -0xc1c6870208, -0x6143820400, -0x1c2860400, -0x1e6c70204, -0x20c2860800, -0xc3820c10, -0x4143810604, -0x6183830400, -0x43c6870204, -0x61e7c70600, -0x61c3870400, -0x1e3830410, -0x1c6868102, -0x41c3850604, -0x21c3820408, -0xc244c70200, -0x30a3820800, -0x41c3830608, -0x41c3820408, -0x21a3820800, -0x20c1820c10, -0x1c5870204, -0x1c7850608, -0x60c3820400, -0x41c3830208, -0x20c3820408, -0x4147858608, -0x61c3870408, -0x4143850408, -0x1c7850204, -0xc142870200, -0xc141810e00, -0x3c4ce8102, -0x61e7cf8600, -0x21e4cf0408, -0x61c3820408, -0x61c3820400, -0x43c7870604, -0xc143850200, -0x61c3830600, -0x41c7c70400, -0x4142870204, -0x143810200, -0x61c3830608, -0x1c3830408, -0x1c3810204, -0x40c1820408, -0x1c244c70300, -0x43644f8100, -0x1c2870204, -0x1e3830400, -0x1c3830000, -0x61e1831c00, -0x61c3810208, -0x1c3810400, -0x20c3820400, -0x61e78f0600, -0x41c7850204, -0x21a3820410, -0x6183870200, -0xc1c3830204, -0x1c58f0204, -0x61c3820c00, -0x4143060200, -0x61c3830400, -0x41c3830400, -0xc3810200, -0x40c1820400, -0x21c3830408, -0x4143810600, -0xc2044fc080, -0x41c3830410, -0x21c7870408, -0x63e58f0408, -0x41e7830608, -0x20c3810400, -0x1c3810204, -0x61e7c50204, -0xc3820408, -0x41c3810608, -0x1c3830204, -0x4142860400, -0x41c7870408, -0xc245850200, -0x61c3830408, -0x4102810204, -0x61c3810608, -0x4143830600, -0x4143810600, -0x21c3850408, -0x41e3830600, -0x21c2870200, -0x21e2870410, -0x1c7c70204, -0x61e3870408, -0x41c3870208, -0x20c1820820, -0x41c3830604, -0x183830604, -0x41e3830c10, -0x1c3850204, -0xc343810204, -0x6166870600, -0x21c3c10410, 
-0x61c3870408, -0xc3860408, -0x1c3810408, -0x21e3830c10, -0x4142810600, -0x20c3860810, -0x1c3830408, -0x71e3c70c10, -0x187810204, -0x61c3810608, -0x83c7830204, -0x43e7c10608, -0x4143870400, -0x1c7850204, -0x183810204, -0x21e7c70204, -0x4143810e00, -0xc344870200, -0x1c7870608, -0x61c7870408, -0x43c7c38300, -0x41c3870204, -0x41c7810204, -0x41c3810204, -0x41c3830408, -0x1e3830408, -0x61c3820400, -0x61e3830410, -0x6106c70400, -0xc2860400, -0x6144870200, -0x143830408, -0x6167870400, -0x61c2870400, -0x61c3810608, -0x2184c70410, -0x3c7810204, -0x61c38f0200, -0xc207c08100, -0x6142870600, -0x21c3820410, -0x31e7860c10, -0x1c3810200, -0x1c3830400, -0xc102830400, -0x1c3820410, -0x41e3820400, -0x4142830400, -0x6143820408, -0x41c7870304, -0x4143870200, -0x21c3830410, -0x7182c70600, -0x1c3810204, -0x1c3830408, -0xc1c2830204, -0xe1e3c39e00, -0x1c3830408, -0x4043060200, -0x6081820830, -0x21c3820c10, -0x60c1820c00, -0xc3820c10, -0x1c58f0204, -0x63c7870604, -0x21a3830c10, -0xc143810600, -0x43c5ce8102, -0xc1c3830204, -0x41c3810204, -0x61e3830c10, -0x4143820410, -0x1c3810204, -0x61e7830c00, -0x60c3860400, -0xe3e7c38e10, -0xc343870204, -0x21c3830608, -0x61c3810c00, -0x4183830408, -0x4142870200, -0xc1c3830608, -0x1c3830408, -0x4187870600, -0x4142870204, -0x61c3830c10, -0x1c3830604, -0x21e3820c00, -0x4102870200, -0x6162870400, -0x41c2830600, -0x4142870200, -0x1c3830408, -0x6143830c00, -0x3c7810204, -0x61c7828410, -0x41c3820408, -0x2183870408, -0x6143c20408, -0x1c3820400, -0x1e0c30c20, -0x61c3830c00, -0x41c3810604, -0x20c3860408, -0x61e68f0608, -0xc101810400, -0x61c3830408, -0x6163810408, -0x20c2870204, -0x21e2870408, -0x61a6c70408, -0x41c1830c10, -0x41c3830400, -0x61e2cf0c00, -0xc1830408, -0x4180810200, -0x21c58d0408, -0x41c3810608, -0xe344870302, -0x1c3810408, -0x1c3850204, -0x41c3870600, -0x20c1820400, -0x61c3830400, -0x41c3810608, -0x21e3830410, -0x1c3830408, -0x41c3820c10, -0x1c3870608, -0x21e3870400, -0x4142830400, -0x4147850204, -0x40c3020400, -0x60e2870400, -0x41c3830408, 
-0x10e3860800, -0x4143820400, -0x1c3830604, -0x61c38f0e00, -0xc1c7870204, -0x1e7c30608, -0x4103810200, -0x31e3870408, -0x41c7850204, -0x61e7830400, -0x61c3830400, -0x2083810408, -0x41c3810608, -0x41c5850408, -0x20c3860400, -0x1e3c30c10, -0x61c3830600, -0x21c3870204, -0x1c3830204, -0x41c3830408, -0x1e7870604, -0x21e2870400, -0x41c3830200, -0x61c3c10608, -0x61c3820c00, -0x4163820c10, -0x4102870408, -0x83e4c70204, -0x6183850200, -0x4143850200, -0x61a3830410, -0x21a7830408, -0x3a7c78100, -0x4143810400, -0x1c3820410, -0x83c3830604, -0x6147830408, -0x6143870408, -0x83c7c30608, -0x41c3830218, -0x1e3820c10, -0x6163830408, -0x4142870204, -0x1c2820408, -0x4143820410, -0x41c3830408, -0x41c3820408, -0x1c3810200, -0x41c3810204, -0x4142870200, -0x183870300, -0x20c1820810, -0x41c3820400, -0x4147810204, -0x71c2870600, -0x21c3810400, -0xc1e4cf8200, -0x41c3820408, -0x4141820400, -0x41c3830600, -0xc1c7870200, -0x1c3810204, -0x41c2860c10, -0xc3820400, -0x41c3830608, -0x21e2870208, -0xc3e3c30c10, -0x61c1830400, -0x4182870200, -0x61c3830400, -0x1e5870204, -0x61c1810c00, -0x1c7810408, -0x4142870200, -0x4143830200, -0x41c3830408, -0x1c3820410, -0x61c1820810, -0x41c3810408, -0x384c78100, -0x20c3820408, -0x6143860408, -0x61c3870408, -0x1c6870408, -0xc3c7878100, -0x41c3830608, -0x21e3820810, -0x61e3820c10, -0x3183830c10, -0x1e3830408, -0x41e7810604, -0x61c3820408, -0x2143820400, -0x1c3830600, -0x20c3850410, -0x61c3870408, -0x1c6870408, -0xe264870200, -0x41c3820408, -0x1e7830408, -0x21a7830c10, -0xc143810204, -0x41c3820408, -0x1c3870408, -0x40c0820400, -0x4144cc8100, -0x4142820400, -0x61c3830608, -0x61e7c70608, -0x41c3850208, -0x61c3830c10, -0x1c3820408, -0x6143830208, -0x145850204, -0x4143870408, -0x43c781020c, -0xc246810300, -0x41c2870200, -0x2183820400, -0x61c3820400, -0x41c7830608, -0x6142870200, -0x61e7cf0600, -0x61c3830608, -0x61c3830600, -0x61c3020812, -0x41c3830408, -0x41c3830600, -0xc3830408, -0x61c2870600, -0xc3c6c78102, -0x61c3020a04, -0x41c3830608, -0x21c7850204, -0x2142830200, 
-0x6143830410, -0x41c3830408, -0x183810204, -0xc1820408, -0x4183810400, -0x41c3830408, -0x1c3820400, -0x7141020810, -0x21e3820400, -0x20c3820400, -0x20c3820400, -0x21c3820410, -0x41c3830408, -0x3c7818302, -0x6103868100, -0x41c7830204, -0x41c3820408, -0x41c3810600, -0xc0c5850204, -0x41c3830608, -0x4142870204, -0x61048f0204, -0x61c3870408, -0x1c3820408, -0x21c3830408, -0x4183810608, -0x6143830410, -0x21c2870200, -0xc3830408, -0x21c3870600, -0x10f38e0820, -0x6143820608, -0x21c3820c10, -0x4143870600, -0x6183830408, -0x10c3850410, -0x4143830608, -0x31a3c21820, -0x1c3820410, -0x4143810204, -0x21c3860410, -0x1c3870600, -0x1e3830408, -0x20c3870408, -0x4143810600, -0x41c3810204, -0x147850200, -0x61c4850608, -0x21e3850608, -0x7125860c00, -0x6163820c10, -0x41c3810200, -0x2183830408, -0x43c7810204, -0x20e3820c10, -0x61a3830408, -0x4143830600, -0x4146870608, -0x20c3870400, -0x1c3830408, -0x1c2830204, -0x1c3850204, -0x41c3870204, -0xc3820410, -0x4143810400, -0x4142870204, -0x6140870400, -0x41c3870204, -0x41c383060c, -0x4143830408, -0xc1c1810c00, -0x20c3830410, -0x1c3830408, -0x41c3c30e08, -0xc3860408, -0x4142830200, -0x1c1810408, -0x41c7870604, -0x41c3810600, -0x21e2870410, -0x20c2860400, -0x41c6870204, -0x30c2870400, -0x6143820510, -0x61c3870600, -0x1e3820c10, -0x41e3c30400, -0xc347cd8302, -0x41c2870204, -0x20c3820820, -0xc1c3810604, -0x61c3830608, -0x21c2830408, -0x41c3810408, -0xc3820410, -0x21e6870204, -0x61e3820410, -0x41c6870204, -0x4181820400, -0x1e4870408, -0x41c3810408, -0x60c3830408, -0x41c3020910, -0x1c7cd8204, -0xe143810e08, -0x61c1820c10, -0x21e7c30410, -0x1c3860408, -0x41c7870204, -0xc3c7858300, -0x1c3820408, -0x41c6810604, -0x61e7850608, -0x41c2870204, -0x1c7810204, -0x20c1830408, -0x41c3830408, -0x21c3830608, -0x41c1830408, -0x21c3830400, -0xc3850408, -0x20c3870204, -0x41e687040c, -0x43c3830408, -0x1c2820408, -0x4143810204, -0x61c7870204, -0x4143830408, -0x183810204, -0x6142870200, -0x61c382850a, -0xe2870010, -0x41c3830400, -0x61e6c70408, -0x41c1830410, 
-0x6143830400, -0x41c3870408, -0x1c3810204, -0x4142810200, -0x1c7810204, -0x61c7850204, -0x43c7830200, -0x1c3870400, -0x41c7810204, -0xc1c3810204, -0x21c3820400, -0x21a3860c10, -0x6143810e00, -0x1c2830204, -0x4142870400, -0x4183810608, -0x41c3870204, -0xc1c2870204, -0x41c44f8000, -0x1c3838408, -0x21e3c30408, -0x41c3810204, -0x61c2870608, -0x1e2870400, -0x7127860810, -0x4142830200, -0x61e28f0c00, -0x61c3820d12, -0x41c5850200, -0x31c38f0400, -0x1c3820408, -0xc083870100, -0x61e7cf0608, -0x21c3870408, -0x1c3820408, -0xc143830204, -0xc3e7c30608, -0x61e2870200, -0x41c3830408, -0x4143830408, -0x1e3830410, -0x1c3830604, -0x142820400, -0x6183860408, -0x61c3830608, -0x41c2870208, -0x1c3820c10, -0x1c3830408, -0x1e7c70408, -0x1c3860400, -0xc183810204, -0x1c2810000, -0x61c3820c10, -0x4142870204, -0x1c3850008, -0x21c3850400, -0x1c1820408, -0xc1c3830204, -0x41c3830204, -0x41c3810204, -0x31e3820c10, -0x2183850408, -0x61278f0408, -0x71e7870608, -0x4103830400, -0x4143810200, -0x1c2870408, -0x21e3820800, -0x61e3820c10, -0xc3830408, -0xc343810680, -0x21e2870400, -0x4142870204, -0x61e1820810, -0x41c3830600, -0x21e7860c10, -0xc3870408, -0x4143820400, -0xc3830204, -0x6143830408, -0x61c7870408, -0x41c2830204, -0x4143830600, -0x41c3810204, -0x41c2870600, -0x41c3810204, -0x4165c78302, -0x41c3810408, -0x63c3810204, -0x61a3c20c10, -0x6167870204, -0x1e3820c10, -0x1c3810204, -0x31a6cf0c10, -0x387818100, -0x41c1830408, -0x41c2870408, -0x61e2870400, -0x1c3830408, -0x20c1820410, -0x61c3830408, -0x1c3870204, -0x61448f0204, -0x1c3820408, -0x61c1820c10, -0x21a3820c10, -0x61c3830c10, -0x4142870204, -0x2182870400, -0xc1c3870600, -0x4143820400, -0x1c3820408, -0x71e7cf8200, -0x1224cf0200, -0xc147810204, -0x4143810200, -0x41c7850204, -0x4143830604, -0x61a3810c10, -0x43c7c58302, -0x1c48c0408, -0x2183820408, -0x3c7810604, -0x1c3830204, -0x61e3820c10, -0xc143830600, -0x41c2830410, -0xc342c78300, -0x6103820c10, -0x41c3830608, -0xc1c3810608, -0x4143870600, -0x41e3820c30, -0x1c3820408, -0x41c3820400, 
-0x41c3810600, -0x41c2870604, -0x60c3870400, -0x41c3810204, -0x61c3820400, -0xc34383060c, -0x4100820400, -0x41e7810408, -0x1c3830608, -0x41c7870608, -0x61c3830408, -0x4183870600, -0x61e7c70c10, -0x20c3870408, -0x61c3810600, -0x4143830604, -0x71a7c70c00, -0x2183850408, -0xc3e7c98204, -0x41c3830600, -0x21e3860400, -0x20c3830c00, -0x61c3830400, -0x21c3860400, -0x21e3860400, -0x61c3820c08, -0x41c3820408, -0x6143820c10, -0x247010200, -0x2063020820, -0x21c3830408, -0x4143810200, -0x41a7810204, -0x61c3830408, -0x61e7870204, -0x31c3860c10, -0x1c3820408, -0x21e3c30c00, -0x61c3820c08, -0x4143810204, -0x61c3820c10, -0x1c3870408, -0x43c4cf8f00, -0x61c3830408, -0x61e7c30408, -0x41c3830408, -0x6143820c00, -0x61e5cf0400, -0x1c3830600, -0x21c3820c10, -0x61c6810608, -0x61a3820800, -0x41c3830604, -0x6143830400, -0x1e2870204, -0x21e7c70c10, -0x21e5c60c10, -0x41c7870204, -0x21e3870408, -0x60c1020c10, -0x1c3830408, -0x20c1820810, -0x6143820400, -0x3c3878302, -0x4182830204, -0x41c1810e00, -0x8181030204, -0x41c3810204, -0x40c3820400, -0x61e3830c00, -0x40c3820400, -0x61e3820800, -0x41c2c70204, -0x61e7c70408, -0x21c3860400, -0x41c3830608, -0x21c3860408, -0xc2c7848104, -0x21e7c10608, -0x21c7870410, -0x21c3870e00, -0x21e7c70408, -0x61c7c78c10, -0x41c3810204, -0x41c3830600, -0x1c3870204, -0x1c3870604, -0x41c7810608, -0x6147870410, -0x41c3830408, -0x41c3820400, -0x20c3830400, -0x4143020408, -0xc1c3870102, -0x21c3830410, -0x20c1820c10, -0x41c3830600, -0x41c3830400, -0x61c3830408, -0x31c78d0400, -0x43c7cf8302, -0x41e5dd9200, -0x41c2870604, -0x1c3810608, -0x21e7820c10, -0x21c3820c00, -0x21a48f0408, -0x41c3870208, -0x20c1820410, -0x41c3830c10, -0x21c2870400, -0xc3c7810704, -0x4143820408, -0x20c3820800, -0x41e6c78204, -0x61e3830410, -0x21e3830408, -0x4143810204, -0x163830400, -0x61c3830408, -0x2187870410, -0x4147810600, -0x20c2870408, -0x20c1020400, -0x41c78d0204, -0x1c3820400, -0x41c3810200, -0x61e7850608, -0x61a3820c10, -0x4142870400, -0x1c3830204, -0x6144870600, -0x1c3810408, -0x1c3810600, 
-0xc3810008, -0x1c3850200, -0x61c3870400, -0x6127c70400, -0x6145850204, -0x3e4870200, -0xe3c7cf8304, -0x1c3830204, -0x61a2870410, -0x6143810408, -0x6143810408, -0x61c2870408, -0xc3870408, -0x41c0820d10, -0x4147810204, -0x61e3820c10, -0xc3830604, -0x1c7830410, -0x1c3810400, -0x61c3830608, -0x4142870400, -0x41c1820408, -0x4082810200, -0x71c28f0600, -0x1e3820410, -0x43c7838602, -0x1c3870408, -0x1e3820810, -0x41c2870608, -0x3c7850206, -0x43e6c78102, -0x61c3870408, -0x61c3820408, -0x41c3830408, -0x41e3820c10, -0x6162870200, -0x4143830408, -0x41c3830600, -0xc344870204, -0x4141850204, -0x6143860400, -0x61a3820810, -0x30c3870400, -0x1c1820400, -0x20c3870608, -0x1c3810408, -0x1c3850204, -0x61e3c30e00, -0x61e6cf0208, -0x21e4c70200, -0x21e6870408, -0x4163830408, -0x21c3c30408, -0x21c3830c10, -0x61e3c38e00, -0x4142030200, -0x1c3810200, -0x1c7850608, -0x61c7850204, -0x61e3830c10, -0x61e7c70408, -0xc3c7c50204, -0x21c3810600, -0x61c3830410, -0xc3c7c10600, -0x43c7c08208, -0x1c7850204, -0x61c4c70408, -0x1c3810408, -0x1c3810408, -0x41e7810408, -0xc344850204, -0x21e3820810, -0x20e3820800, -0x61c7830600, -0x61e4c70608, -0x61e3830410, -0x2083820408, -0xc3c7870608, -0x61c3820c10, -0x41c3850200, -0x6143c70400, -0x6183020408, -0x83c381020c, -0x61c6870204, -0x41c3870204, -0x21e7c70410, -0x20e3820c10, -0x6143830408, -0x6142870208, -0x31e3c70c10, -0x44870000, -0x43c48f8302, -0x61648f0204, -0x61c3870608, -0xc1448f0200, -0x61e3830408, -0x6143830400, -0x1c3820c10, -0x6143820400, -0x41c3830600, -0x61e3c70418, -0x6101830400, -0x43c7c78304, -0x143810200, -0x4143850200, -0x4081820400, -0x6143810c00, -0x41c3830608, -0x1c3830410, -0x1c3810204, -0x1c7810204, -0x4142830400, -0x10e3820810, -0x43c781820c, -0x41c3830600, -0x4142830200, -0xc1c2870204, -0x6143810e08, -0x21c3820c10, -0x1c3830600, -0x6142830200, -0x1e3820810, -0x60c1820c10, -0x41c3830204, -0x4143870204, -0x41c3850200, -0x1c2870008, -0xc3c4ce8102, -0x4147810200, -0x4143850204, -0x41e7810608, -0x2003820000, -0x6146870204, -0xc142830200, 
-0x20c3820410, -0x21e3c20810, -0x41c3820c00, -0x4143830408, -0x20c3820400, -0x21e6c60c10, -0xe163c18e00, -0x6144870608, -0x6103870200, -0x21e4870208, -0x6145850408, -0x41c7c30204, -0x40c1820408, -0x4142830204, -0x4143830408, -0x1e4870204, -0x31c3820410, -0x21c1820400, -0x21c3810608, -0x1c6870408, -0x61c7810c00, -0x20c3820810, -0xc183830200, -0x31e6c30c10, -0x6125870400, -0x41c2870204, -0x31a3870400, -0x21c3830408, -0x1c3820408, -0x1e5cf0400, -0x21e3c30418, -0x41c3c70608, -0x63c7870204, -0x6167870400, -0x61e7c70c10, -0x43c7810302, -0x4142830600, -0xe3c4870200, -0x1c3830600, -0x63e4cf8100, -0x63e7c78302, -0x61c3830408, -0x61c48f0600, -0x1e3820418, -0x387c08100, -0x1e3830408, -0x2183850208, -0x31e3870c10, -0x1e786040c, -0x21e3830c00, -0xc146870204, -0x1c3830408, -0xc3850400, -0x4142870204, -0x1c3810204, -0x1c3820408, -0x31e6cf0810, -0xe3c7830600, -0x4141820400, -0x40c1020408, -0x3083820410, -0x31e7860c10, -0x41c3870408, -0x61e5870400, -0x61c3820408, -0x61c3870408, -0xc183810302, -0x21e6c70408, -0x41c3870200, -0x4143870400, -0x41e3830400, -0x1c3850200, -0x43efcf8300, -0x41c3870604, -0x21c3820c10, -0x6143820c10, -0x6142870408, -0x21c3810608, -0x21c7810204, -0xe2860410, -0xc2c7830202, -0x20c1820c10, -0x20c1820400, -0x61c4870204, -0x41c4870204, -0x41c3830408, -0x1c1850200, -0x20c3820c10, -0x1c3820408, -0xc3c5850204, -0x20c1820400, -0x1c3810204, -0x20c2830400, -0x61c3830408, -0x4183830204, -0x61e2870408, -0x41c3830204, -0x21e7870204, -0x63c4870204, -0x41c3870604, -0x61c2870200, -0x1e3830410, -0x1c7850202, -0x41c3830408, -0x71a58f0410, -0x43c4cf8100, -0x41c3810200, -0x43c4cf8204, -0x6163810408, -0x1c3830410, -0xc143810204, -0x1c2870204, -0x1c3810200, -0xe1c3830418, -0x4101820400, -0x61c3830404, -0x41c3870204, -0x81c3830604, -0x1c3830408, -0x6142870400, -0x20c3820c10, -0x21a3860810, -0x61c7870408, -0x61c7870604, -0xc347830300, -0x21c3830410, -0x41c3830408, -0x21e5870408, -0x4143810600, -0xc247c38102, -0x61c2870400, -0x6143870600, -0x61c78f0204, -0x4187810204, -0xc347c58300, 
-0x4143830400, -0x41c3830400, -0x61c7870204, -0x1c3870408, -0x41c3830408, -0x20a2860400, -0xe367c58e00, -0xc142830300, -0x4142870200, -0x21c3870410, -0xc1c7870204, -0x41c381060c, -0xc3c3810600, -0x20c1820c10, -0x21c3830400, -0x2042820400, -0x21a4860410, -0x61c2820400, -0x4181810200, -0x41c1820408, -0x6143870408, -0x1c3830408, -0x1c7810204, -0x4147810204, -0x41c5870408, -0x83c6c78102, -0x1c3870204, -0x31e3830c10, -0x11e7810208, -0x61c7870408, -0x6144870204, -0x61c7830200, -0x11e7c30c00, -0x41c2870408, -0x41c3820400, -0x1c7810200, -0x1c3830410, -0x61c3820408, -0xe3e4c70608, -0x20e1820410, -0x41c3810408, -0x41c3810600, -0x61c3820408, -0x1e5cf0410, -0xc346870304, -0x20c3820408, -0x61e3830c10, -0x1c3830408, -0x4181820408, -0x1c2870204, -0x41c3830400, -0x63e7cf0606, -0x61e3830c08, -0x21c3830604, -0x41c3820408, -0x43c7c18300, -0x6104870400, -0x41c3830604, -0xe1e3c78300, -0x4143810000, -0x81c7870204, -0x31e3860830, -0xc3c7810302, -0x20c3850400, -0x6143870400, -0x1c3820410, -0xc347810204, -0x1c3810408, -0x21e7870600, -0x61a3c30410, -0x81c3810204, -0x1c2870408, -0xe3c60830, -0x20e2870400, -0x61c3810608, -0xc143810e00, -0x8284850302, -0x41c3830408, -0x1c3810408, -0x41c3830204, -0x41c2c70200, -0x4143810204, -0x41c6870200, -0xe1c3870204, -0x21e3418610, -0x4143830204, -0x1e7c70204, -0x61c2860408, -0x21c3820c10, -0x4146870200, -0x2100870200, -0x20e3820810, -0x1e7c70608, -0x41c3830600, -0x61c3830c10, -0x4143830400, -0x41c3870608, -0x1e3820408, -0x41c2870604, -0x41c3830600, -0x41c7870204, -0x143860408, -0x41c3830608, -0x1c3830408, -0x20c3820810, -0x1c2860400, -0x61e4c70204, -0x41c3860408, -0x31e3870c10, -0xc347830600, -0x21e3830408, -0x20c3820400, -0x1c4c70204, -0x61e3860c18, -0x1c7870408, -0x61a2870408, -0xc1c3830204, -0x41c3830408, -0x43c7810204, -0xc363810e08, -0x41c3830608, -0x1c3820408, -0x6043020410, -0x1c3830204, -0x4143870600, -0x20c3820400, -0x4144870204, -0xe321c10e00, -0x1c3810204, -0x41c2870204, -0xc3c20c10, -0x6143830600, -0x61c3830408, -0x30e3820800, -0x4143810200, 
-0x4143810408, -0x20c1820410, -0x1c385060c, -0x61c3820410, -0x43c7810302, -0x21e3820810, -0xe344c70204, -0x2142870400, -0xc244870200, -0x41c3830608, -0x20c3860c10, -0x6141830c10, -0x6367c50204, -0x41e4870204, -0x61c3830408, -0x6142830608, -0x6142870200, -0x61c3820400, -0x20c3830408, -0x61c3870608, -0x3c4870204, -0x41c3810200, -0x6143820410, -0x31e78f0410, -0x61c3830410, -0x21c3870600, -0x41c3830408, -0xe3820400, -0x1c5850204, -0x41c3810200, -0x61c7860408, -0x4143830600, -0x6143830408, -0x41c3870604, -0x4347838300, -0x6143870408, -0x6142870200, -0x3e7c48f00, -0x4143820400, -0x61c1020408, -0x1c3810204, -0xc143810608, -0x1c3830408, -0x8387c78300, -0x21c48f0200, -0x61e3830c10, -0x61e3c30c18, -0x20c3820410, -0xe344878302, -0x41e3830408, -0x1c7850204, -0x6187c10400, -0x61e2870408, -0xc103810608, -0xc3e5870204, -0x41e3c30608, -0x6143820410, -0x41c3810408, -0x40c3860400, -0x61e7870c10, -0x61c3850408, -0x6142830200, -0x6143810608, -0x71a7870608, -0x41c5850200, -0x2083820400, -0x21c3830c10, -0x41c1830408, -0xc2860400, -0x4143810408, -0x4100870200, -0x1c3810204, -0x41c3830408, -0x61c3830410, -0x61e7830408, -0xc142870204, -0x31e3860c10, -0x21e5850410, -0x6142830200, -0x3c383060c, -0x8383870300, -0x41c7810408, -0x41c7870204, -0x61c3830408, -0x41a2870400, -0xc1c3830604, -0x183810200, -0x41c1020408, -0x41c7870200, -0x41c3830604, -0x20c3820410, -0x61e58f0600, -0x41c3830408, -0x61c7870600, -0x1c3830408, -0x21c7c70408, -0x1c3830418, -0x41c3830600, -0x4143870204, -0x21c3810408, -0x41e7830410, -0x3c6830102, -0x61c3820408, -0x1c3820c00, -0x61c3820400, -0x71e3c30820, -0x30e2860800, -0x1e7870608, -0xc1020400, -0x61c3830410, -0x21c3870400, -0x41c3870200, -0x41c3860408, -0x6143870400, -0x1c3820400, -0x61e7c30408, -0x61c6870600, -0x143820408, -0xe367c91e10, -0x41c2810610, -0x6145870202, -0x1e3830408, -0x1c3870008, -0x21e7cf0604, -0x31e4870408, -0x1e4c70608, -0x21e3870408, -0x1c3830400, -0x4143c20408, -0x61c3830600, -0x61c3830608, -0x21e7c70418, -0x4143830204, -0x41c3830608, -0x1c3820408, 
-0x43c7878302, -0x6183c10408, -0x41c3870608, -0xc183810204, -0x41c7c7060c, -0x61c3870204, -0xc3e7cf8100, -0x61a7c60c10, -0x6183820810, -0x4143830608, -0x4143810600, -0x4145850100, -0x21e3830408, -0x41c3820408, -0x41c6870200, -0xc182870200, -0x41c3870604, -0x40c1820408, -0x21c3830410, -0x21c7810608, -0xc143810200, -0x41c7870202, -0xa146c70200, -0x20c3820408, -0x20c2860400, -0x1c3820408, -0xc1c3810204, -0x61c3870408, -0x3c7c10410, -0x4183810100, -0x21c3820408, -0xe3820810, -0x4143810600, -0xc1c3810204, -0x41c3830400, -0xe3860408, -0x41c3870408, -0x60c1810c00, -0x61e2870400, -0x8345cf8100, -0x21e3830c10, -0x4144870204, -0x63e7c70608, -0x6122870200, -0x20c3820408, -0x61c3830408, -0x61e3cd1e10, -0x6103870200, -0x41c3870400, -0x1c3830408, -0xc3c3810608, -0x41c383060c, -0x4143860400, -0x1c3810200, -0x1c3820400, -0x20c3820800, -0x21c6870408, -0x43c7810204, -0xc346c78304, -0x40c3820400, -0x4101820408, -0x61c3810200, -0x183810200, -0xc3820810, -0x83c7818304, -0x4143820400, -0x41c6c70604, -0x41c3820408, -0x61c1810c10, -0x61e3870400, -0x20e3860408, -0x43c3c30608, -0x41c3830608, -0x41c3870204, -0x4143810408, -0x1c3830408, -0xc3c7870302, -0x20c3860410, -0xc244cf8102, -0x1c3820408, -0x2083870408, -0x1c3810600, -0x43c7878100, -0x1c7810204, -0x41c3810608, -0x1c3810204, -0x4143830600, -0x6367850608, -0x61e3830c00, -0x1c3850208, -0x1e3860400, -0x4101830600, -0x61a2870608, -0x41e3820408, -0x4142870204, -0x1c3830604, -0x61e3820400, -0x41c48f0204, -0x4143830408, -0x41c3830408, -0x1c6cd0000, -0x61c7870600, -0x6163820400, -0x4143870204, -0x21e3830c10, -0x41c7810608, -0xc143818e00, -0x1c7870204, -0x21c3820408, -0x1c7850208, -0x4143850200, -0x21e7cf0608, -0x4146870204, -0x61c3020810, -0x61c2870604, -0x61c3c70408, -0x1c7850204, -0x8143810200, -0x1c3830408, -0x1c3870608, -0x63e4cf0608, -0x41c3870408, -0x1c3810408, -0x41c3c10410, -0x6142860400, -0x1c3830604, -0x41c5cf0204, -0x21e7c70608, -0x61c3810e08, -0x61a1811c10, -0x20c1820c00, -0x41e3830400, -0xc3870408, -0x4147850204, -0x41a3c70608, 
-0x2187c70408, -0x2043820c00, -0x41c3830408, -0x6103830408, -0x6143820810, -0x61e7870408, -0x41c7870408, -0x21c3820400, -0x60e1820800, -0x40c2870400, -0x3e4ce8102, -0x4142870204, -0x1c5870200, -0x21e7860c10, -0x21e3820c10, -0x1e7870408, -0x41c3820408, -0x1c7830408, -0x21e3830c10, -0x20c3820400, -0x61e7810608, -0x1c3830608, -0x4143820400, -0x6324cf0b00, -0x41e3870608, -0x1e3860410, -0x3c7c08102, -0x61c3830408, -0x1c3830604, -0x61e7830408, -0x21c1820408, -0x1c3820408, -0x1c3820408, -0x1c3820400, -0x41c3830400, -0x61c3870608, -0x21e2830408, -0x21c3820400, -0x61e3820800, -0x21c3870408, -0x61a2c70400, -0x1c3820410, -0x61e3820c10, -0x4142870408, -0x41c2870408, -0x6323c78608, -0x43c7810608, -0x20c3820408, -0x1c3870604, -0xe2648f0200, -0x6143870400, -0x4103810200, -0x61c3830408, -0x20e3820c10, -0x83c7850306, -0x41e3830408, -0x6183820400, -0x61e2c70400, -0x60c3820c00, -0x73e6cf8300, -0x41c3870300, -0x61e3830410, -0xc146870300, -0xc3c3810204, -0x6143820408, -0x183810204, -0x41c3820408, -0x61c3820c10, -0x4147810204, -0x1c3870204, -0x41c3830410, -0x4143810204, -0xc204c78100, -0x1c3810408, -0x1c4870408, -0x21c3820400, -0x41c3c30410, -0x21c3830408, -0x61e3830c10, -0x4142870200, -0x83c7830204, -0x21c3820c10, -0x4142850000, -0x61c3830608, -0x61c2870200, -0xc1c3810204, -0x21e3830410, -0x20c3820410, -0xc3820810, -0x41c4c78500, -0xc142830200, -0x71c3830c10, -0x4143830600, -0x41c3820408, -0xc3810600, -0x41e3830408, -0x31a3870410, -0x21e7c70c10, -0x21c3820400, -0x21c3830410, -0x61e7c70608, -0x6143820c00, -0x21c3820400, -0x2083820410, -0x61c7870400, -0x61e3830410, -0x61c3820408, -0x71a78f0410, -0xc3c5870204, -0x61e38d0408, -0x6143820c00, -0xc3c6878300, -0x1c3820400, -0x41c3820408, -0x61e7870408, -0x1c3810204, -0x6143830400, -0x41e6cf0000, -0x4143820400, -0x41c3830600, -0x6123830410, -0x20c3000408, -0x20a3820800, -0x4146870604, -0x41c3830204, -0x6146870400, -0x61c3830408, -0x21e3830400, -0xc3830408, -0x41c3830604, -0xc143810200, -0x23e7c70400, -0x1c3870204, -0x4183810600, -0x41c4870204, 
-0x1c3820408, -0x61c3020408, -0x1c3810204, -0x61c6870200, -0x61e6870604, -0x60c1820400, -0x61c3820810, -0xc307c10200, -0x21e78f0408, -0x63c7c78302, -0x61c3830600, -0x61c7870608, -0x1c78d0204, -0x61c1820810, -0x6143830408, -0x20c3860400, -0x61c3870600, -0x41c3810600, -0x1c3830408, -0x1c3830408, -0x21c3820408, -0x41c6870204, -0x1c3830204, -0x41c3830408, -0x41c3810204, -0x21c3820c10, -0x21e7870408, -0x21c3830410, -0x61c6870204, -0x1c3860408, -0x61c3830c10, -0x41c78f0204, -0x21e3830c00, -0x1c3830c10, -0x61a3830410, -0x61c3820c10, -0x61c3828700, -0x41c3810604, -0x21c3830400, -0x41c3830608, -0x4143820400, -0x4143870600, -0xc1c3870204, -0x1c3870408, -0x6143850200, -0x1c3850204, -0x6143820c10, -0x21c3830408, -0x61c1020810, -0x21c78f0204, -0x1c3810204, -0x21c7810608, -0x4142870200, -0x1c3850204, -0x41c7830200, -0x21e5cf0400, -0x142830200, -0x41c7850200, -0x11e78e0810, -0x61c3870408, -0x41c3830608, -0x41448f0204, -0x61c2870204, -0x61c7870608, -0x21c3830400, -0x1c3830408, -0x61c3830400, -0x183810200, -0xc3820400, -0xc143830e00, -0x10e3820820, -0x1c4870204, -0xe3c7c18e08, -0x71a6cb1e00, -0x41c7870204, -0x41c3810600, -0x61c3870608, -0x81c3870204, -0x41c3830408, -0x4141c20c10, -0xc143830204, -0x6183830408, -0x61c3860c10, -0xe3820c10, -0x1c3810200, -0x61a3820820, -0x21e3810408, -0x21e3870408, -0x20c3820400, -0x1c3820400, -0x6144cf0302, -0x41c3830408, -0x4143870200, -0x20c3820800, -0x41c3830408, -0xc3820408, -0x21e7850408, -0x4144870200, -0x61e3c70410, -0xc306478100, -0x21c3830408, -0x61e7870408, -0x21c2870204, -0x1c3830408, -0x1c2870102, -0xe344470204, -0x61c3810408, -0x1c3830408, -0x6166c70408, -0x6103810200, -0x1e1820820, diff --git a/samples/digitrec/digitrec/hcl_code_dig.py b/samples/digitrec/digitrec/hcl_code_dig.py deleted file mode 100644 index 97f579a79..000000000 --- a/samples/digitrec/digitrec/hcl_code_dig.py +++ /dev/null @@ -1,150 +0,0 @@ -import heterocl as hcl -import time -import numpy as np -import math -from digitrec_data import read_digitrec_data - -# Declare 
some constants and data types. For images, we need unsigned 49-bit -# integers, while for knn matrices, we need unsigned 6-bit integers. -N = 7 * 7 -max_bit = int(math.ceil(math.log(N, 2))) -data_size = (10, 1800) - -# HeteroCL provides users with a set of bit-accurate data types, which include -# unsigned/signed arbitrary-bit integers and unsigned/signed fixed-points. -# Here we use `UInt(N)` for an N-bit unsigned integer. -dtype_image = hcl.UInt(N) -dtype_knnmat = hcl.UInt(max_bit) - -# We can initialize a HeteroCL environment with default data type by using -# `hcl.init(dtype)`. Here we set the default data type of each variable to -# the unsigned integer with the maximum bitwidth. -hcl.init(dtype_image) - - -def top(target=None): - - # Algorithm definition (§1) - def knn(test_image, train_images): - - # Imperative programming and bit operations (§2) - def popcount(num): - out = hcl.local(0, "out") - with hcl.for_(0, train_images.type.bits) as i: - # Bit selection operation - out[0] += num[i] - return out[0] - - # This function update the candidates, i.e., `knn_mat`. Here we mutate - # through the shape of tensor `dist`. For each `dist` value, if it is - # smaller than the maximum candidate, we replace it. 
- def update_knn(dist, knn_mat, i, j): - max_id = hcl.local(0, "max_id") - with hcl.for_(0, 3) as k: - with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id[0]]): - max_id[0] = k - with hcl.if_(dist[i][j] < knn_mat[i][max_id[0]]): - knn_mat[i][max_id[0]] = dist[i][j] - - # Main algorithm (§3) - # Fist step: XOR (§3.1) - diff = hcl.compute(train_images.shape, - lambda x, y: train_images[x][y] ^ test_image, - "diff") - - # Second step: popcount (§3.2) - dist = hcl.compute(diff.shape, - lambda x, y: popcount(diff[x][y]), - "dist") - - - # Third step: initialize the candidates (§3.3) - knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") - - - # Fourth step: update the candidates (§3.4) - hcl.mutate(dist.shape, - lambda x, y: update_knn(dist, knn_mat, x, y), - "knn_update") - - # Final step: return the candidates (§3.5) - return knn_mat - - # Inputs/Outputs definition (§4) - # Scalars (§4.1) - test_image = hcl.placeholder((), "test_image") - # Tensors (§4.2) - train_images = hcl.placeholder(data_size, "train_images") - - # Data type customization (§5.1) - scheme = hcl.create_scheme([test_image, train_images], knn) - scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat) - - # Compute customization (§5.2) - s = hcl.create_schedule_from_scheme(scheme) - - diff = knn.diff - dist = knn.dist - knn_update = knn.knn_update - - # Merge loop nests - s[diff].compute_at(s[dist], dist.axis[1]) - s[dist].compute_at(s[knn_update], knn_update.axis[1]) - - # Reorder loop to expose more parallelism - s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) - - # Parallel outer loop and pipeline inner loop - s[knn_update].parallel(knn_update.axis[1]) - s[knn_update].pipeline(knn_update.axis[0]) - - # At the end, we build the whole offloaded function. 
- return hcl.build(s, target=target) - -offload = top('sdaccel') -with open('sdaccel_code.cl', 'w') as f: - f.write(offload) - -def knn_vote(knn_mat): - knn_mat.sort(axis = 1) - knn_score = np.zeros(10) - - for i in range(0, 3): - min_id = np.argmin(knn_mat, axis = 0)[i] - knn_score[min_id] += 1 - - return np.argmax(knn_score) - -# Data preparation -train_images, _, test_images, test_labels = read_digitrec_data() - -# Classification and testing -correct = 0.0 - -# We have 180 test images -total_time = 0 -for i in range(0, 180): - - # Prepare input data to offload function - # To load the tensors into the offloaded function, we must first cast it to - # the correct data type. - hcl_train_images = hcl.asarray(train_images, dtype_image) - hcl_knn_mat = hcl.asarray(np.zeros((10, 3)), dtype_knnmat) - - # Execute the offload function and collect the candidates - start = time.time() - offload(test_images[i], hcl_train_images, hcl_knn_mat) - total_time = total_time + (time.time() - start) - - # Convert back to a numpy array - knn_mat = hcl_knn_mat.asnumpy() - - # Feed the candidates to the voting algorithm and compare the labels - if knn_vote(knn_mat) == test_labels[i]: - correct += 1 - -print("Average kernel time (s): {:.2f}".format(total_time/180)) -print("Accuracy (%): {:.2f}".format(100*correct/180)) - -# for testing -assert (correct >= 150.0) diff --git a/samples/digitrec/digitrec_main.py b/samples/digitrec/digitrec_main.py index 373200fc1..8e63ffdad 100644 --- a/samples/digitrec/digitrec_main.py +++ b/samples/digitrec/digitrec_main.py @@ -65,13 +65,9 @@ # Declare some constants and data types. For images, we need unsigned 49-bit # integers, while for knn matrices, we need unsigned 6-bit integers. 
-# N = 7 * 7 -N = 2 * 2 +N = 7 * 7 max_bit = int(math.ceil(math.log(N, 2))) -# data_size = (10, 1800) -data_size = (10, 20) - - +data_size = (10, 1800) # HeteroCL provides users with a set of bit-accurate data types, which include # unsigned/signed arbitrary-bit integers and unsigned/signed fixed-points. @@ -100,8 +96,8 @@ def popcount(num): out = hcl.local(0, "out") with hcl.for_(0, train_images.type.bits) as i: # Bit selection operation - out[0] += num[i] - return out[0] + out.v += num[i] + return out.v # This function update the candidates, i.e., `knn_mat`. Here we mutate # through the shape of tensor `dist`. For each `dist` value, if it is @@ -109,10 +105,10 @@ def popcount(num): def update_knn(dist, knn_mat, i, j): max_id = hcl.local(0, "max_id") with hcl.for_(0, 3) as k: - with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id[0]]): - max_id[0] = k - with hcl.if_(dist[i][j] < knn_mat[i][max_id[0]]): - knn_mat[i][max_id[0]] = dist[i][j] + with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id.v]): + max_id.v = k + with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]): + knn_mat[i][max_id.v] = dist[i][j] # Main algorithm (§3) # Fist step: XOR (§3.1) @@ -163,8 +159,7 @@ def update_knn(dist, knn_mat, i, j): s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) # Parallel outer loop and pipeline inner loop - # s[knn_update].parallel(knn_update.axis[1]) - s[knn_update].unroll(knn_update.axis[1]) + s[knn_update].parallel(knn_update.axis[1]) s[knn_update].pipeline(knn_update.axis[0]) # At the end, we build the whole offloaded function. @@ -339,12 +334,6 @@ def update_knn(dist, knn_mat, i, j): # This is the main function. Namely, the complete algorithm we want to run. 
We # get the offloaded function with the provided data types offload = top() -code = top('aocl') -with open('knn_aocl.cl', 'w') as f: - f.write(code) - - -assert 1==2 ############################################################################### # Voting algorithm diff --git a/samples/digitrec/host.cpp b/samples/digitrec/host.cpp deleted file mode 100644 index f8db0d699..000000000 --- a/samples/digitrec/host.cpp +++ /dev/null @@ -1,152 +0,0 @@ -#include -#include - - -// standard C/C++ headers -#include -#include -#include -#include -#include -#include - - -// opencl harness headers -#include "CLWorld.h" -#include "CLKernel.h" -#include "CLMemObj.h" -// harness namespace -using namespace rosetta; - - -//other headers -#include "utils.h" -#include "typedefs.h" -int main(int argc, char ** argv) { - uint64_t arg_0 = (uint64_t)207249344512; - uint64_t arg_top_0[1] = { arg_0 }; - - - uint64_t* arg_1 = (uint64_t*)shmat(90701824, nullptr, 0); - uint64_t arg_top_1[10 * 1800]; - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 1800; i1++) { - arg_top_1[i1 + i0*1800] = (uint64_t)(arg_1[i1 + i0*1800]); - } - } - - - uint8_t* arg_2 = (uint8_t*)shmat(90734593, nullptr, 0); - uint8_t arg_top_2[10]; - for (size_t i0 = 0; i0 < 10; i0++) { - arg_top_2[i0] = (uint8_t)(arg_2[i0]); - } - - - printf("Digit Recognition Application\n"); - - // compute bofore kernel function - - // parse command line arguments for opencl version - std::string kernelFile(""); - parse_sdaccel_command_line_args(argc, argv, kernelFile); - - - // create OpenCL world - CLWorld digit_rec_world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR); - - - // add the bitstream file - digit_rec_world.addProgram(kernelFile); - - - // create kernels - CLKernel App(digit_rec_world.getContext(), digit_rec_world.getProgram(), "App", digit_rec_world.getDevice()); - - - // create mem objects - CLMemObj source_0((void*)arg_top_0, sizeof(uint64_t), 1, CL_MEM_READ_WRITE); - CLMemObj source_1((void*)arg_top_1, 
sizeof(uint64_t), 10 * 1800, CL_MEM_READ_WRITE); - CLMemObj source_2((void*)arg_top_2, sizeof(uint8_t), 10 , CL_MEM_READ_WRITE); - - - // add them to the world - digit_rec_world.addMemObj(source_0); - digit_rec_world.addMemObj(source_1); - digit_rec_world.addMemObj(source_2); - - - // set work size - int global_size[3] = {1, 1, 1}; - int local_size[3] = {1, 1, 1}; - App.set_global(global_size); - App.set_local(local_size); - - - // add them to the world - digit_rec_world.addKernel(App); - - - // set kernel arguments - digit_rec_world.setMemKernelArg(0, 0, 0); - digit_rec_world.setMemKernelArg(0, 1, 1); - digit_rec_world.setMemKernelArg(0, 2, 2); - - // run - digit_rec_world.runKernels(); - - // read the data back - digit_rec_world.readMemObj(2); - - // compute after kernel function - for (int x = 0; x < 10; ++x) { - int id0; - id0 = 0; - int id1; - id1 = 0; - int id2; - id2 = 0; - int count; - count = 0; - for (int i = 0; i < 10; ++i) { - if (knn_mat[(i * 3)] < knn_mat[(id0 * 3)]) { - id0 = i; - } - } - for (int i1 = 0; i1 < 10; ++i1) { - if (knn_mat[(i1 * 3)] < knn_mat[(id1 * 3)]) { - id1 = i1; - } - } - for (int i2 = 0; i2 < 10; ++i2) { - if (knn_mat[(i2 * 3)] < knn_mat[(id2 * 3)]) { - id2 = i2; - } - } - if (x == id0) { - count = (count + 1); - } else { - if (x == id1) { - count = (count + 1); - } else { - if (x == id2) { - count = (count + 1); - } - } - } - arg_top_2[x] = count; - } - - for (size_t i0 = 0; i0 < 10; i0++) { - for (size_t i1 = 0; i1 < 1800; i1++) { - arg_1[i1 + i0*1800] = (uint64_t)(arg_top_1[i1 + i0*1800]); - } - } - shmdt(arg_1); - for (size_t i0 = 0; i0 < 10; i0++) { - arg_2[i0] = (uint8_t)(arg_top_2[i0]); - } - shmdt(arg_2); - - - } diff --git a/samples/digitrec/interface.cpp b/samples/digitrec/interface.cpp deleted file mode 100644 index 788ad355f..000000000 --- a/samples/digitrec/interface.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include "kernel.cpp" - - -extern "C" -{ - void App( ap_uint<64>* source_wrapper_0, ap_uint<64>* 
source_wrapper_1, ap_uint<6>* source_wrapper_2, ap_uint<6>* source_wrapper_3 ) { - #pragma HLS INTERFACE m_axi port=source_wrapper_0 offset=slave bundle=gmem - #pragma HLS INTERFACE axis port=source_wrapper_1 offset=slave bundle=gmem - #pragma HLS INTERFACE m_axi port=source_wrapper_2 offset=slave bundle=gmem - #pragma HLS INTERFACE axis port=source_wrapper_3 offset=slave bundle=gmem - #pragma HLS INTERFACE s_axilite port=source_wrapper_0 bundle=control - #pragma HLS INTERFACE axis port=source_wrapper_1 bundle=control - #pragma HLS INTERFACE s_axilite port=source_wrapper_2 bundle=control - #pragma HLS INTERFACE axis port=source_wrapper_3 bundle=control - #pragma HLS INTERFACE s_axilite port=return bundle=control - - ap_uint<64> source_wrapper_temp_0[1]; - ap_uint<64> source_wrapper_temp_1[10][1800]; - ap_int<32> source_wrapper_temp_2[10]; - ap_uint<6> source_wrapper_temp_3[10][3]; - for (int i0 = 0; i0 < 10; i0++) { - for (int i1 = 0; i1 < 1800; i1++) { - source_wrapper_temp_1[i0][i1] = source_wrapper_1[i1+ i0*1800]; - } - } - for (int i0 = 0; i0 < 10; i0++) { - source_wrapper_temp_2[i0] = source_wrapper_0[i0]; - } - for (int i0 = 0; i0 < 10; i0++) { - for (int i1 = 0; i1 < 3; i1++) { - source_wrapper_temp_3[i0][i1] = source_wrapper_1[i1+ i0*3]; - } - } - - top( source_wrapper_temp_0, source_wrapper_temp_1, source_wrapper_temp_2, source_wrapper_temp_3); - - for (int i0 = 0; i0 < 10; i0++) { - for (int i1 = 0; i1 < 3; i1++) { - source_wrapper_3[i1 + i0*3 ] = source_wrapper_temp_3[i0][i1]; - } - } - for (int i0 = 0; i0 < 10; i0++) { - source_wrapper_2[i0 ] = source_wrapper_temp_2[i0]; - } -} -} diff --git a/samples/digitrec/kernel.cpp b/samples/digitrec/kernel.cpp deleted file mode 100644 index 68893cb7f..000000000 --- a/samples/digitrec/kernel.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include - -void top(ap_uint<64>* arg_top_0, ap_uint<64>* train_images_stream_recv, ap_int<32>* arg_top_2, ap_uint<6>* knn_mat){ - ap_uint<6> knn_mat[10][3]; - for 
(ap_int<32> x = 0; x < 10; ++x) { - for (ap_int<32> y = 0; y < 3; ++y) { - knn_mat[x][y] = (ap_uint<6>)50; - } - } - ap_int<32> knn_update; - for (ap_int<32> y1 = 0; y1 < 1800; ++y1) { - for (ap_int<32> x1 = 0; x1 < 10; ++x1) { - #pragma HLS pipeline - ap_uint<6> dist; - ap_int<32> diff; - diff = ((ap_int<32>)(train_images_stream_recv[x1][y1] ^ arg_top_0)); - ap_uint<6> out; - out = (ap_uint<6>)0; - for (ap_int<32> i = 0; i < 64; ++i) { - out = ((ap_uint<6>)(((ap_int<34>)out) + ((ap_int<34>)diff[i]))); - } - dist = out; - ap_int<32> max_id; - max_id = 0; - for (ap_int<32> i1 = 0; i1 < 3; ++i1) { - if (knn_mat[((max_id / 3) + x1)][(max_id % 3)] < knn_mat[x1][i1]) { - max_id = i1; - } - } - if (dist < knn_mat[((max_id / 3) + x1)][(max_id % 3)]) { - knn_mat[((max_id / 3) + x1)][(max_id % 3)] = dist; - } - } - } - ap_int<32> sort; - for (ap_int<32> x2 = 0; x2 < 10; ++x2) { - for (ap_int<32> y2 = 0; y2 < 3; ++y2) { - ap_int<32> val; - val = 0; - if (y2 == 1) { - if (knn_mat[x2][2] < knn_mat[x2][1]) { - val = ((ap_int<32>)knn_mat[x2][1]); - knn_mat[x2][1] = knn_mat[x2][2]; - knn_mat[x2][2] = ((ap_uint<6>)val); - } - } else { - if (knn_mat[x2][1] < knn_mat[x2][0]) { - val = ((ap_int<32>)knn_mat[x2][0]); - knn_mat[x2][0] = knn_mat[x2][1]; - knn_mat[x2][1] = ((ap_uint<6>)val); - } - } - } - } - ap_int<32> new[10][3]; - for (ap_int<32> x3 = 0; x3 < 10; ++x3) { - for (ap_int<32> y3 = 0; y3 < 3; ++y3) { - new[x3][y3] = ((ap_int<32>)knn_mat[x3][y3]); - } - } - } - diff --git a/samples/digitrec/knn_aocl.cl b/samples/digitrec/knn_aocl.cl deleted file mode 100644 index 760fa87ae..000000000 --- a/samples/digitrec/knn_aocl.cl +++ /dev/null @@ -1,35 +0,0 @@ - -#include "ihc_apint.h" -__kernel void default_function(__global uint* restrict test_image, __global uint* restrict train_images, __global uint* restrict knn_mat) { - for (int x = 0; x < 10; ++x) { - for (int y = 0; y < 3; ++y) { - knn_mat[(y + (x * 3))] = (uint2_t)2; - } - } - uint4_t knn_update; - #pragma unroll - for (int y1 
= 0; y1 < 20; ++y1) { - for (int x1 = 0; x1 < 10; ++x1) { - uint2_t dist; - uint4_t diff; - diff = ((uint4_t)(train_images[(y1 + (x1 * 20))]) ^ (uint4_t)(test_image)); - uint2_t out; - out = (uint2_t)0; - for (int i = 0; i < 4; ++i) { - out = ((uint2_t)(((uint5_t)out) + ((uint5_t)((diff & (1L << i)) >> i)))); - } - dist = out; - uint4_t max_id; - max_id = (uint4_t)0; - for (int i1 = 0; i1 < 3; ++i1) { - if (knn_mat[(((int)max_id) + (x1 * 3))] < knn_mat[(i1 + (x1 * 3))]) { - max_id = ((uint4_t)i1); - } - } - if (dist < knn_mat[(((int)max_id) + (x1 * 3))]) { - knn_mat[(((int)max_id) + (x1 * 3))] = dist; - } - } - } -} - diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 62c12602b..4fc484f47 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -406,6 +406,7 @@ void GenWrapperCode(TVMArgs& args, stream << ";\n"; } + // vars init for values for (size_t i = 0; i < arg_stream_types.size(); i++) { auto shape = std::get<2>(arg_stream_types[i]); for (size_t j = 0; j < shape.size(); j++) { @@ -421,7 +422,7 @@ void GenWrapperCode(TVMArgs& args, stream << "[i" << k << "]"; } stream << " = "; - stream << "source_wrapper_" << j; + stream << "source_wrapper_" << i; stream << "[i" << shape.size() - 1; int mul = 1; for (size_t k = shape.size() - 1; k > 0; k--) { @@ -436,6 +437,11 @@ void GenWrapperCode(TVMArgs& args, PrintIndent(stream, indent); stream << "}\n"; } + if (shape.size() == 0) { + PrintIndent(stream, indent); + stream << "source_wrapper_temp_" << i; + stream << "[0] = source_wrapper_" << i << "[0];\n"; + } } // print top func @@ -732,7 +738,7 @@ void GenHostCode(TVMArgs& args, stream << "// set kernel arguments\n"; // PrintIndent(stream, indent); // stream << "digit_rec_world.setConstKernelArg(0, 0, arg_top_0);\n"; - for (size_t i = 0;i < arg_stream_types.size();i++) { + for (size_t i = 0; i < arg_stream_types.size(); i++) { PrintIndent(stream, indent); stream << "digit_rec_world.setMemKernelArg(0, "<< i 
<< ", " << i; stream << ");\n"; @@ -745,8 +751,10 @@ void GenHostCode(TVMArgs& args, stream << "digit_rec_world.runKernels();\n\n"; PrintIndent(stream, indent); stream << "// read the data back\n"; - PrintIndent(stream, indent); - stream << "digit_rec_world.readMemObj(2);\n"; + for (size_t i = args.size() - 1; i < arg_stream_types.size(); i++) { + PrintIndent(stream, indent); + stream << "digit_rec_world.readMemObj(" << i << ");\n"; + } // generate host (post-kernel) stream << "\n"; From ed31f99c9a579d5d7058b0b07ecf796cdc22c22a Mon Sep 17 00:00:00 2001 From: Hecmay Date: Fri, 8 Nov 2019 15:37:02 -0500 Subject: [PATCH 088/103] [update] stream example --- python/heterocl/devices.py | 9 +++++---- python/heterocl/tvm/build_module.py | 7 +++++-- samples/stream/stream.py | 22 +++++++++++----------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 127dc9e81..dd560e083 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -80,7 +80,7 @@ def __getattr__(self, key): def set_lang(self, lang): assert lang in \ ["opencl", "hlsc", "c", "opengl", "merlinc", "cuda", "metal"], \ - "unsupported lang sepc" + lang + "unsupported lang sepc " + lang self.impls["lang"] = lang return self @@ -153,9 +153,10 @@ def __getattr__(cls, key): devs = dev_table[key] host = devs[0].set_lang("opencl") xcel = devs[1].set_lang("hlsc") - elif key == "zynq": - host = CPU("arm") - xcel = FPGA("xilinx") + elif key == "zc706": + devs = dev_table[key] + host = devs[0].set_lang("hlsc") + xcel = devs[1].set_lang("opencl") elif key == "ppac": host = CPU("riscv") xcel = PIM("ppac") diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 7d07e13b5..09627f047 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -427,6 +427,9 @@ def build_fpga_kernel(sch, args, target, name="default_function"): if "sdaccel" in str(target.tool): host = 
target.host.lang.replace("opencl", "aocl") xcel = target.xcel.lang.replace("hlsc", "vhls") + if "vivado_hls" in str(target.tool): + host = target.host.lang.replace("hlsc", "vhls") + xcel = target.xcel.lang.replace("opencl", "aocl") builder = getattr(codegen, "build_{0}".format(host)) host_code = builder(fdevice) findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") @@ -446,11 +449,11 @@ def tvm_callback_syn_postproc(code): def get_util_path(path): return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/design/" - if target.tool.mode == "source": return xcel_code + host_code - elif "emu" in str(target.tool.mode): + if "emu" in str(target.tool.mode): builder = getattr(codegen, "build_{0}".format("sim")) f = builder(fdevice, ["s"], ["wwq", "swsw"]) return f + else: return xcel_code + host_code except AttributeError: raise AttributeError("Cannot find the target builder %s" % target) diff --git a/samples/stream/stream.py b/samples/stream/stream.py index 12e760d30..5c2396a57 100644 --- a/samples/stream/stream.py +++ b/samples/stream/stream.py @@ -1,7 +1,7 @@ import heterocl as hcl -# run on cpu by default -hcl.init(place=hcl.CPU("riscv")) +hcl.init() +target = hcl.platform.zc706 initiation_interval = 4 a = hcl.placeholder((10, 20), name="a") @@ -34,23 +34,23 @@ def ret_mul(c, d, e): # s[op1].pipeline(op1.axis[0], initiation_interval) # stream into modules / device -a0, b0 = s.stream_to([a, b], hcl.FPGA("intel")) -d0 = s.stream_to(d, hcl.FPGA('intel')) +a0, b0 = s.to([a, b], target.xcel) +d0 = s.to(d, target.xcel) #s.partition(b0, dim=2, factor=2) -s.stream_to([a0, b0], s[add_mul.ret_add]) -s.stream_to(d0, s[add_mul.ret_mul]) +s.to([a0, b0], s[add_mul.ret_add]) +s.to(d0, s[add_mul.ret_mul]) # within device move producer to consumer -s.stream_to(c, s[add_mul.ret_mul], - s[add_mul.ret_add], depth=10) +s.to(c, s[add_mul.ret_mul], + s[add_mul.ret_add], depth=10) # return tensor for inter-device move -e0 = s.stream_to(e, hcl.CPU('riscv')) +# e0 = 
s.stream_to(e, hcl.CPU('riscv')) # print(add_mul.ret_mul._buf, c._buf) print(hcl.lower(s)) -# code = hcl.build(s, target="aocl") -# print(code) +code = hcl.build(s, target) +print(code) # # with open("example.cl", "w") as f: # f.write(code) From 73daf282a4fa2293d3ebab8be5ab93d96d9334e7 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Sun, 10 Nov 2019 16:00:25 -0500 Subject: [PATCH 089/103] [add] rocc-ppac sim --- .gitmodules | 3 + python/heterocl/devices.py | 20 +- python/heterocl/tools.py | 7 +- python/heterocl/tvm/build_module.py | 61 +++--- rocc-ppac | 1 + tvm/src/codegen/build_common.cc | 177 ++++++++++-------- tvm/src/codegen/codegen_c.h | 9 +- tvm/src/schedule/schedule_dataflow_rewrite.cc | 32 +++- 8 files changed, 198 insertions(+), 112 deletions(-) create mode 160000 rocc-ppac diff --git a/.gitmodules b/.gitmodules index 292bfaba8..693559a42 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "soda"] path = soda url = https://github.com/UCLA-VAST/soda-compiler.git +[submodule "rocc-ppac"] + path = rocc-ppac + url = https://github.com/cornell-zhang/rocc-ppac.git diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index dd560e083..511d19588 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -48,9 +48,11 @@ def __repr__(self): str(self.options) tool_table = { - "aws_f1" : tool("sdaccel", *option_table["sdaccel"]), - "zc706" : tool("vivado_hls", *option_table["vivado_hls"]), - "stratix10_sx": tool("aocl", *option_table["aocl"]) + "aws_f1" : tool("sdaccel", *option_table["sdaccel"]), + "zc706" : tool("vivado_hls", *option_table["vivado_hls"]), + "ppac" : tool("rocket", *option_table["rocket"]), + "stratix10_sx": tool("aocl", *option_table["aocl"]), + "llvm" : tool("llvm", *option_table["llvm"]) } class Device(object): @@ -127,11 +129,12 @@ def __init__(self, vendor, model, **kwargs): raise DeviceError(model + " not supported yet") super(PIM, self).__init__("PIM", vendor, model, **kwargs) def __repr__(self): 
- return "PIM (" + str(self.model) + ")" + return "pim-" + str(self.model) dev_table = { "aws_f1" : [CPU("intel", "e5"), FPGA("xilinx", "xcvu19p")], "zc706" : [CPU("arm", "a9"), FPGA("xilinx", "xc7z045")], + "rocc-ppac" : [CPU("riscv", "riscv"), PIM("ppac", "ppac")], "stratix10_sx": [CPU("arm", "a53"), FPGA("intel", "stratix10_gx")] } @@ -157,9 +160,14 @@ def __getattr__(cls, key): devs = dev_table[key] host = devs[0].set_lang("hlsc") xcel = devs[1].set_lang("opencl") + elif key == "llvm": + devs = None + host = None + xcel = None elif key == "ppac": - host = CPU("riscv") - xcel = PIM("ppac") + devs = dev_table["rocc-ppac"] + host = devs[0].set_lang("c") + xcel = None else: # unsupported device raise DeviceError("not supported") tool = tool_table[key] diff --git a/python/heterocl/tools.py b/python/heterocl/tools.py index 38d5e79d3..4df5f96f8 100644 --- a/python/heterocl/tools.py +++ b/python/heterocl/tools.py @@ -6,13 +6,14 @@ "intel" : ["cpu_e5", "cpu_i7", "fpga_stratix10_gx", "fpga_stratix10_dx", "fpga_stratix10_mx"], "arm" : ["cpu_a7", "cpu_a9", "cpu_a53"], - "riscv" : ["riscv"] + "riscv" : ["cpu_riscv"] } option_table = { - "sdaccel" : ("sw_emu", {"version" : "2017.1", - "clock" : 1}), + "llvm" : ("llvm_sim", {"version" : "6.0.0"}), + "sdaccel" : ("sw_emu", {"version" : "2017.1", "clock" : 1}), "vivado_hls" : ("csim", {"version" : "2017.1"}), + "rocket" : ("source", {"RISCV" : ""}), # refer to xilinx2016_1/ug904-vivado-implementation.pdf "vivado" : ("pnr", diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 09627f047..9a1c1389e 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -24,6 +24,18 @@ from . 
import make from ..devices import platform +# test build sim +@register_func +def tvm_callback_syn_postproc(code): + return "test" + +@register_func +def get_util_path(platform): + if platform == "aws_f1": + return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/design/" + elif platform == "rocket": + return "/work/zhang-x1/users/sx233/heterocl/rocc-ppac/tests" + class DumpIR(object): """ Dump IR for each pass. @@ -424,35 +436,40 @@ def build_fpga_kernel(sch, args, target, name="default_function"): fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] try: # generate and split code - if "sdaccel" in str(target.tool): + host, xcel = None, None + if "sdaccel" == target.tool.name: host = target.host.lang.replace("opencl", "aocl") xcel = target.xcel.lang.replace("hlsc", "vhls") - if "vivado_hls" in str(target.tool): + if "vivado_hls" == target.tool.name: host = target.host.lang.replace("hlsc", "vhls") xcel = target.xcel.lang.replace("opencl", "aocl") - builder = getattr(codegen, "build_{0}".format(host)) - host_code = builder(fdevice) - findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") - host_code = host_code[findex + 6 : rindex] - - builder = getattr(codegen, "build_{0}".format(xcel)) - xcel_code = builder(fdevice) - findex, rindex = xcel_code.find("{device}"), xcel_code.rfind("{device}") - xcel_code = xcel_code[findex + 8 : rindex] - - # test build sim - @register_func - def tvm_callback_syn_postproc(code): - return "test" - - @register_func - def get_util_path(path): - return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/design/" + # generate inline assembly c and invoke + if "rocket" == target.tool.name: + host = target.host.lang.replace("c", "rv64_ppac") + + host_code, xcel_code = "", "" + if host: # src mode generate host code + builder = getattr(codegen, "build_{0}".format(host)) + host_code = builder(fdevice) + findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") + host_code = host_code[findex + 6 : 
rindex] + + if xcel: # src mode generate xcel code + builder = getattr(codegen, "build_{0}".format(xcel)) + xcel_code = builder(fdevice) + findex, rindex = xcel_code.find("{device}"), xcel_code.rfind("{device}") + xcel_code = xcel_code[findex + 8 : rindex] + + # return simulation built function if "emu" in str(target.tool.mode): builder = getattr(codegen, "build_{0}".format("sim")) - f = builder(fdevice, ["s"], ["wwq", "swsw"]) - return f + keys = [k for k in target.tool.options.keys()] + vals = [v for v in target.tool.options.values()] + keys.insert(0, "name") + vals.insert(0, target.tool.name) + return builder(fdevice, keys, vals) + # return source code only else: return xcel_code + host_code except AttributeError: diff --git a/rocc-ppac b/rocc-ppac new file mode 160000 index 000000000..c0d93c48c --- /dev/null +++ b/rocc-ppac @@ -0,0 +1 @@ +Subproject commit c0d93c48c789c877c9c9e3eae90ed8ef9c28abc6 diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 4fc484f47..7838d9513 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -20,10 +20,18 @@ #include "merlinc/codeanalys_merlinc.h" #include "hlsc/codegen_vhls.h" #include "opencl/codegen_aocl.h" +#include "ppac/codegen_rv64_ppac.h" namespace TVM { namespace runtime { +std::string getpath(void) { + char buff[256]; + getcwd(buff, 256); + std::string cwd(buff); + return cwd; +} + void PrintIndent(std::ofstream& stream, int indent) { for (int i = 0; i < indent; i++) stream << ' '; @@ -506,13 +514,11 @@ void GenHostCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, LoweredFunc func, - std::string pre_kernel, - std::string post_kernel, + std::string host_code, std::vector>>& arg_stream_types) { int indent = 0; std::ofstream stream; stream.open("__tmp__/host.cpp"); - // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/host/digit_recognition.cpp"); stream << "#include \n"; stream << "#include \n"; stream << 
"\n"; @@ -620,6 +626,9 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "// compute bofore kernel function"; // stream being axis interface host, channel for kernel + size_t pos = host_code.find("top("); + std::string pre_kernel = host_code.substr(0, pos -1); + std::string post_kernel = host_code.substr(host_code.find('\n', pos) + 1); stream << pre_kernel; stream << "\n"; @@ -785,15 +794,13 @@ void GenHostCode(TVMArgs& args, class SimModuleNode final : public ModuleNode { public: SimModuleNode(LoweredFunc func, - std::string pre_host_code, - std::string post_host_code, + std::string host_code, std::vector>> arg_stream_types, - std::string dev_code) + std::string dev_code, std::string platform, std::unordered_map options) : func_(func), - pre_host_(pre_host_code), - post_host_(post_host_code), + host_(host_code), arg_stream_types_(arg_stream_types), - dev_(dev_code) { + dev_(dev_code), platform_(platform), options_(options) { } const char* type_key() const { @@ -805,6 +812,7 @@ class SimModuleNode final : public ModuleNode { const std::string& name, const std::shared_ptr& sptr_to_self) final { return PackedFunc([this](TVMArgs args, TVMRetValue* rv){ + if (args.size() != (int)func_->args.size()) LOG(FATAL) << "The function should take in " << func_->args.size() << " inputs but get " << args.size(); @@ -817,22 +825,39 @@ class SimModuleNode final : public ModuleNode { LOG(CLEAN) << "Generating harness files ..."; system("rm -rf __tmp__; mkdir __tmp__"); - // generate interface wrapper for kernel args - GenWrapperCode(args, shmids, arg_types, arg_stream_types_, func_); - // host code invoking extern c wrapped hlsc kernel - GenHostCode(args, shmids, arg_types, func_, - pre_host_, post_host_, arg_stream_types_); - GenKernelCode(dev_); std::string path; if (const auto* f = Registry::Get("get_util_path")) - path = (*f)("aws_f1").operator std::string(); - system(("cp " + path + "/* __tmp__/").c_str()); - - LOG(CLEAN) << "Running SW simulation ..."; 
- system("cd __tmp__; source ./run_sw.sh"); - LOG(CLEAN) << "Finished C simulation"; + path = (*f)(platform_).operator std::string(); + system(("cp -r " + path + "/* __tmp__/").c_str()); + + if (platform_ == "sdaccel") { + GenWrapperCode(args, shmids, arg_types, arg_stream_types_, func_); + GenHostCode(args, shmids, arg_types, func_, + host_, arg_stream_types_); + GenKernelCode(dev_); + + LOG(CLEAN) << "Running SW simulation ..."; + system("cd __tmp__; source ./run_sw.sh"); + // emulation for ppac flow + } else if (platform_ == "rocket") { + // generate rocket emulator + std::string ppac = path + "/../"; + std::string cmd = "cd " + ppac + ";"; + cmd += std::string("cp src/Ppac.v rocket/src/main/resources/vsrc;") + + std::string("cp src/PpacRoCC.scala rocket/src/main/scala/tile;") + + std::string("cd rocket && git apply ../src/rocc-ppac.patch;") + + std::string("cd emulator && make CONFIG=RoccExampleConfig"); + system(cmd.c_str()); + // generate host and run proxy kernel test + std::string compile = "cd __tmp__;"; + compile += std::string("autoconf; mkdir build; cd build;") + + std::string("../configure --with-riscvtools=") + + options_["RISCV"] + std::string(";make"); + system(compile.c_str()); + } + + // clean & extract resource information FreeSharedMem(args, shmids, arg_sizes); - // extract resource information if (const auto* f = Registry::Get("tvm_callback_syn_postproc")) { std::string code; code = (*f)("test").operator std::string(); @@ -843,10 +868,11 @@ class SimModuleNode final : public ModuleNode { private: LoweredFunc func_; - std::string pre_host_; - std::string post_host_; + std::string host_; std::vector>> arg_stream_types_; std::string dev_; + std::string platform_; + std::unordered_map options_; }; using var2nameType = std::unordered_map& arg_vars, - std::unordered_map& stream_table, - var2nameType& arg_top_vars, - std::string dev_code) { - // process info: shape type and stream - std::vector>> arg_type; - for (size_t i = 0 ; i < arg_vars.size(); 
i++) { - auto v = arg_vars[i]; - auto nameType = arg_top_vars[v]; - bool is_stream; - if (stream_table[v]) - is_stream = true; - else is_stream = false; - auto item = std::make_tuple(is_stream, std::get<1>(nameType), - std::get<2>(nameType)); - arg_type.push_back(item); - } + std::string host_code, + std::string dev_code, + std::vector>> arg_type, + std::string platform, std::unordered_map options) { std::shared_ptr n = - std::make_shared(func, pre_host_code, post_host_code, - arg_type, dev_code); + std::make_shared(func, host_code, + arg_type, dev_code, + platform, options); return Module(n); } } // namespace runtime @@ -1013,12 +1025,6 @@ class StreamCollector final : public IRVisitor { class CodeGenXcel : public CodeGenVivadoHLS { public: int arg_top_count{0}; - std::string pre_kernel; - std::string post_kernel; - // map for generating wrapper - var2nameType arg_top_vars; - std::vector arg_vars; - std::unordered_map stream_table; str2tupleMap map_arg_type_; LoweredFunc f_; @@ -1191,12 +1197,6 @@ class CodeGenXcel : public CodeGenVivadoHLS { class CodeGenHost : public CodeGenAOCL { public: int arg_top_count{0}; - std::string pre_kernel; - std::string post_kernel; - // map for generating wrapper - std::vector arg_vars; - std::unordered_map stream_table; - var2nameType arg_top_vars; void PrintType(Type t, std::ostream &os) { int lanes = t.lanes(); @@ -1466,22 +1466,17 @@ class CodeGenHost : public CodeGenAOCL { PrintStmt(op->body); }; - // Split host into pre/post kernel - void SplitHost() { - std::string code = this->GetHost(); - size_t pos = code.find("top("); - pre_kernel = code.substr(0, pos -1); - post_kernel = code.substr(code.find('\n', pos) + 1); - } }; // unified simulation function for diff platforms +template runtime::Module BuildSimModule(Array funcs, Array attrs, Array values) { CodeAnalysMerlinC ca; - CodeGenHost cg_host; - CodeGenXcel cg_dev; + CGHost cg_host; + CGXcel cg_dev; + for (LoweredFunc f : funcs) { ca.AddFunction(f); str2tupleMap 
map_arg_type; @@ -1489,19 +1484,53 @@ runtime::Module BuildSimModule(Array funcs, cg_host.AddFunction(f, map_arg_type); cg_dev.AddFunction(f, map_arg_type); } - cg_host.SplitHost(); + // process info: shape type and stream + auto& arg_vars = cg_dev.arg_vars; + auto& stream_table = cg_dev.stream_table; + auto& arg_top_vars = cg_dev.arg_top_vars; + std::vector>> arg_type; + for (size_t i = 0 ; i < arg_vars.size(); i++) { + auto v = arg_vars[i]; + auto nameType = arg_top_vars[v]; + bool is_stream; + if (stream_table[v]) + is_stream = true; + else is_stream = false; + auto item = std::make_tuple(is_stream, std::get<1>(nameType), + std::get<2>(nameType)); + arg_type.push_back(item); + LOG(WARNING) << v; + } + // tool option mapping and platform + std::string platform = values[0].as()->value; + std::unordered_map options; + for (size_t k = 1; k < attrs.size(); k++) { + auto key = attrs[k].as()->value; + auto val = values[k].as()->value; + options[key] = val; + } return runtime::CreateSimModule(funcs[0], - cg_host.pre_kernel, - cg_host.post_kernel, - cg_dev.arg_vars, - cg_dev.stream_table, - cg_dev.arg_top_vars, - cg_dev.GetDevice()); + cg_host.GetHost(), + cg_dev.GetDevice(), + arg_type, platform, options); } TVM_REGISTER_API("codegen.build_sim") .set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = BuildSimModule(args[0], args[1], args[2]); + // dispatch to corr codegen + auto& sptr = args[2].node_sptr(); + CHECK(sptr->is_type()); + auto* n = static_cast(sptr.get()); + auto data = n->data[static_cast(0)]; + std::string type = Expr(data).as()->value; + if (type == "rocket") { + *rv = BuildSimModule + (args[0], args[1], args[2]); + } else if (type == "sdaccel") { + *rv = BuildSimModule + (args[0], args[1], args[2]); + } else { + } }); } // namespace codegen diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index 1d7466af3..03c8b2292 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -23,6 +23,8 @@ namespace codegen { using 
namespace ir; template using str2tupleMap = std::unordered_map>; +using var2nameType = std::unordered_map>>; /*! * \brief A base class to generate C code. @@ -48,7 +50,7 @@ class CodeGenC : * \brief Add the function to the generated module. * \param f The function to be compiled. */ - void AddFunction(LoweredFunc f); + virtual void AddFunction(LoweredFunc f); /*! * \brief Finalize the compilation and return the code. * \return The code. @@ -187,6 +189,11 @@ class CodeGenC : std::map > var_shape_map_save; std::unordered_map range_save; + // map for generating wrapper + var2nameType arg_top_vars; + std::vector arg_vars; + std::unordered_map stream_table; + protected: void SaveFuncState(LoweredFunc f); void RestoreFuncState(LoweredFunc f); diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index 5d2ee7cb1..cfc0601c6 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -198,6 +198,7 @@ class KernelUpdater final : public IRMutator { const bool kernel_channel) : target_(target), type_(type), is_producer_(is_producer), + // setup common channel name kernel_channel_(kernel_channel) { if (kernel_channel_) common_name = getName(); } @@ -296,7 +297,7 @@ void Schedule::to_stage(const Tensor& target, Buffer target_buffer; if (const ExternOpNode* op = target_stage->op.as()) { target_buffer = op->output_placeholders[0]; - // remove and current stage (only consumer) + // remove the receiver buffer (call kernel directly in top) target_stage->op = ExternOpNode::make(op->name, "", Array(), @@ -310,6 +311,7 @@ void Schedule::to_stage(const Tensor& target, std::smatch match_result; std::regex_match(target_buffer->name, match_result, reg); std::string old_name = match_result.str(1); + // update kernel def body KernelUpdater mutator(old_name, stream_type, false, false); dest->op = ExternOpNode::make(destOp->name, @@ -349,9 +351,12 @@ void Schedule::stream_to(const Tensor& 
target, if (target == op->inputs[j]) { target_buffer = op->input_placeholders[j]; consumers.push_back(s); - if (std::regex_match(op->name, std::regex(destOp->name + "(\\d)"))) + // record streamed data pos in kernel call + if (std::regex_match(op->name, + std::regex(destOp->name + "(\\d)"))) pos[dest] = j; - else if (std::regex_match(op->name, std::regex(destOp->name + "(\\d)"))) + else if (std::regex_match(op->name, + std::regex(destOp->name + "(\\d)"))) pos[source] = j; break; } @@ -363,9 +368,10 @@ void Schedule::stream_to(const Tensor& target, target_buffer = op->output_placeholders[0]; consumers.push_back(target_stage); } - // update original kernels + // mutator (is_producer false, kernel_channel true) KernelUpdater destMutator(target_buffer->name, stream_type, false, true); + // mutate kernel def and repalce lw / st dest->op = ExternOpNode::make(destOp->name, destOp->tag, destOp->axis, @@ -373,6 +379,7 @@ void Schedule::stream_to(const Tensor& target, destOp->input_placeholders, Array(), destMutator.Mutate(destOp->body)); + // mutator (is_producer true, kernel_channel true) KernelUpdater srcMutator(target_buffer->name, stream_type, true, true); source->op = ExternOpNode::make(srcOp->name, @@ -382,13 +389,14 @@ void Schedule::stream_to(const Tensor& target, srcOp->input_placeholders, Array(), srcMutator.Mutate(srcOp->body)); - // remove alloc buffer of kernels + // update kernel call ops for (auto s : consumers) { const ExternOpNode* op = s->op.as(); Stmt body = AttrStmt::make(VarExpr(), "device_scope", StringImm::make("fpga"), op->body); + // not alloc buffer for kernel call s->op = ExternOpNode::make(op->name, op->tag, op->axis, @@ -480,6 +488,18 @@ Tensor Schedule::move_to(const Tensor& target, load_expr, stream_type, channel_depth); + // handle placeholder back to host case + size_t consumer_pos = min_pos; + switch (device_type) { + case DeviceType::CPU: + consumer_pos = num_stage; + break; + case DeviceType::FPGA: + break; + case DeviceType::GPU: + 
break; + } + // for (size_t j = 0; j < target->shape.size(); j++) { // consumer_body = For::make( // VarExpr(csm_loop_vars[j]), @@ -505,7 +525,7 @@ Tensor Schedule::move_to(const Tensor& target, consumer_output_placeholders, consumer_body); Stage consumer_stage = Stage(consumer_op); - stages->data.insert(stages->data.begin() + min_pos, consumer_stage.node_); + stages->data.insert(stages->data.begin() + consumer_pos, consumer_stage.node_); (*this)->stage_map.Set(consumer_op, consumer_stage); // build producer (receiver) stage which takes in data from streaming From e8fe22181b67c60a2bd97bff3bbfc7fe11fd7133 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 12 Nov 2019 22:38:15 -0500 Subject: [PATCH 090/103] [rm] submodule --- .gitmodules | 3 --- rocc-ppac | 1 - 2 files changed, 4 deletions(-) delete mode 160000 rocc-ppac diff --git a/.gitmodules b/.gitmodules index 693559a42..292bfaba8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "soda"] path = soda url = https://github.com/UCLA-VAST/soda-compiler.git -[submodule "rocc-ppac"] - path = rocc-ppac - url = https://github.com/cornell-zhang/rocc-ppac.git diff --git a/rocc-ppac b/rocc-ppac deleted file mode 160000 index c0d93c48c..000000000 --- a/rocc-ppac +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c0d93c48c789c877c9c9e3eae90ed8ef9c28abc6 From c98d85957d9ecd8a38ab21ab066430c4946fc6a1 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 12 Nov 2019 22:45:34 -0500 Subject: [PATCH 091/103] [update] rocc ppac hlib --- .gitmodules | 3 +++ hlib/rocc-ppac | 1 + 2 files changed, 4 insertions(+) create mode 160000 hlib/rocc-ppac diff --git a/.gitmodules b/.gitmodules index 292bfaba8..6e63adee0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "soda"] path = soda url = https://github.com/UCLA-VAST/soda-compiler.git +[submodule "hlib/rocc-ppac"] + path = hlib/rocc-ppac + url = git@github.com:cornell-zhang/rocc-ppac.git diff --git a/hlib/rocc-ppac b/hlib/rocc-ppac new file mode 160000 index 
000000000..40d323d0c --- /dev/null +++ b/hlib/rocc-ppac @@ -0,0 +1 @@ +Subproject commit 40d323d0c81e2f64dbfb63afb5eb5d6ccf7c5e48 From 6d9780a03627f79310f81c52740a82a1edc766ec Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 25 Nov 2019 16:25:09 -0500 Subject: [PATCH 092/103] [add] unified sim & kernel updater --- hlib/python/hlib/nn.py | 11 +++ python/heterocl/devices.py | 12 ++- python/heterocl/tools.py | 2 +- python/heterocl/tvm/build_module.py | 32 ++++++- python/heterocl/tvm/schedule.py | 11 ++- samples/digitrec/digitrec_vhls.py | 96 ++++++++----------- samples/sobel/sobel.py | 91 ++++++++++++++++++ tvm/include/tvm/schedule.h | 1 + tvm/src/api/api_lang.cc | 6 +- tvm/src/codegen/build_common.cc | 28 +++--- tvm/src/codegen/codegen_c.cc | 3 +- tvm/src/schedule/schedule_dataflow_rewrite.cc | 78 ++++++++------- tvm/src/template/vivado/Makefile | 31 ++++++ tvm/src/template/vivado/run.tcl | 36 +++++++ tvm/src/template/vivado/timer.h | 94 ++++++++++++++++++ 15 files changed, 418 insertions(+), 114 deletions(-) create mode 100644 samples/sobel/sobel.py create mode 100644 tvm/src/template/vivado/Makefile create mode 100644 tvm/src/template/vivado/run.tcl create mode 100644 tvm/src/template/vivado/timer.h diff --git a/hlib/python/hlib/nn.py b/hlib/python/hlib/nn.py index c8fa146a8..cf860b389 100644 --- a/hlib/python/hlib/nn.py +++ b/hlib/python/hlib/nn.py @@ -32,6 +32,17 @@ def _pad(*indices): return data[tuple(index_tuple)] return hcl.compute(out_shape, _pad, name='pad') +def conv2d_nchw_imp(Input, Filter, Output, stride=[1,1], padding=[[0,0],[0,0]]): + with hcl.for_(0,Output.shape[0]) as n: + with hcl.for_(0,Output.shape[1]) as c: + with hcl.for_(0,Output.shape[2]) as h: + with hcl.for_(0,Output.shape[3]) as w: + partial = hcl.local(0) + with hcl.for_(0,Filter.shape[-2]) as x: + with hcl.for_(0,Filter.shape[-1]) as y: + partial.v += Input[n][c][h+x][w+y] * Filter[0][0][x][y] + Output[n,c,h,w] = partial + def conv2d_nchw(Input, Filter, name="conv2d", stride=[1,1], 
padding=[[0,0],[0,0]]): out_dtype = Input.dtype batch, in_channel, in_height, in_width = Input.shape diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 511d19588..20403ab6b 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -32,7 +32,7 @@ def __init__(self, name, mode, kwargs): def __getattr__(self, entry): return self.mapping[entry] - def __call__(self, mode, setting): + def __call__(self, mode, setting={}): self.mode = mode self.options = setting return self @@ -77,6 +77,7 @@ def __init__(self, types, vendor, self.impls[key] = value def __getattr__(self, key): + """ device hierarchy """ return self.impls[key] def set_lang(self, lang): @@ -181,7 +182,16 @@ def __init__(self, name, devs, host, xcel, tool): self.xcel = xcel self.tool = tool + if isinstance(host, CPU): + self.cpu = host + if isinstance(xcel, FPGA): + self.fpga = xcel + elif isinstance(xcel, PIM) and \ + xcel.model == "ppac": + self.ppac = xcel + def __getattr__(self, key): + """ return tool options """ return self.tool.__getattr__(key) def __call__(self, tooling=None): diff --git a/python/heterocl/tools.py b/python/heterocl/tools.py index 4df5f96f8..bf47753fa 100644 --- a/python/heterocl/tools.py +++ b/python/heterocl/tools.py @@ -11,7 +11,7 @@ option_table = { "llvm" : ("llvm_sim", {"version" : "6.0.0"}), - "sdaccel" : ("sw_emu", {"version" : "2017.1", "clock" : 1}), + "sdaccel" : ("sw_emu", {"version" : "2017.1", "clock" : "1"}), "vivado_hls" : ("csim", {"version" : "2017.1"}), "rocket" : ("source", {"RISCV" : ""}), diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 9a1c1389e..5a78bb5f0 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -6,6 +6,7 @@ from __future__ import absolute_import as _abs import warnings import types +import os from ._ffi.node import NodeBase, register_node from ._ffi.function import register_func @@ -34,8 +35,37 @@ def 
get_util_path(platform): if platform == "aws_f1": return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/design/" elif platform == "rocket": + ppac = "/work/zhang-x1/users/sx233/heterocl/hlib/rocc-ppac" + emulator = os.path.join(ppac, "rocket/emulator/emulator-freechips." + \ + "rocketchip.system-RoccExampleConfig-debug") + # build emulator if not exist + if not os.path.isfile(emulator): + cmd = "cd " + ppac + ";" + cmd += "cp src/Ppac.v rocket/src/main/resources/vsrc;" + \ + "cp src/PpacRoCC.scala rocket/src/main/scala/tile;" + \ + "cd rocket && git apply ../src/rocc-ppac.patch;" + \ + "cd emulator && make CONFIG=RoccExampleConfig debug" + # create subprocess to check + subprocess.Popen(cmd, shell=True, stdout=open("build.log", "w")).wait() + + # re-build proxy kernel + if not os.path.isfile(ppac + "/rocket/riscv-pk/build/pk"): + cmd = "cd " + ppac + "/rocket/riscv-pk;" + cmd += "git apply ../../tests/patches/riscv-pk.patch;" + cmd += "mkdir build; cd build;" + cmd += " ../configure --prefix=$RISCV/riscv64-unknown-elf --host=riscv64-unknown-elf;" + cmd += "make -j8; make install" + subprocess.Popen(cmd, shell=True, stdout=open("build.log", "w")).wait() + # return util folder needed to compile generated test files return "/work/zhang-x1/users/sx233/heterocl/rocc-ppac/tests" + # copy tcl and testbench + elif platform == "vivado_hls": + return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/vivado" + + else: # unrecognized platform + assert False, "unsupported platform" + class DumpIR(object): """ Dump IR for each pass. 
@@ -462,7 +492,7 @@ def build_fpga_kernel(sch, args, target, name="default_function"): xcel_code = xcel_code[findex + 8 : rindex] # return simulation built function - if "emu" in str(target.tool.mode): + if "emu" in str(target.tool.mode) or "sim" in str(target.tool.mode): builder = getattr(codegen, "build_{0}".format("sim")) keys = [k for k in target.tool.options.keys()] vals = [v for v in target.tool.options.values()] diff --git a/python/heterocl/tvm/schedule.py b/python/heterocl/tvm/schedule.py index 8ccd6ffbf..36ead39de 100644 --- a/python/heterocl/tvm/schedule.py +++ b/python/heterocl/tvm/schedule.py @@ -335,7 +335,7 @@ def partition(self, target, partition_type, dim, factor): def to(self, tensor, dst, src, types=_expr.StreamExpr.Channel, - depth=10, name=None): + depth=1, name=None): """ Stream data to devices or on-chip module Parameters @@ -370,7 +370,14 @@ def to(self, tensor, dst, src, _api_internal._ScheduleStream(self, tensor, dst, src, types, depth, name) else: # from externop buffer to kernel - _api_internal._ScheduleMoveToStage(self, tensor, dst, + shape = [_.value for _ in tensor.shape] + index, match = 0, [] + for s in dst.op.body.api_args: + arg_shape = [_.value for _ in s] + if shape == arg_shape: match.append(index) + index = index + 1 + assert len(match) > 0, "wrong kernel or tensor (shape not matching)" + _api_internal._ScheduleMoveToStage(self, tensor, dst, match[0], types, depth, name) @register_node("Stage") diff --git a/samples/digitrec/digitrec_vhls.py b/samples/digitrec/digitrec_vhls.py index d87e5dedd..1f219db62 100644 --- a/samples/digitrec/digitrec_vhls.py +++ b/samples/digitrec/digitrec_vhls.py @@ -6,31 +6,27 @@ N = 8 * 8 max_bit = int(math.ceil(math.log(N, 2))) +test_size = (180, ) data_size = (10, 1800) dtype_image = hcl.UInt(N) dtype_knnmat = hcl.UInt(max_bit) -# set up the platform and tool setting = { "version" : "2019.1", - "clock" : 10 + "clock" : "10" } tool = hcl.tool.vivado("csim", setting) target = hcl.platform.aws_f1 -def 
knn(test_image, train_images): - # Imperative programming and bit operations (§2) +def knn(test_images, train_images): + def popcount(num): out = hcl.local(0, "out") with hcl.for_(0, train_images.type.bits) as i: - # Bit selection operation out.v += num[i] return out.v - # This function update the candidates, i.e., `knn_mat`. Here we mutate - # through the shape of tensor `dist`. For each `dist` value, if it is - # smaller than the maximum candidate, we replace it. def update_knn(dist, knn_mat, i, j): max_id = hcl.local(0, "max_id") with hcl.for_(0, 3) as k: @@ -39,9 +35,6 @@ def update_knn(dist, knn_mat, i, j): with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]): knn_mat[i][max_id.v] = dist[i][j] - # This function sorts the 10 x 3 matrix. Sorting each row elements from - # small to large distance, and find digit index where the distance is the smallest - # then returns the digit having the highest scores def sort_knn(knn_mat, i, j): val = hcl.local(0, "val") with hcl.if_( j == 1 ): @@ -79,47 +72,41 @@ def knn_vote(knn_mat, j): count.v += 0 return count.v - # Main algorithm (§3) - # Fist step: XOR (§3.1) - diff = hcl.compute(train_images.shape, - lambda x, y: train_images[x][y] ^ test_image, - "diff") + # support + @hcl.def_([(), data_size, (10,3)]) + def knn_dist(test_image, train_images, pred_matrix) - # Second step: popcount (§3.2) - dist = hcl.compute(diff.shape, - lambda x, y: popcount(diff[x][y]), - "dist") + with hcl.for_(0, 180) as index: - # Third step: initialize the candidates (§3.3) - knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") + test_image = test_images[index] + diff = hcl.compute(train_images.shape, + lambda x, y: train_images[x][y] ^ test_image, + "diff") - # Fourth step: update the candidates (§3.4) - hcl.mutate(dist.shape, - lambda x, y: update_knn(dist, knn_mat, x, y), - "knn_update") + dist = hcl.compute(diff.shape, + lambda x, y: popcount(diff[x][y]), + "dist") - # Fifth step: voting candidates (§3.5) - hcl.mutate((10, 3), lambda x, 
y: sort_knn(knn_mat, x, y), "sort") + knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") + hcl.mutate(dist.shape, + lambda x, y: update_knn(dist, knn_mat, x, y), + "knn_update") - # Sixth step: compute the score baord ranking - knn_new = hcl.compute(knn_mat.shape, lambda x, y: knn_mat[x][y], "copy") - knn_pred = hcl.compute((10,), lambda x: knn_vote(knn_mat, x), "vote") + hcl.mutate((10, 3), lambda x, y: sort_knn(knn_mat, x, y), "sort") + knn_new = hcl.compute(knn_mat.shape, + lambda x, y: knn_mat[x][y], "copy") + knn_pred = hcl.compute((10,), + lambda x: knn_vote(knn_mat, x), "vote") - # computed data return knn_pred -# Inputs/Outputs definition (§4) -# Scalars (§4.1) -test_image = hcl.placeholder((), "test_image", dtype_image) -# Tensors (§4.2) +test_image = hcl.placeholder(test_size, "test_image", dtype_image) train_images = hcl.placeholder(data_size, "train_images", dtype_image) -# Data type customization (§5.1) scheme = hcl.create_scheme([test_image, train_images], knn) scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat) -# Compute customization (§5.2) s = hcl.create_schedule_from_scheme(scheme) diff = knn.diff @@ -127,41 +114,42 @@ def knn_vote(knn_mat, j): vote = knn.copy knn_update = knn.knn_update -# s.stream_to(test_image, hcl.FPGA("intel")) -s.to(train_images, target.xcel) +s.to([test_images, train_images], target.xcel) s.to(vote, target.host) -# Merge loop nests +# merge loop nests s[diff].compute_at(s[dist], dist.axis[1]) s[dist].compute_at(s[knn_update], knn_update.axis[1]) -# Reorder loop to expose more parallelism +# reorder loop to expose more parallelism s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) -# Parallel outer loop and pipeline inner loop +# parallel outer loop and pipeline inner loop s[knn_update].parallel(knn_update.axis[1]) s[knn_update].pipeline(knn_update.axis[0]) -# At the end, we build the whole offloaded function. +# at the end, we build the whole offloaded function. 
# print(hcl.lower(s)) f = hcl.build(s, target) train_images, _, test_images, test_labels = read_digitrec_data() -correct = 0.0 +total = len(test_images) total_time = 0 -for i in range(0, 1): - hcl_train_images = hcl.asarray(train_images, dtype_image) - hcl_knn_pred = hcl.asarray(np.zeros((10,)), dtype_knnmat) +# read returned prediction from streaming pipe +hcl_train_images = hcl.asarray(train_images, dtype_image) +hcl_knn_pred = hcl.asarray(np.zeros((total, 10)), dtype_knnmat) - start = time.time() - f(test_images[i], hcl_train_images, hcl_knn_pred) - total_time = total_time + (time.time() - start) +start = time.time() +f(test_images, hcl_train_images, hcl_knn_pred) +total_time = total_time + (time.time() - start) - knn_mat = hcl_knn_pred.asnumpy() +knn_result = hcl_knn_pred.asnumpy() - if np.argmax(knn_mat) == test_labels[i]: +correct = 0.0 +for i in range(total): + if np.argmax(knn_result[i]) == test_labels[i]: correct += 1 -print("Average kernel time (s): {:.2f}".format(total_time/1)) +print("Average kernel time (s): {:.2f}".format(total_time/total)) print("Accuracy (%): {:.2f}".format(100*correct/1)) diff --git a/samples/sobel/sobel.py b/samples/sobel/sobel.py new file mode 100644 index 000000000..aee976462 --- /dev/null +++ b/samples/sobel/sobel.py @@ -0,0 +1,91 @@ +import heterocl as hcl +import hlib +import numpy as np +from PIL import Image +from urllib.request import urlopen + +batch_size = 1 +hcl.init(hcl.Float()) +dtype = hcl.Float() +image_size = () +kernel_size = 3 + +# setup target using vivado +tool = hcl.tool.vivado("csim") +target = hcl.platform.zc706 + +def sobel(): + image = hcl.placeholder((batch_size, 1, 256, 256), "input_image") + k1 = hcl.placeholder((1, 1, 3, 3), "kernel_1") + k2 = hcl.placeholder((1, 1, 3, 3), "kernel_2") + + def kernel(input_image, kernel_1, kernel_2): + + def absolute(image, *args): + with hcl.if_(image[args] > 0): + hcl.return_(image[args]) + with hcl.else_(): + hcl.return_(-1 * image[args]) + + def dev(gx, gy, org): + 
assert gx.shape == gy.shape, "mismatch" + rx = hcl.reduce_axis(0, 255, "rx") + ry = hcl.reduce_axis(0, 255, "ry") + mat_sum = hcl.compute(gx.shape, lambda nn, ff, xx, yy: + gx[nn, ff, xx, yy] + gy[nn, ff, xx, yy], name="add") + return hcl.compute(mat_sum.shape, lambda nn, ff, xx, yy: + mat_sum[nn, ff, xx, yy] * 255.0 / hcl.max(mat_sum[nn, ff, rx, ry], axis=[rx, ry]), + name = "derv") + + # make the conv op a kernel on fpga. + # return tensor required (cannot do def_()) + output_shape = (1,1,254,254) + + # make compute wrapped in hcl def + module1 = hcl.def_([input_image.shape, kernel_1.shape, output_shape], name="conv1")(hlib.nn.conv2d_nchw_imp) + module2 = hcl.def_([input_image.shape, kernel_1.shape, output_shape], name="conv2")(hlib.nn.conv2d_nchw_imp) + conv1 = hcl.compute(output_shape, lambda *args: 0) + conv2 = hcl.compute(output_shape, lambda *args: 0) + module1(input_image, kernel_1, conv1) + module2(input_image, kernel_2, conv2) + + abs1 = hcl.compute(conv1.shape, + lambda *args: absolute(conv1, *args)) + abs2 = hcl.compute(conv2.shape, + lambda *args: absolute(conv2, *args)) + + # derivative module for normalization + return dev(abs1, abs2, input_image) + + s = hcl.create_schedule([image, k1, k2], kernel) + + # data moved to local + i0, k10 = s.to([image, k1], target.fpga) + s.to([i0, k10], s[kernel.conv1]) + s.to(kernel.derv, target.cpu) + + # create stream channel between modules + # print(type(target.fpga), hcl.lower(s)) + return hcl.build(s, target) + +# Load sample data +img = Image.open(urlopen('http://i.stack.imgur.com/8zINU.gif')) +kernel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) +kernel_y = np.flip(kernel_x.T.T, axis=0) +img = np.array(img) + +img = img[np.newaxis, ...] +img = img[np.newaxis, ...] +kernel_x = kernel_x[np.newaxis, ...] +kernel_x = kernel_x[np.newaxis, ...] +kernel_y = kernel_y[np.newaxis, ...] +kernel_y = kernel_y[np.newaxis, ...] 
+ +hcl_input = hcl.asarray(img, dtype) +kernel_x = hcl.asarray(kernel_x, dtype) +kernel_y = hcl.asarray(kernel_y, dtype) +hcl_output = hcl.asarray(np.zeros((1,1,254,254)), dtype) + +f = sobel() +f(hcl_input, kernel_x, kernel_y, hcl_output) + diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index e56085853..1ffa389da 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -358,6 +358,7 @@ class Schedule : public NodeRef { EXPORT void to_stage(const Tensor& target, Stage dest, + int arg_pos, ir::StreamType stream_type, int channel_depth, std::string name); diff --git a/tvm/src/api/api_lang.cc b/tvm/src/api/api_lang.cc index 096f902cb..543e816aa 100644 --- a/tvm/src/api/api_lang.cc +++ b/tvm/src/api/api_lang.cc @@ -464,9 +464,9 @@ TVM_REGISTER_API("_SchedulePartition") TVM_REGISTER_API("_ScheduleMoveToStage") .set_body([](TVMArgs args, TVMRetValue *ret) { args[0].operator Schedule() - .to_stage(args[1], args[2], - static_cast(args[3].operator int()), - args[4], args[5]); + .to_stage(args[1], args[2], args[3], + static_cast(args[4].operator int()), + args[5], args[6]); }); TVM_REGISTER_API("_ScheduleMove") diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 7838d9513..411a6cb29 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -838,22 +838,23 @@ class SimModuleNode final : public ModuleNode { LOG(CLEAN) << "Running SW simulation ..."; system("cd __tmp__; source ./run_sw.sh"); - // emulation for ppac flow + } else if (platform_ == "rocket") { - // generate rocket emulator - std::string ppac = path + "/../"; - std::string cmd = "cd " + ppac + ";"; - cmd += std::string("cp src/Ppac.v rocket/src/main/resources/vsrc;") + - std::string("cp src/PpacRoCC.scala rocket/src/main/scala/tile;") + - std::string("cd rocket && git apply ../src/rocc-ppac.patch;") + - std::string("cd emulator && make CONFIG=RoccExampleConfig"); - system(cmd.c_str()); // generate host and run 
proxy kernel test + GenHostCode(args, shmids, arg_types, func_, + host_, arg_stream_types_); std::string compile = "cd __tmp__;"; compile += std::string("autoconf; mkdir build; cd build;") + std::string("../configure --with-riscvtools=") + - options_["RISCV"] + std::string(";make"); + options_["RISCV"] + std::string(";make -j8"); system(compile.c_str()); + + } else if (platform_ == "vivado_hls") { + GenHostCode(args, shmids, arg_types, func_, + host_, arg_stream_types_); + GenKernelCode(dev_); + } else { + LOG(FATAL) << "unrecognized platform " << platform_; } // clean & extract resource information @@ -1488,6 +1489,7 @@ runtime::Module BuildSimModule(Array funcs, auto& arg_vars = cg_dev.arg_vars; auto& stream_table = cg_dev.stream_table; auto& arg_top_vars = cg_dev.arg_top_vars; + std::vector>> arg_type; for (size_t i = 0 ; i < arg_vars.size(); i++) { auto v = arg_vars[i]; @@ -1499,7 +1501,6 @@ runtime::Module BuildSimModule(Array funcs, auto item = std::make_tuple(is_stream, std::get<1>(nameType), std::get<2>(nameType)); arg_type.push_back(item); - LOG(WARNING) << v; } // tool option mapping and platform std::string platform = values[0].as()->value; @@ -1522,6 +1523,8 @@ TVM_REGISTER_API("codegen.build_sim") CHECK(sptr->is_type()); auto* n = static_cast(sptr.get()); auto data = n->data[static_cast(0)]; + + // create module node for simulation std::string type = Expr(data).as()->value; if (type == "rocket") { *rv = BuildSimModule @@ -1529,6 +1532,9 @@ TVM_REGISTER_API("codegen.build_sim") } else if (type == "sdaccel") { *rv = BuildSimModule (args[0], args[1], args[2]); + } else if (type == "vivado_hls") { + *rv = BuildSimModule + (args[0], args[1], args[2]); } else { } }); diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 0b11fecf0..fc656601d 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -85,7 +85,8 @@ std::string CodeGenC::GetDevice() { device << "void top(" << arg_stream.str() << "){\n" << 
device_stream.str(); if (fpga_scope_) device << stream.str(); - return decl_stream.str() + device.str() + "}\n\n"; + return decl_stream.str() + module_stream.str() + + device.str() + "}\n\n"; } std::string CodeGenC::Finish() { diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index cfc0601c6..17b6b71bf 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -123,16 +123,18 @@ class StreamConsumer final : public IRMutator { Expr Mutate_(const Load* op, const Expr& e) { Expr index = op->index; std::string target_name = op->buffer_var.get()->name_hint; - if (has_suffix(target_name, "." + target_)) { + if (target_ == target_name) { stream_data = op->buffer_var; Array keys, values; if (kernel_channel_) { keys.push_back(StringImm::make("name")); values.push_back(StringImm::make(common_name_)); } - return StreamExpr::make(op->type, op->buffer_var, type_, 10, keys, values); + return StreamExpr::make(op->type, op->buffer_var, + type_, 10, keys, values); } else { - return Load::make(op->type, op->buffer_var, index, op->predicate); + return Load::make(op->type, op->buffer_var, + index, op->predicate); } } @@ -141,10 +143,6 @@ class StreamConsumer final : public IRMutator { const ir::StreamType type_; const bool kernel_channel_; const std::string common_name_; - bool has_suffix(const std::string &str, const std::string &suffix) { - return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; - } }; class StreamProducer final : public IRMutator { @@ -164,16 +162,18 @@ class StreamProducer final : public IRMutator { Expr index = op->index; Expr value = this->Mutate(op->value); std::string target_name = op->buffer_var.get()->name_hint; - if (has_suffix(target_name, "." 
+ target_)) { + if (target_name == target_) { stream_data = op->buffer_var; Array keys, values; if (kernel_channel_) { keys.push_back(StringImm::make("name")); values.push_back(StringImm::make(common_name_)); } - return StreamStmt::make(op->buffer_var, value, type_, 10, keys, values); + return StreamStmt::make(op->buffer_var, value, + type_, 10, keys, values); } else { - return Store::make(op->buffer_var, value, index, op->predicate); + return Store::make(op->buffer_var, value, + index, op->predicate); } } @@ -182,31 +182,28 @@ class StreamProducer final : public IRMutator { const ir::StreamType type_; const bool kernel_channel_; const std::string common_name_; - bool has_suffix(const std::string &str, const std::string &suffix) { - return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; - } }; class KernelUpdater final : public IRMutator { public: static int channelCount; KernelUpdater( - const std::string& target, + const int arg_pos, const ir::StreamType& type, const bool is_producer, const bool kernel_channel) - : target_(target), type_(type), + : arg_pos_(arg_pos), type_(type), is_producer_(is_producer), // setup common channel name kernel_channel_(kernel_channel) { - if (kernel_channel_) common_name = getName(); - } + if (kernel_channel_) common_name = getName(); + } Stmt Mutate_(const KernelDef* op, const Stmt& s) { // mutate target load Stmt stmt = op->body; Array arr = op->channels; + std::string target_ = op->args[arg_pos_].get()->name_hint; if (is_producer_) { StreamProducer mutator(target_, type_, kernel_channel_, @@ -228,7 +225,7 @@ class KernelUpdater final : public IRMutator { op->ret_type, op->name, arr); } private: - const std::string target_; + const int arg_pos_; const ir::StreamType type_; const bool is_producer_; const bool kernel_channel_; @@ -289,38 +286,39 @@ class ParentStmtCollector final : public IRMutator { // stream buffer data to kernel stage void Schedule::to_stage(const Tensor& 
target, - Stage dest, + /*kernel def stage*/ Stage dest, + /*position index*/int arg_pos, StreamType stream_type, int channel_depth, std::string name) { Stage target_stage = (*this)[target]; Buffer target_buffer; + + // target stage as kernel def operator if (const ExternOpNode* op = target_stage->op.as()) { target_buffer = op->output_placeholders[0]; - // remove the receiver buffer (call kernel directly in top) + // remove the receiver buffer (keep the device scope) + const AttrStmt* attr = op->body.as(); + Stmt scope_attr = AttrStmt::make(attr->node, attr->attr_key, + attr->value, Evaluate::make(0)); target_stage->op = ExternOpNode::make(op->name, "", Array(), op->inputs, op->input_placeholders, op->output_placeholders, - Evaluate::make(0)); + scope_attr); // update dest stage body for data stream in const ExternOpNode* destOp = dest->op.as(); - std::regex reg("^(.+?)\\.stream_(.*)"); - std::smatch match_result; - std::regex_match(target_buffer->name, match_result, reg); - std::string old_name = match_result.str(1); - // update kernel def body - KernelUpdater mutator(old_name, stream_type, - false, false); - dest->op = ExternOpNode::make(destOp->name, - destOp->tag, - destOp->axis, - destOp->inputs, + KernelUpdater mutator(arg_pos, stream_type, + /*is producer*/false, + /*inter module channel*/false); + auto new_body = mutator.Mutate(destOp->body); + dest->op = ExternOpNode::make(destOp->name, destOp->tag, + destOp->axis, destOp->inputs, destOp->input_placeholders, Array(), - mutator.Mutate(destOp->body)); + new_body); } } @@ -369,7 +367,7 @@ void Schedule::stream_to(const Tensor& target, consumers.push_back(target_stage); } // mutator (is_producer false, kernel_channel true) - KernelUpdater destMutator(target_buffer->name, + KernelUpdater destMutator(0, //target_buffer->name, stream_type, false, true); // mutate kernel def and repalce lw / st dest->op = ExternOpNode::make(destOp->name, @@ -380,7 +378,7 @@ void Schedule::stream_to(const Tensor& target, Array(), 
destMutator.Mutate(destOp->body)); // mutator (is_producer true, kernel_channel true) - KernelUpdater srcMutator(target_buffer->name, + KernelUpdater srcMutator(0, //target_buffer->name, stream_type, true, true); source->op = ExternOpNode::make(srcOp->name, srcOp->tag, @@ -437,15 +435,15 @@ Tensor Schedule::move_to(const Tensor& target, } } } - } else { // move back the data after extern compute + } else { // move data generated by extern op min_pos = FindNodeRef(stages, target_stage) + 1; const ExternOpNode* op = target_stage->op.as(); target_buffer = op->output_placeholders[0]; for (size_t i = 0; i < num_stage; i++) { Stage s = (*this)->stages[i]; - if (const ExternOpNode* op = s->op.as()) { - for (size_t j = 0; j < op->inputs.size(); j++) { - if (op->output_placeholders[0] == op->input_placeholders[j]) { + if (const ExternOpNode* stage_op = s->op.as()) { + for (size_t j = 0; j < stage_op->inputs.size(); j++) { + if (op->output_placeholders[0] == stage_op->input_placeholders[j]) { consumers.push_back(s); break; } diff --git a/tvm/src/template/vivado/Makefile b/tvm/src/template/vivado/Makefile new file mode 100644 index 000000000..2afd05040 --- /dev/null +++ b/tvm/src/template/vivado/Makefile @@ -0,0 +1,31 @@ +#========================================================================== +# Makefile +#========================================================================== +# @brief: A makefile the compiles and synthesizes the program +# +# @desc: 1. "make" runs csim by default +# 2. "make csim" compiles & executes the fixed-point implementation +# 3. "make clean" cleans up the directory + + +# Extract Vivado HLS include path +VHLS_PATH := $(dir $(shell which vivado_hls))/.. +VHLS_INC ?= ${VHLS_PATH}/include + +CFLAGS = -g -I${VHLS_INC} + +all: csim + +csim: kernel.cpp host.cpp + @echo "Compiling & simulating on amdpool ..." 
+ g++ ${CFLAGS} $^ -o out -lrt + ./out + +vivado: + @echo "Run Vivado csim and HLS" + vivado_hls -f run.tcl + +clean: + rm -rf out *.txt *.dat *.prj *.log + rm -rf zedboard_project* xillydemo.bit + diff --git a/tvm/src/template/vivado/run.tcl b/tvm/src/template/vivado/run.tcl new file mode 100644 index 000000000..d80b865df --- /dev/null +++ b/tvm/src/template/vivado/run.tcl @@ -0,0 +1,36 @@ +#============================================================================= +# run_base.tcl +#============================================================================= +# @brief: A Tcl script for synthesizing the design. + +# Project name +set hls_prj out.prj + +# Open/reset the project +open_project ${hls_prj} -reset + +# Top function of the design is "top" +set_top top + +# Add design and testbench files +add_files kernel.cpp +add_files -tb host.cpp + +open_solution "solution1" +# Use Zynq device +set_part {xc7z020clg484-1} + +# Target clock period is 10ns +create_clock -period 10 + +# Directives + +############################################ + +# Simulate the C++ design +csim_design -O +# Synthesize the design +csynth_design +# Co-simulate the design +#cosim_design +exit diff --git a/tvm/src/template/vivado/timer.h b/tvm/src/template/vivado/timer.h new file mode 100644 index 000000000..77c461b00 --- /dev/null +++ b/tvm/src/template/vivado/timer.h @@ -0,0 +1,94 @@ +//--------------------------------------------------------- +// Timer.h +//--------------------------------------------------------- +#ifndef __TIMER_H__ +#define __TIMER_H__ +#include +#include +#include +#include + +#define TIMER_ON + +//--------------------------------------------------------- +// Timer is an object which helps profile programs using +// the clock() function. 
+// - By default, a timer is stopped when you instantiate it +// and must be started manually +// - Passing True to the constructor starts the timer when +// it is constructed +// - When the timer is destructed it prints stats to stdout +//--------------------------------------------------------- +class Timer { + + #ifdef TIMER_ON + + char binName[50]; + unsigned nCalls; + timeval ts_start; + float totalTime; + + public: + //------------------------------------------------------------------ + // constructor + //------------------------------------------------------------------ + Timer (const char* Name="", bool On=false) { + if (On) { + // record the start time + gettimeofday(&ts_start, NULL); + nCalls = 1; + } + else { + nCalls = 0; + } + totalTime = 0; + strcpy(binName, Name); + } + + //------------------------------------------------------------------ + // destructor + //------------------------------------------------------------------ + ~Timer () { + // on being destroyed, print the average and total time + if (nCalls > 0) { + printf ("%-20s: ", binName); + printf ("%6d calls; ", nCalls); + printf ("%7.3f msecs total time\n", 1000*totalTime); + //printf ("%7.4f msecs average time;\n", 1000*totalTime/nCalls); + } + } + + //------------------------------------------------------------------ + // start timer + //------------------------------------------------------------------ + void start() { + // record start time + gettimeofday(&ts_start, NULL); + nCalls++; + } + + //------------------------------------------------------------------ + // stop timer + //------------------------------------------------------------------ + void stop() { + // get current time, add elapsed time to totalTime + timeval ts_curr; + gettimeofday(&ts_curr, NULL); + totalTime += float(ts_curr.tv_sec - ts_start.tv_sec) + + float(ts_curr.tv_usec)*1e-6 - float(ts_start.tv_usec)*1e-6; + } + + #else + + //-------------------------------------------------------------------- + // all methods do 
nothing if TIMER_ON is not set + //-------------------------------------------------------------------- + public: + Timer (const char* Name, bool On=true) {} + void start() {} + void stop() {} + + #endif +}; + +#endif From 046048e096e84be098d4cc5a08614ee63a994108 Mon Sep 17 00:00:00 2001 From: "Yi-Hsiang (Sean) Lai" Date: Tue, 26 Nov 2019 09:55:47 -0500 Subject: [PATCH 093/103] [API] Enable select API to accept Python expressions --- python/heterocl/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/heterocl/api.py b/python/heterocl/api.py index 18c457e37..4da52786f 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -2,6 +2,7 @@ #pylint: disable=no-member from ordered_set import OrderedSet from .tvm.build_module import build as _build, lower as _lower +from .tvm.api import convert from .tvm import _api_internal as tvm_api from .tvm import schedule as _schedule from .tvm import make as _make @@ -357,4 +358,4 @@ def select(cond, true, false): ------- Expr """ - return _make.Select(cond, true, false) + return _make.Select(convert(cond), convert(true), convert(false)) From c124eb92887b6ed4a7b6c005c62b0658ca4f74d7 Mon Sep 17 00:00:00 2001 From: "Yi-Hsiang (Sean) Lai" Date: Tue, 26 Nov 2019 20:53:23 -0500 Subject: [PATCH 094/103] [API] Fixed incorrect casting for select in CastRemover --- python/heterocl/mutator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/heterocl/mutator.py b/python/heterocl/mutator.py index a38c90906..88ca42788 100644 --- a/python/heterocl/mutator.py +++ b/python/heterocl/mutator.py @@ -1,6 +1,7 @@ from .tvm import expr as _expr from .tvm import stmt as _stmt from .tvm import make as _make +from .tvm.api import convert class Mutator(object): @@ -187,9 +188,9 @@ def mutate_Cast(self, node): return _make.Cast(node.dtype, value) def mutate_Select(self, node): - condition = self.mutate(node.condition) - true_value = self.mutate(node.true_value) - false_value = 
self.mutate(node.false_value) + condition = _make.Cast("uint1", self.mutate(node.condition)) + true_value = convert(self.mutate(node.true_value)) + false_value = convert(self.mutate(node.false_value)) return _make.Select(condition, true_value, _make.Cast(true_value.dtype, false_value)) def mutate_Load(self, node): From 85db48dbdbce1b66893265493955e4b1afa74b26 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Thu, 28 Nov 2019 04:55:51 -0500 Subject: [PATCH 095/103] re-organize build common util --- python/heterocl/devices.py | 2 +- python/heterocl/tvm/build_module.py | 2 +- tvm/src/codegen/build_common.cc | 769 +----------------------- tvm/src/codegen/build_common.h | 1 + tvm/src/codegen/build_helper.cc | 794 +++++++++++++++++++++++++ tvm/src/codegen/build_helper.h | 66 ++ tvm/src/codegen/opencl/codegen_aocl.cc | 3 +- 7 files changed, 867 insertions(+), 770 deletions(-) create mode 100644 tvm/src/codegen/build_helper.cc create mode 100644 tvm/src/codegen/build_helper.h diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 20403ab6b..734c45b8c 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -160,7 +160,7 @@ def __getattr__(cls, key): elif key == "zc706": devs = dev_table[key] host = devs[0].set_lang("hlsc") - xcel = devs[1].set_lang("opencl") + xcel = devs[1].set_lang("hlsc") elif key == "llvm": devs = None host = None diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 5a78bb5f0..41adfa029 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -472,7 +472,7 @@ def build_fpga_kernel(sch, args, target, name="default_function"): xcel = target.xcel.lang.replace("hlsc", "vhls") if "vivado_hls" == target.tool.name: host = target.host.lang.replace("hlsc", "vhls") - xcel = target.xcel.lang.replace("opencl", "aocl") + xcel = target.xcel.lang.replace("hlsc", "vhls") # generate inline assembly c and invoke if "rocket" == target.tool.name: diff --git 
a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index 411a6cb29..d2efd74f9 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -10,6 +10,7 @@ #include #include #include "./build_common.h" +#include "./build_helper.h" #include #include @@ -25,772 +26,6 @@ namespace TVM { namespace runtime { -std::string getpath(void) { - char buff[256]; - getcwd(buff, 256); - std::string cwd(buff); - return cwd; -} - -void PrintIndent(std::ofstream& stream, int indent) { - for (int i = 0; i < indent; i++) - stream << ' '; -} - -inline size_t GetTypeSize(TVMType t) { - size_t byte = (t.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - return byte; -} - -inline size_t GetDataSize(TVMArray* arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= arr->shape[i]; - } - size_t byte = (arr->dtype.bits + 7) / 8; - if (byte > 2){ - if (byte <= 4) byte = 4; - else if (byte <= 8) byte = 8; - else byte = 16; - } - size *= (byte * 8 * arr->dtype.lanes + 7) / 8; - return size; -} - -inline TVMType Type2TVMType(Type t) { - TVMType tt; - if (t.is_int()) tt.code = kDLInt; - else if (t.is_uint()) tt.code = kDLUInt; - else if (t.is_float()) tt.code = kDLFloat; - else LOG(FATAL) << "Unacceptable type: " << t; - tt.bits = static_cast(t.bits()); - tt.fracs = static_cast(t.fracs()); - return tt; -} - -inline std::string PrintHalideType(Type t) { - std::string str = ""; - if (t.is_uint() || t.is_int() || t.is_fixed() || t.is_ufixed()) { - if (t.is_uint()) str += "ap_uint<" + std::to_string(t.bits()) + ">"; - else if (t.is_int()) str += "ap_int<" + std::to_string(t.bits()) + ">"; - else if (t.is_ufixed()) str += "ap_ufixed<" + std::to_string(t.bits()) + ", " + std::to_string(t.bits() - t.fracs()) + ">"; - else str += "ap_fixed<" + std::to_string(t.bits()) + ", " + std::to_string(t.bits() - t.fracs()) + ">"; - } else { - LOG(FATAL) << "Cannot convert type " << t << 
" to C type"; - } - return str; -} - -inline std::string Type2Str(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - if (t.fracs > 0) str += "ap_fixed<"; - else str += "ap_int<"; - str += std::to_string(static_cast(t.bits)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) str += "ap_ufixed<"; - else str += "ap_uint<"; - str += std::to_string(static_cast(t.bits)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - else str += ">"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2ExtStr(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - if (t.fracs > 0) str += "ap_fixed<"; - else str += "ap_int<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) str += "ap_ufixed<"; - else str += "ap_uint<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2WrapStr(TVMType t) { - std::string str = ""; - if (t.code == kDLInt) { - if (t.fracs > 0) { - str += "ap_fixed<"; - str += std::to_string(static_cast(t.bits + t.fracs)); - } else { - str += "ap_int<"; - if (t.bits <= 8) str += std::to_string(static_cast(t.bits)); - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - } - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLUInt) { - if (t.fracs > 0) { - str += "ap_ufixed<"; - str += std::to_string(static_cast(t.bits 
+ t.fracs)); - } else { - str += "ap_uint<"; - if (t.bits <= 8) str += std::to_string(static_cast(t.bits)); - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - } - if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; - else str += ">"; - } else if (t.code == kDLFloat) { - str += "float"; - } else { - LOG(FATAL) << "Unknown type"; - } - return str; -} - -inline std::string Type2Byte(TVMType t) { - std::string str = ""; - if (t.code == kDLFloat) { - str += "float"; - } else if (t.code == kDLInt || t.code == kDLUInt) { - if (t.code == kDLUInt) str += "u"; - str += "int"; - if (t.bits <= 8) str += "8"; - else if (t.bits <= 16) str += "16"; - else if (t.bits <= 32) str += "32"; - else str += "64"; - str += "_t"; - } - return str; -} - -void CollectArgInfo(TVMArgs& args, - LoweredFunc func, - std::vector& arg_sizes, - std::vector& arg_types) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - arg_sizes.push_back(GetDataSize(arr)); - arg_types.push_back(arr->dtype); - } else { - const Variable* var = func->api_args[i].as(); - TVMType t = Type2TVMType(var->type); - arg_sizes.push_back(GetTypeSize(t)); - arg_types.push_back(t); - } - } -} - -void GenSharedMem(TVMArgs& args, - std::vector& shmids, - std::vector& arg_sizes) { - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - // generate shared memory key and id - // TODO: maybe get the current path?? 
- key_t key = ftok("/", i+1); - int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); - shmids.push_back(shmid); - // copy mem from TVM args to the shared memory - void* mem = shmat(shmid, nullptr, 0); - memcpy(mem, arr->data, arg_sizes[i]); - } else { - shmids.push_back(0); - } - } -} - -void FreeSharedMem(TVMArgs& args, - const std::vector& shmids, - std::vector& arg_sizes) { - for (size_t i = 0; i < shmids.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - int shmid = shmids[i]; - void* mem = shmat(shmid, nullptr, 0); - memcpy(arr->data, mem, arg_sizes[i]); - shmdt(mem); - shmctl(shmid, IPC_RMID, nullptr); - } - } -} - -// copy values from the shared mem to local mem -void PrintCopy(TVMArray* arr, - std::ofstream& stream, - int indent, size_t nth_arr) { - for (int i = 0; i < arr->ndim; i++) { - PrintIndent(stream, indent); - stream << "for (size_t i" << i << " = 0; "; - stream << "i" << i << " < " << arr->shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - if (i == arr->ndim-1) { - PrintIndent(stream, indent); - // stream << "arg_top_" << nth_arr; - // for (int j = 0; j < arr->ndim; j++) { - // stream << "[i" << j << "]"; - // } - - stream << "arg_top_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul2 = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul2 *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul2; - } - stream << "]"; - - - stream << " = ("; - // stream << Type2ExtStr(arr->dtype); - stream << Type2Byte(arr->dtype); - - stream << ")(arg_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul; - } - stream << "])"; - if (arr->dtype.fracs > 0) - stream << " >> " << static_cast(arr->dtype.fracs); - stream << ";\n"; - } - } - for (int i = 0; i < arr->ndim; i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } -} - -// copy values from local mem back to 
shared mem -void PrintCopyBack(TVMArray* arr, - std::ofstream& stream, - int indent, size_t nth_arr) { - for (int i = 0; i < arr->ndim; i++) { - PrintIndent(stream, indent); - stream << "for (size_t i" << i << " = 0; "; - stream << "i" << i << " < " << arr->shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - if (i == arr->ndim-1) { - PrintIndent(stream, indent); - stream << "arg_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul; - } - stream << "] = ("; - // stream << Type2ExtStr(arr->dtype); - stream << Type2Byte(arr->dtype); - stream << ")(arg_top_" << nth_arr; - stream << "[i" << arr->ndim-1; - int mul2 = 1; - for (int j = arr->ndim-2; j >= 0; j--) { - mul2 *= arr->shape[j+1]; - stream << " + i" << j << "*" << mul2; - } - - stream << "])"; - - // for (int j = 0; j < arr->ndim; j++) { - // stream << "[i" << j << "]"; - // } - // stream << ")"; - if (arr->dtype.fracs > 0) - stream << " << " << static_cast(arr->dtype.fracs); - stream << ";\n"; - } - } - for (int i = 0; i < arr->ndim; i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } -} - -void GenKernelCode(std::string test_file) { - std::ofstream stream; - // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/knn_vhls.cpp"); - stream.open("__tmp__/kernel.cpp"); - stream << test_file; - stream.close(); -} - -// interface pragma to specify mem and ctrl interface in sdx -void GenWrapperCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - const std::vector>>& arg_stream_types, - LoweredFunc func) { - std::ofstream stream; - // stream.open("/home/centos/src/project_data/lab_digitrec_aws/solution/src/kernel/digitrec.cpp"); - int indent = 0; - std::string path(getenv("PWD")); - stream.open("__tmp__/interface.cpp"); - stream << "#include \n"; - stream << "#include \"" + path + "/__tmp__/kernel.cpp\"\n"; 
- stream << "\n\n"; - stream << "extern \"C\" \n"; - stream << "{\n"; - indent += 2; - PrintIndent(stream, indent); - - // wrapper func interface - stream << "void App( "; - size_t ex_arg_count = 0; - ex_arg_count = arg_stream_types.size() - arg_types.size(); - for (size_t i = 0; i < arg_types.size(); i++) { - if (i != 0) stream << ", "; - stream << Type2WrapStr(arg_types[i]); - stream << "*"; - stream << " source_wrapper_" << i; - } - for (size_t k = 0; k < ex_arg_count; k++) { - if (k != ex_arg_count) stream << ", "; - stream << PrintHalideType(std::get<1>(arg_stream_types[k + arg_types.size()])); - stream << "*"; - stream << " source_wrapper_" << k + arg_types.size(); - } - stream << " ) {\n"; - - // memeory and control pragma - for (size_t i = 0; i < arg_stream_types.size(); i++) { - std::string interface; - if (std::get<0>(arg_stream_types[i])) interface = " m_axi "; - else interface = " m_axi "; - PrintIndent(stream, indent); - stream << "#pragma HLS INTERFACE" + interface + "port="; - stream << "source_wrapper_" << i; - stream << " offset=slave bundle=gmem\n"; - } - for (size_t i = 0; i < arg_stream_types.size(); i++) { - std::string interface; - if (std::get<0>(arg_stream_types[i])) interface = " s_axilite "; - else interface = " s_axilite "; - PrintIndent(stream, indent); - stream << "#pragma HLS INTERFACE" + interface + "port="; - stream << "source_wrapper_" << i; - stream << " bundle=control\n"; - } - PrintIndent(stream, indent); - stream << "#pragma HLS INTERFACE s_axilite port=return bundle=control\n"; - stream << "\n"; - - // intermediate vars init alloc - for (size_t i = 0; i < arg_stream_types.size(); i++) { - PrintIndent(stream, indent); - stream << PrintHalideType(std::get<1>(arg_stream_types[i])); - stream << " source_wrapper_temp_" << i; - auto shape = std::get<2>(arg_stream_types[i]); - for (size_t j = 0; j < shape.size(); j++) - stream << "[" << shape[j] << "]"; - if (shape.size() == 0) stream << "[1]"; - stream << ";\n"; - } - - // vars init 
for values - for (size_t i = 0; i < arg_stream_types.size(); i++) { - auto shape = std::get<2>(arg_stream_types[i]); - for (size_t j = 0; j < shape.size(); j++) { - PrintIndent(stream, indent); - stream << "for (int i" << j << " = 0; "; - stream << "i" << j << " < " << shape[j] << "; "; - stream << "i" << j << "++) {\n"; - indent += 2; - if (j == shape.size() - 1) { - PrintIndent(stream, indent); - stream << "source_wrapper_temp_" << i; - for (size_t k = 0; k < shape.size(); k++) { - stream << "[i" << k << "]"; - } - stream << " = "; - stream << "source_wrapper_" << i; - stream << "[i" << shape.size() - 1; - int mul = 1; - for (size_t k = shape.size() - 1; k > 0; k--) { - mul *= shape[k]; - stream << "+ i" << k - 1 << "*" << mul; - } - stream << "];\n"; - } - } - for (size_t j = 0; j < shape.size(); j++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } - if (shape.size() == 0) { - PrintIndent(stream, indent); - stream << "source_wrapper_temp_" << i; - stream << "[0] = source_wrapper_" << i << "[0];\n"; - } - } - - // print top func - stream << "\n"; - PrintIndent(stream, indent); - stream << "top( "; - for (size_t i = 0;i < arg_stream_types.size(); i++) { - if (i != arg_stream_types.size() - 1){ - stream << "source_wrapper_temp_" << i; - stream << ", "; - } else { - stream << "source_wrapper_temp_" << i; - stream << ");\n"; - } - - } - stream << "\n"; - - // read back return val - for (int k = arg_stream_types.size() - 1; - k > args.size() - 2; k--) { - auto shape = std::get<2>(arg_stream_types[k]); - for (size_t i = 0; i < shape.size(); i++) { - PrintIndent(stream, indent); - stream << "for (int i" << i << " = 0; "; - stream << "i" << i << " < " << shape[i] << "; "; - stream << "i" << i << "++) {\n"; - indent += 2; - - if (i == shape.size() - 1) { - PrintIndent(stream, indent); - stream << "source_wrapper_" << k; - stream << "[i" << shape.size() - 1; - int mul = 1; - for (size_t j = shape.size() - 1; j > 0; j--) { - mul *= shape[j]; - stream 
<< " + i" << j - 1 << "*" << mul; - } - stream << " ] = "; - - stream << "source_wrapper_temp_" << k; - for (size_t j = 0; j < shape.size(); j++) { - stream << "[i" << j << "]"; - } - stream <<";\n"; - } - } - for (size_t i = 0;i < shape.size(); i++) { - indent -= 2; - PrintIndent(stream, indent); - stream << "}\n"; - } - } - stream << "}\n"; - indent -= 2; - stream << "}\n"; - stream.close(); -} - -// generate opencl kernel and mem obj -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc func, - std::string host_code, - std::vector>>& arg_stream_types) { - int indent = 0; - std::ofstream stream; - stream.open("__tmp__/host.cpp"); - stream << "#include \n"; - stream << "#include \n"; - stream << "\n"; - stream << "// standard C/C++ headers\n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "#include \n"; - stream << "\n"; - stream << "// opencl harness headers\n"; - stream << "#include \"CLWorld.h\"\n"; - stream << "#include \"CLKernel.h\"\n"; - stream << "#include \"CLMemObj.h\"\n"; - stream << "// harness namespace\n"; - stream << "using namespace rosetta;\n"; - stream << "\n"; - stream << "//other headers\n"; - stream << "#include \"utils.h\"\n"; - // stream << "#include \"typedefs.h\"\n"; - stream << "int main(int argc, char ** argv) {\n"; - indent += 2; - - int cnt = 0; // label the constant value - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << "* "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << "*)"; - stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - PrintIndent(stream, indent); - - stream << Type2Byte(arg_types[i]) << " "; - // stream << Type2Str(arg_types[i]) << " "; - stream << "arg_top_" << i; - TVMArray* arr = args[i]; - 
- stream << "["; - for (int j = 0; j < arr->ndim; j++) { - //stream << "[" << arr->shape[j] << "]"; - if (j == arr->ndim-1) { - stream << arr->shape[j]; - } else { - stream << arr->shape[j]; - stream << " * "; - } - } - stream << "];\n"; - // copy from shared mem - PrintCopy(arr, stream, indent, i); - - } else { - // directly assign the value to the variable - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << ")"; - if (args[i].type_code() == kDLInt || - args[i].type_code() == kDLUInt) { - stream << int64_t(args[i]); - } - stream << ";\n"; - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "arg_top_" << i; - stream << "[1] = { "; - - stream << "arg_" << i << " }"; - if (arg_types[i].fracs > 0) - stream << " >> " << static_cast(arg_types[i].fracs); - stream << ";\n"; - - // PrintIndent(stream, indent); - // stream << Type2Byte(arg_types[i]) << " "; - // stream << "fool_" << cnt << "[1] = { arg_top_" << i << " };\n"; - cnt += 1; - } - stream << "\n"; - } - // allocate mem for stream vars - for (size_t k = args.size(); k < arg_stream_types.size(); k++) { - auto type = std::get<1>(arg_stream_types[k]); - auto shape = std::get<2>(arg_stream_types[k]); - PrintIndent(stream, indent); - stream << Type2Byte(Type2TVMType(type)) << " " << "knn_mat["; - if (shape.size() > 0) { - for (size_t i = 0; i < shape.size(); i++) { - if (i != shape.size() - 1) - stream << shape[i] << " * "; - else stream << shape[i]; - } - } else { - stream << "1"; - } - stream << "];\n"; - } - - // generate host side (before) on arg_top_k - PrintIndent(stream,indent); - stream << "printf(\"Host Side Application\\n\");\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// compute bofore kernel function"; - // stream being axis interface host, channel for kernel - size_t pos = host_code.find("top("); - std::string pre_kernel = host_code.substr(0, pos 
-1); - std::string post_kernel = host_code.substr(host_code.find('\n', pos) + 1); - stream << pre_kernel; - - stream << "\n"; - PrintIndent(stream, indent); - stream << "// parse command line arguments for opencl version\n"; - PrintIndent(stream, indent); - stream << "std::string kernelFile(\"\");\n"; - PrintIndent(stream, indent); - stream << "parse_sdaccel_command_line_args(argc, argv, kernelFile);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// create OpenCL world\n"; - PrintIndent(stream, indent); - stream << "CLWorld digit_rec_world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// add the bitstream file\n"; - PrintIndent(stream, indent); - stream << "digit_rec_world.addProgram(kernelFile);\n"; - stream << "\n\n"; - PrintIndent(stream, indent); - stream << "// create kernels\n"; - PrintIndent(stream, indent); - stream << "CLKernel App(digit_rec_world.getContext(), digit_rec_world.getProgram(), \"App\", digit_rec_world.getDevice());\n"; - stream << "\n\n"; - - PrintIndent(stream, indent); - stream << "// create mem objects\n"; - for (int i = 0;i < args.size();i++) { - PrintIndent(stream, indent); - // if (cnt!=0) { - // stream << "CLMemObj source_" << i; - // stream << "((void*)fool_" << cnt - 1; - // stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; - // stream << "1, "; - // stream << "CL_MEM_READ_WRITE);\n"; - // cnt--; - // continue; - // } - stream << "CLMemObj source_" << i; - stream << "((void*)arg_top_" << i; - stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; - // stream << ", sizeof(" << Type2ExtStr(arg_types[i]) << "), "; - - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - for (int j = 0;j < arr->ndim;j++) { - if (j==0) { - stream << arr->shape[j] << " "; - } else { - stream << "* " << arr->shape[j]; - } - } - } else { - stream << "1"; - } - stream << ", "; - stream << "CL_MEM_READ_WRITE);\n"; - } - // addiion 
streamed data - for (size_t k = args.size(); k < arg_stream_types.size(); k++) { - auto type = std::get<1>(arg_stream_types[k]); - auto shape = std::get<2>(arg_stream_types[k]); - PrintIndent(stream, indent); - stream << "CLMemObj source_" << k; - stream << "((void*)knn_mat"; - stream << ", sizeof(" << Type2Byte(Type2TVMType(type)) << "), "; - if (shape.size() > 0) { - for (size_t j = 0; j < shape.size(); j++) { - if (j == 0) { - stream << shape[j] << " "; - } else { - stream << "* " << shape[j]; - } - } - } else { - stream << "1"; - } - stream << ", "; - stream << "CL_MEM_READ_WRITE);\n"; - } - - stream << "\n"; - PrintIndent(stream, indent); - stream << "// add them to the world\n"; - for (size_t i = 0;i < arg_stream_types.size();i++) { - PrintIndent(stream, indent); - stream << "digit_rec_world.addMemObj(source_" << i; - stream << ");\n"; - } - - stream << "\n\n"; - PrintIndent(stream, indent); - stream << " // set work size\n"; - PrintIndent(stream, indent); - int size = arg_stream_types.size(); - std::string arr = "[" + std::to_string(size) + "] = {"; - for (int i = 0; i < size; i++) { - if (i != size -1) arr += "1, "; - else arr += "1};\n"; - } - stream << "int global_size" + arr; - PrintIndent(stream, indent); - stream << "int local_size" + arr; - PrintIndent(stream, indent); - stream << "App.set_global(global_size);\n"; - PrintIndent(stream, indent); - stream << "App.set_local(local_size);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// add them to the world\n"; - PrintIndent(stream, indent); - stream << "digit_rec_world.addKernel(App);\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// set kernel arguments\n"; - // PrintIndent(stream, indent); - // stream << "digit_rec_world.setConstKernelArg(0, 0, arg_top_0);\n"; - for (size_t i = 0; i < arg_stream_types.size(); i++) { - PrintIndent(stream, indent); - stream << "digit_rec_world.setMemKernelArg(0, "<< i << ", " << i; - stream << ");\n"; - } - - stream << "\n"; - 
PrintIndent(stream, indent); - stream << "// run\n"; - PrintIndent(stream, indent); - stream << "digit_rec_world.runKernels();\n\n"; - PrintIndent(stream, indent); - stream << "// read the data back\n"; - for (size_t i = args.size() - 1; i < arg_stream_types.size(); i++) { - PrintIndent(stream, indent); - stream << "digit_rec_world.readMemObj(" << i << ");\n"; - } - - // generate host (post-kernel) - stream << "\n"; - PrintIndent(stream, indent); - stream << "// compute after kernel function\n"; - // stream being axis interface host, channel for kernel - stream << post_kernel; - - // copy to shared mem - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - TVMArray* arr = args[i]; - PrintCopyBack(arr, stream, indent, i); - // PrintCopyBack2(arr, stream, indent, i); - PrintIndent(stream, indent); - stream << "shmdt("; - stream << "arg_" << i << ");\n"; - } - } - - stream << "\n\n"; - PrintIndent(stream, indent); - stream << "}\n"; - stream.close(); - -} - class SimModuleNode final : public ModuleNode { public: SimModuleNode(LoweredFunc func, @@ -1533,7 +768,7 @@ TVM_REGISTER_API("codegen.build_sim") *rv = BuildSimModule (args[0], args[1], args[2]); } else if (type == "vivado_hls") { - *rv = BuildSimModule + *rv = BuildSimModule (args[0], args[1], args[2]); } else { } diff --git a/tvm/src/codegen/build_common.h b/tvm/src/codegen/build_common.h index f9f42d219..1be783c72 100644 --- a/tvm/src/codegen/build_common.h +++ b/tvm/src/codegen/build_common.h @@ -12,6 +12,7 @@ #include "../runtime/meta_data.h" namespace TVM { + namespace codegen { // Extract function information from device function. inline std::unordered_map diff --git a/tvm/src/codegen/build_helper.cc b/tvm/src/codegen/build_helper.cc new file mode 100644 index 000000000..c896ba11d --- /dev/null +++ b/tvm/src/codegen/build_helper.cc @@ -0,0 +1,794 @@ +/*! 
+ * Copyright (c) 2019 by Contributors + * \file build_common.cc + * \brief Build unified simulation module + */ +#include +#include +#include +#include +#include +#include +#include "./build_common.h" + +#include +#include +#include +#include +#include + +#include "merlinc/codeanalys_merlinc.h" +#include "hlsc/codegen_vhls.h" +#include "opencl/codegen_aocl.h" +#include "ppac/codegen_rv64_ppac.h" + +namespace TVM { +namespace runtime { + +std::string getpath(void) { + char buff[256]; + getcwd(buff, 256); + std::string cwd(buff); + return cwd; +} + +void PrintIndent(std::ofstream& stream, int indent) { + for (int i = 0; i < indent; i++) + stream << ' '; +} + +inline size_t GetTypeSize(TVMType t) { + size_t byte = (t.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + return byte; +} + +inline size_t GetDataSize(TVMArray* arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= arr->shape[i]; + } + size_t byte = (arr->dtype.bits + 7) / 8; + if (byte > 2){ + if (byte <= 4) byte = 4; + else if (byte <= 8) byte = 8; + else byte = 16; + } + size *= (byte * 8 * arr->dtype.lanes + 7) / 8; + return size; +} + +inline TVMType Type2TVMType(Type t) { + TVMType tt; + if (t.is_int()) tt.code = kDLInt; + else if (t.is_uint()) tt.code = kDLUInt; + else if (t.is_float()) tt.code = kDLFloat; + else LOG(FATAL) << "Unacceptable type: " << t; + tt.bits = static_cast(t.bits()); + tt.fracs = static_cast(t.fracs()); + return tt; +} + +inline std::string PrintHalideType(Type t) { + std::string str = ""; + if (t.is_uint() || t.is_int() || t.is_fixed() || t.is_ufixed()) { + if (t.is_uint()) str += "ap_uint<" + std::to_string(t.bits()) + ">"; + else if (t.is_int()) str += "ap_int<" + std::to_string(t.bits()) + ">"; + else if (t.is_ufixed()) str += "ap_ufixed<" + std::to_string(t.bits()) + ", " + std::to_string(t.bits() - t.fracs()) + ">"; + else str += "ap_fixed<" + std::to_string(t.bits()) + ", " + 
std::to_string(t.bits() - t.fracs()) + ">"; + } else { + LOG(FATAL) << "Cannot convert type " << t << " to C type"; + } + return str; +} + +inline std::string Type2Str(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if (t.fracs > 0) str += "ap_fixed<"; + else str += "ap_int<"; + str += std::to_string(static_cast(t.bits)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + else str += ">"; + } else if (t.code == kDLUInt) { + if (t.fracs > 0) str += "ap_ufixed<"; + else str += "ap_uint<"; + str += std::to_string(static_cast(t.bits)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2ExtStr(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if (t.fracs > 0) str += "ap_fixed<"; + else str += "ap_int<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLUInt) { + if (t.fracs > 0) str += "ap_ufixed<"; + else str += "ap_uint<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2WrapStr(TVMType t) { + std::string str = ""; + if (t.code == kDLInt) { + if (t.fracs > 0) { + str += "ap_fixed<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + } else { + str += "ap_int<"; + if (t.bits <= 8) str += std::to_string(static_cast(t.bits)); + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + } + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code 
== kDLUInt) { + if (t.fracs > 0) { + str += "ap_ufixed<"; + str += std::to_string(static_cast(t.bits + t.fracs)); + } else { + str += "ap_uint<"; + if (t.bits <= 8) str += std::to_string(static_cast(t.bits)); + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + } + if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits)) + ">"; + else str += ">"; + } else if (t.code == kDLFloat) { + str += "float"; + } else { + LOG(FATAL) << "Unknown type"; + } + return str; +} + +inline std::string Type2Byte(TVMType t) { + std::string str = ""; + if (t.code == kDLFloat) { + str += "float"; + } else if (t.code == kDLInt || t.code == kDLUInt) { + if (t.code == kDLUInt) str += "u"; + str += "int"; + if (t.bits <= 8) str += "8"; + else if (t.bits <= 16) str += "16"; + else if (t.bits <= 32) str += "32"; + else str += "64"; + str += "_t"; + } + return str; +} + +void CollectArgInfo(TVMArgs& args, + LoweredFunc func, + std::vector& arg_sizes, + std::vector& arg_types) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + arg_sizes.push_back(GetDataSize(arr)); + arg_types.push_back(arr->dtype); + } else { + const Variable* var = func->api_args[i].as(); + TVMType t = Type2TVMType(var->type); + arg_sizes.push_back(GetTypeSize(t)); + arg_types.push_back(t); + } + } +} + +void GenSharedMem(TVMArgs& args, + std::vector& shmids, + std::vector& arg_sizes) { + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + // generate shared memory key and id + // TODO: maybe get the current path?? 
+ key_t key = ftok("/", i+1); + int shmid = shmget(key, arg_sizes[i], 0666|IPC_CREAT); + shmids.push_back(shmid); + // copy mem from TVM args to the shared memory + void* mem = shmat(shmid, nullptr, 0); + memcpy(mem, arr->data, arg_sizes[i]); + } else { + shmids.push_back(0); + } + } +} + +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes) { + for (size_t i = 0; i < shmids.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + int shmid = shmids[i]; + void* mem = shmat(shmid, nullptr, 0); + memcpy(arr->data, mem, arg_sizes[i]); + shmdt(mem); + shmctl(shmid, IPC_RMID, nullptr); + } + } +} + +// copy values from the shared mem to local mem +void PrintCopy(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr) { + for (int i = 0; i < arr->ndim; i++) { + PrintIndent(stream, indent); + stream << "for (size_t i" << i << " = 0; "; + stream << "i" << i << " < " << arr->shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + if (i == arr->ndim-1) { + PrintIndent(stream, indent); + // stream << "arg_top_" << nth_arr; + // for (int j = 0; j < arr->ndim; j++) { + // stream << "[i" << j << "]"; + // } + + stream << "arg_top_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul2 = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul2 *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul2; + } + stream << "]"; + + + stream << " = ("; + // stream << Type2ExtStr(arr->dtype); + stream << Type2Byte(arr->dtype); + + stream << ")(arg_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul; + } + stream << "])"; + if (arr->dtype.fracs > 0) + stream << " >> " << static_cast(arr->dtype.fracs); + stream << ";\n"; + } + } + for (int i = 0; i < arr->ndim; i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } +} + +// copy values from local mem back to 
shared mem +void PrintCopyBack(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr) { + for (int i = 0; i < arr->ndim; i++) { + PrintIndent(stream, indent); + stream << "for (size_t i" << i << " = 0; "; + stream << "i" << i << " < " << arr->shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + if (i == arr->ndim-1) { + PrintIndent(stream, indent); + stream << "arg_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul; + } + stream << "] = ("; + // stream << Type2ExtStr(arr->dtype); + stream << Type2Byte(arr->dtype); + stream << ")(arg_top_" << nth_arr; + stream << "[i" << arr->ndim-1; + int mul2 = 1; + for (int j = arr->ndim-2; j >= 0; j--) { + mul2 *= arr->shape[j+1]; + stream << " + i" << j << "*" << mul2; + } + + stream << "])"; + + // for (int j = 0; j < arr->ndim; j++) { + // stream << "[i" << j << "]"; + // } + // stream << ")"; + if (arr->dtype.fracs > 0) + stream << " << " << static_cast(arr->dtype.fracs); + stream << ";\n"; + } + } + for (int i = 0; i < arr->ndim; i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } +} + +void GenKernelCode(std::string test_file) { + std::ofstream stream; + stream.open("__tmp__/kernel.cpp"); + stream << test_file; + stream.close(); +} + +// interface pragma to specify mem and ctrl interface in sdx +void GenWrapperCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + const std::vector>>& arg_stream_types, + LoweredFunc func) { + std::ofstream stream; + int indent = 0; + std::string path(getenv("PWD")); + stream.open("__tmp__/interface.cpp"); + stream << "#include \n"; + stream << "#include \"" + path + "/__tmp__/kernel.cpp\"\n"; + stream << "\n\n"; + stream << "extern \"C\" \n"; + stream << "{\n"; + indent += 2; + PrintIndent(stream, indent); + + // wrapper func interface + stream << "void App( "; + size_t ex_arg_count = 0; + 
ex_arg_count = arg_stream_types.size() - arg_types.size(); + for (size_t i = 0; i < arg_types.size(); i++) { + if (i != 0) stream << ", "; + stream << Type2WrapStr(arg_types[i]); + stream << "*"; + stream << " source_wrapper_" << i; + } + for (size_t k = 0; k < ex_arg_count; k++) { + if (k != ex_arg_count) stream << ", "; + stream << PrintHalideType(std::get<1>(arg_stream_types[k + arg_types.size()])); + stream << "*"; + stream << " source_wrapper_" << k + arg_types.size(); + } + stream << " ) {\n"; + + // memeory and control pragma + for (size_t i = 0; i < arg_stream_types.size(); i++) { + std::string interface; + if (std::get<0>(arg_stream_types[i])) interface = " m_axi "; + else interface = " m_axi "; + PrintIndent(stream, indent); + stream << "#pragma HLS INTERFACE" + interface + "port="; + stream << "source_wrapper_" << i; + stream << " offset=slave bundle=gmem\n"; + } + for (size_t i = 0; i < arg_stream_types.size(); i++) { + std::string interface; + if (std::get<0>(arg_stream_types[i])) interface = " s_axilite "; + else interface = " s_axilite "; + PrintIndent(stream, indent); + stream << "#pragma HLS INTERFACE" + interface + "port="; + stream << "source_wrapper_" << i; + stream << " bundle=control\n"; + } + PrintIndent(stream, indent); + stream << "#pragma HLS INTERFACE s_axilite port=return bundle=control\n"; + stream << "\n"; + + // intermediate vars init alloc + for (size_t i = 0; i < arg_stream_types.size(); i++) { + PrintIndent(stream, indent); + stream << PrintHalideType(std::get<1>(arg_stream_types[i])); + stream << " source_wrapper_temp_" << i; + auto shape = std::get<2>(arg_stream_types[i]); + for (size_t j = 0; j < shape.size(); j++) + stream << "[" << shape[j] << "]"; + if (shape.size() == 0) stream << "[1]"; + stream << ";\n"; + } + + // vars init for values + for (size_t i = 0; i < arg_stream_types.size(); i++) { + auto shape = std::get<2>(arg_stream_types[i]); + for (size_t j = 0; j < shape.size(); j++) { + PrintIndent(stream, indent); + 
stream << "for (int i" << j << " = 0; "; + stream << "i" << j << " < " << shape[j] << "; "; + stream << "i" << j << "++) {\n"; + indent += 2; + if (j == shape.size() - 1) { + PrintIndent(stream, indent); + stream << "source_wrapper_temp_" << i; + for (size_t k = 0; k < shape.size(); k++) { + stream << "[i" << k << "]"; + } + stream << " = "; + stream << "source_wrapper_" << i; + stream << "[i" << shape.size() - 1; + int mul = 1; + for (size_t k = shape.size() - 1; k > 0; k--) { + mul *= shape[k]; + stream << "+ i" << k - 1 << "*" << mul; + } + stream << "];\n"; + } + } + for (size_t j = 0; j < shape.size(); j++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } + if (shape.size() == 0) { + PrintIndent(stream, indent); + stream << "source_wrapper_temp_" << i; + stream << "[0] = source_wrapper_" << i << "[0];\n"; + } + } + + // print top func + stream << "\n"; + PrintIndent(stream, indent); + stream << "top( "; + for (size_t i = 0;i < arg_stream_types.size(); i++) { + if (i != arg_stream_types.size() - 1){ + stream << "source_wrapper_temp_" << i; + stream << ", "; + } else { + stream << "source_wrapper_temp_" << i; + stream << ");\n"; + } + + } + stream << "\n"; + + // read back return val + for (int k = arg_stream_types.size() - 1; + k > args.size() - 2; k--) { + auto shape = std::get<2>(arg_stream_types[k]); + for (size_t i = 0; i < shape.size(); i++) { + PrintIndent(stream, indent); + stream << "for (int i" << i << " = 0; "; + stream << "i" << i << " < " << shape[i] << "; "; + stream << "i" << i << "++) {\n"; + indent += 2; + + if (i == shape.size() - 1) { + PrintIndent(stream, indent); + stream << "source_wrapper_" << k; + stream << "[i" << shape.size() - 1; + int mul = 1; + for (size_t j = shape.size() - 1; j > 0; j--) { + mul *= shape[j]; + stream << " + i" << j - 1 << "*" << mul; + } + stream << " ] = "; + + stream << "source_wrapper_temp_" << k; + for (size_t j = 0; j < shape.size(); j++) { + stream << "[i" << j << "]"; + } + stream 
<<";\n"; + } + } + for (size_t i = 0;i < shape.size(); i++) { + indent -= 2; + PrintIndent(stream, indent); + stream << "}\n"; + } + } + stream << "}\n"; + indent -= 2; + stream << "}\n"; + stream.close(); +} + +// generate opencl + +// generate opencl kernel and mem obj +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func, + std::string host_code, + std::vector>>& arg_stream_types) { + int indent = 0; + std::ofstream stream; + stream.open("__tmp__/host.cpp"); + stream << "#include \n"; + stream << "#include \n"; + stream << "\n"; + stream << "// standard C/C++ headers\n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "\n"; + stream << "// opencl harness headers\n"; + stream << "#include \"CLWorld.h\"\n"; + stream << "#include \"CLKernel.h\"\n"; + stream << "#include \"CLMemObj.h\"\n"; + stream << "// harness namespace\n"; + stream << "using namespace rosetta;\n"; + stream << "\n"; + stream << "//other headers\n"; + stream << "#include \"utils.h\"\n"; + // stream << "#include \"typedefs.h\"\n"; + stream << "int main(int argc, char ** argv) {\n"; + indent += 2; + + int cnt = 0; // label the constant value + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared memory + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << "* "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << "*)"; + stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + PrintIndent(stream, indent); + + stream << Type2Byte(arg_types[i]) << " "; + // stream << Type2Str(arg_types[i]) << " "; + stream << "arg_top_" << i; + TVMArray* arr = args[i]; + + stream << "["; + for (int j = 0; j < arr->ndim; j++) { + //stream << "[" << arr->shape[j] << "]"; + if (j == arr->ndim-1) { + stream << arr->shape[j]; + } else { + 
stream << arr->shape[j]; + stream << " * "; + } + } + stream << "];\n"; + // copy from shared mem + PrintCopy(arr, stream, indent, i); + + } else { + // directly assign the value to the variable + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << ")"; + if (args[i].type_code() == kDLInt || + args[i].type_code() == kDLUInt) { + stream << int64_t(args[i]); + } + stream << ";\n"; + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "arg_top_" << i; + stream << "[1] = { "; + + stream << "arg_" << i << " }"; + if (arg_types[i].fracs > 0) + stream << " >> " << static_cast(arg_types[i].fracs); + stream << ";\n"; + + // PrintIndent(stream, indent); + // stream << Type2Byte(arg_types[i]) << " "; + // stream << "fool_" << cnt << "[1] = { arg_top_" << i << " };\n"; + cnt += 1; + } + stream << "\n"; + } + // allocate mem for stream vars + for (size_t k = args.size(); k < arg_stream_types.size(); k++) { + auto type = std::get<1>(arg_stream_types[k]); + auto shape = std::get<2>(arg_stream_types[k]); + PrintIndent(stream, indent); + stream << Type2Byte(Type2TVMType(type)) << " " << "knn_mat["; + if (shape.size() > 0) { + for (size_t i = 0; i < shape.size(); i++) { + if (i != shape.size() - 1) + stream << shape[i] << " * "; + else stream << shape[i]; + } + } else { + stream << "1"; + } + stream << "];\n"; + } + + // generate host side (before) on arg_top_k + PrintIndent(stream,indent); + stream << "printf(\"Host Side Application\\n\");\n"; + stream << "\n"; + PrintIndent(stream, indent); + stream << "// compute bofore kernel function"; + // stream being axis interface host, channel for kernel + size_t pos = host_code.find("top("); + std::string pre_kernel = host_code.substr(0, pos -1); + std::string post_kernel = host_code.substr(host_code.find('\n', pos) + 1); + stream << pre_kernel; + + stream << "\n"; + PrintIndent(stream, indent); + stream << 
"// parse command line arguments for opencl version\n"; + PrintIndent(stream, indent); + stream << "std::string kernelFile(\"\");\n"; + PrintIndent(stream, indent); + stream << "parse_sdaccel_command_line_args(argc, argv, kernelFile);\n"; + stream << "\n"; + PrintIndent(stream, indent); + stream << "// create OpenCL world\n"; + PrintIndent(stream, indent); + stream << "CLWorld digit_rec_world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR);\n"; + stream << "\n"; + PrintIndent(stream, indent); + stream << "// add the bitstream file\n"; + PrintIndent(stream, indent); + stream << "digit_rec_world.addProgram(kernelFile);\n"; + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "// create kernels\n"; + PrintIndent(stream, indent); + stream << "CLKernel App(digit_rec_world.getContext(), digit_rec_world.getProgram(), \"App\", digit_rec_world.getDevice());\n"; + stream << "\n\n"; + + PrintIndent(stream, indent); + stream << "// create mem objects\n"; + for (int i = 0;i < args.size();i++) { + PrintIndent(stream, indent); + // if (cnt!=0) { + // stream << "CLMemObj source_" << i; + // stream << "((void*)fool_" << cnt - 1; + // stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; + // stream << "1, "; + // stream << "CL_MEM_READ_WRITE);\n"; + // cnt--; + // continue; + // } + stream << "CLMemObj source_" << i; + stream << "((void*)arg_top_" << i; + stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; + // stream << ", sizeof(" << Type2ExtStr(arg_types[i]) << "), "; + + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + for (int j = 0;j < arr->ndim;j++) { + if (j==0) { + stream << arr->shape[j] << " "; + } else { + stream << "* " << arr->shape[j]; + } + } + } else { + stream << "1"; + } + stream << ", "; + stream << "CL_MEM_READ_WRITE);\n"; + } + // addiion streamed data + for (size_t k = args.size(); k < arg_stream_types.size(); k++) { + auto type = std::get<1>(arg_stream_types[k]); + auto shape = 
std::get<2>(arg_stream_types[k]); + PrintIndent(stream, indent); + stream << "CLMemObj source_" << k; + stream << "((void*)knn_mat"; + stream << ", sizeof(" << Type2Byte(Type2TVMType(type)) << "), "; + if (shape.size() > 0) { + for (size_t j = 0; j < shape.size(); j++) { + if (j == 0) { + stream << shape[j] << " "; + } else { + stream << "* " << shape[j]; + } + } + } else { + stream << "1"; + } + stream << ", "; + stream << "CL_MEM_READ_WRITE);\n"; + } + + stream << "\n"; + PrintIndent(stream, indent); + stream << "// add them to the world\n"; + for (size_t i = 0;i < arg_stream_types.size();i++) { + PrintIndent(stream, indent); + stream << "digit_rec_world.addMemObj(source_" << i; + stream << ");\n"; + } + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << " // set work size\n"; + PrintIndent(stream, indent); + int size = arg_stream_types.size(); + std::string arr = "[" + std::to_string(size) + "] = {"; + for (int i = 0; i < size; i++) { + if (i != size -1) arr += "1, "; + else arr += "1};\n"; + } + stream << "int global_size" + arr; + PrintIndent(stream, indent); + stream << "int local_size" + arr; + PrintIndent(stream, indent); + stream << "App.set_global(global_size);\n"; + PrintIndent(stream, indent); + stream << "App.set_local(local_size);\n"; + stream << "\n"; + PrintIndent(stream, indent); + stream << "// add them to the world\n"; + PrintIndent(stream, indent); + stream << "digit_rec_world.addKernel(App);\n"; + stream << "\n"; + PrintIndent(stream, indent); + stream << "// set kernel arguments\n"; + // PrintIndent(stream, indent); + // stream << "digit_rec_world.setConstKernelArg(0, 0, arg_top_0);\n"; + for (size_t i = 0; i < arg_stream_types.size(); i++) { + PrintIndent(stream, indent); + stream << "digit_rec_world.setMemKernelArg(0, "<< i << ", " << i; + stream << ");\n"; + } + + stream << "\n"; + PrintIndent(stream, indent); + stream << "// run\n"; + PrintIndent(stream, indent); + stream << "digit_rec_world.runKernels();\n\n"; + 
PrintIndent(stream, indent); + stream << "// read the data back\n"; + for (size_t i = args.size() - 1; i < arg_stream_types.size(); i++) { + PrintIndent(stream, indent); + stream << "digit_rec_world.readMemObj(" << i << ");\n"; + } + + // generate host (post-kernel) + stream << "\n"; + PrintIndent(stream, indent); + stream << "// compute after kernel function\n"; + // stream being axis interface host, channel for kernel + stream << post_kernel; + + // copy to shared mem + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + TVMArray* arr = args[i]; + PrintCopyBack(arr, stream, indent, i); + // PrintCopyBack2(arr, stream, indent, i); + PrintIndent(stream, indent); + stream << "shmdt("; + stream << "arg_" << i << ");\n"; + } + } + + stream << "\n\n"; + PrintIndent(stream, indent); + stream << "}\n"; + stream.close(); + +} +} // namespace runtime +} // namespace TVM diff --git a/tvm/src/codegen/build_helper.h b/tvm/src/codegen/build_helper.h new file mode 100644 index 000000000..ed30efd49 --- /dev/null +++ b/tvm/src/codegen/build_helper.h @@ -0,0 +1,66 @@ +/*! 
+ * Copyright (c) 2017 by Contributors + * Common build utilities + * \file build_helper.h + */ +#ifndef TVM_CODEGEN_BUILD_HELPER_H_ +#define TVM_CODEGEN_BUILD_HELPER_H_ + +#include +#include +#include +#include "../runtime/meta_data.h" + +namespace TVM { +namespace runtime { + +// get current work directory +std::string getpath(void); +void PrintIndent(std::ofstream& stream, int indent); +inline size_t GetTypeSize(TVMType t); +inline size_t GetDataSize(TVMArray* arr); +inline TVMType Type2TVMType(Type t); +inline std::string PrintHalideType(Type t); +inline std::string Type2Str(TVMType t); +inline std::string Type2ExtStr(TVMType t); +inline std::string Type2WrapStr(TVMType t); +inline std::string Type2Byte(TVMType t); + +void CollectArgInfo(TVMArgs& args, + LoweredFunc func, + std::vector& arg_sizes, + std::vector& arg_types); + +void GenSharedMem(TVMArgs& args, + std::vector& shmids, + std::vector& arg_sizes); + +void FreeSharedMem(TVMArgs& args, + const std::vector& shmids, + std::vector& arg_sizes); + +void PrintCopy(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr); + +void PrintCopyBack(TVMArray* arr, + std::ofstream& stream, + int indent, size_t nth_arr); + +void GenKernelCode(std::string test_file); + +void GenWrapperCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + const std::vector>>& arg_stream_types, + LoweredFunc func); + +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc func, + std::string host_code, + std::vector>>& arg_stream_types); +} // namespace runtime +} // namespace TVM +#endif // TVM_CODEGEN_BUILD_HELPER_H_ diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 6fd8887fb..bf6e2a07f 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -266,7 +266,7 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { else PrintType(op->ret_type, stream); 
stream << " " << op->name << "("; - // create function signature + // check channel and create function signature std::unordered_set stream_vars; for (size_t j = 0; j < op->channels.size(); j++) { stream_vars.insert(op->channels[j]); @@ -330,6 +330,7 @@ void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT os << op->name << "("; for (size_t i = 0; i < op->args.size(); ++i) { std::string str = op->name + "." + PrintExpr(op->args[i]); + // skip printing if arg is streamed if (!stream_exprs.count(str)) { if (i != 0) { std::string pre = op->name + "." + PrintExpr(op->args[i-1]); From 8c72a7a4b3cb9d8de5877ce08a44026f6b772a41 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Tue, 3 Dec 2019 22:03:49 -0500 Subject: [PATCH 096/103] [update] stream in codegen c --- .gitignore | 2 - .gitmodules | 3 - hlib/python/hlib/nn.py | 2 +- hlib/python/hlib/ppac.py | 199 ------ python/heterocl/api.py | 4 +- python/heterocl/config.py | 1 - python/heterocl/tvm/build_module.py | 42 +- samples/digitrec/digitrec_stream.py | 150 +++++ samples/digitrec/digitrec_vhls.py | 158 +---- samples/sobel/sobel.py | 2 +- tvm/include/tvm/schedule.h | 2 + tvm/src/codegen/build_common.cc | 602 +----------------- tvm/src/codegen/build_common.h | 1 - .../{build_helper.cc => build_util.cc} | 347 +++++----- .../codegen/{build_helper.h => build_util.h} | 12 +- tvm/src/codegen/codegen.cc | 1 - tvm/src/codegen/codegen_c.cc | 327 +++++++--- tvm/src/codegen/codegen_c.h | 7 +- tvm/src/codegen/codegen_cuda.cc | 5 +- tvm/src/codegen/codegen_cuda.h | 4 +- tvm/src/codegen/hlsc/build_hlsc.cc | 1 - tvm/src/codegen/hlsc/codegen_hlsc.cc | 114 ++-- tvm/src/codegen/hlsc/codegen_hlsc.h | 22 +- tvm/src/codegen/hlsc/codegen_vhls.cc | 21 +- tvm/src/codegen/hlsc/codegen_vhls.h | 2 +- tvm/src/codegen/hlsc/vhls_module.cc | 2 +- tvm/src/codegen/opencl/codegen_opencl.cc | 1 - tvm/src/codegen/opencl/codegen_opencl.h | 1 - tvm/src/codegen/opencl/codegen_sdaccel.cc | 65 +- tvm/src/codegen/opencl/sdaccel_module.cc | 99 
+-- tvm/src/codegen/opencl/sdaccel_module.h | 4 - tvm/src/schedule/schedule_dataflow_rewrite.cc | 48 +- .../template/{design => sdaccel}/CLKernel.cpp | 0 .../template/{design => sdaccel}/CLKernel.h | 0 .../template/{design => sdaccel}/CLMemObj.cpp | 0 .../template/{design => sdaccel}/CLMemObj.h | 0 .../template/{design => sdaccel}/CLWorld.cpp | 0 .../template/{design => sdaccel}/CLWorld.h | 0 tvm/src/template/{design => sdaccel}/Makefile | 0 .../template/{design => sdaccel}/harness.mk | 0 tvm/src/template/{design => sdaccel}/run.tcl | 0 .../template/{design => sdaccel}/run_hw.sh | 0 .../template/{design => sdaccel}/run_sw.sh | 0 .../template/{design => sdaccel}/utils.cpp | 0 tvm/src/template/{design => sdaccel}/utils.h | 0 45 files changed, 782 insertions(+), 1469 deletions(-) delete mode 100644 hlib/python/hlib/ppac.py create mode 100644 samples/digitrec/digitrec_stream.py rename tvm/src/codegen/{build_helper.cc => build_util.cc} (84%) rename tvm/src/codegen/{build_helper.h => build_util.h} (87%) rename tvm/src/template/{design => sdaccel}/CLKernel.cpp (100%) rename tvm/src/template/{design => sdaccel}/CLKernel.h (100%) rename tvm/src/template/{design => sdaccel}/CLMemObj.cpp (100%) rename tvm/src/template/{design => sdaccel}/CLMemObj.h (100%) rename tvm/src/template/{design => sdaccel}/CLWorld.cpp (100%) rename tvm/src/template/{design => sdaccel}/CLWorld.h (100%) rename tvm/src/template/{design => sdaccel}/Makefile (100%) rename tvm/src/template/{design => sdaccel}/harness.mk (100%) rename tvm/src/template/{design => sdaccel}/run.tcl (100%) rename tvm/src/template/{design => sdaccel}/run_hw.sh (100%) rename tvm/src/template/{design => sdaccel}/run_sw.sh (100%) rename tvm/src/template/{design => sdaccel}/utils.cpp (100%) rename tvm/src/template/{design => sdaccel}/utils.h (100%) diff --git a/.gitignore b/.gitignore index ba1cf217b..65f3dfcf8 100644 --- a/.gitignore +++ b/.gitignore @@ -16,8 +16,6 @@ tags docs/source/samples docs/source/tutorials soda_* -# *.cpp 
-# *.h out # Downloaded files diff --git a/.gitmodules b/.gitmodules index 6e63adee0..292bfaba8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "soda"] path = soda url = https://github.com/UCLA-VAST/soda-compiler.git -[submodule "hlib/rocc-ppac"] - path = hlib/rocc-ppac - url = git@github.com:cornell-zhang/rocc-ppac.git diff --git a/hlib/python/hlib/nn.py b/hlib/python/hlib/nn.py index cf860b389..8f1c4d0e8 100644 --- a/hlib/python/hlib/nn.py +++ b/hlib/python/hlib/nn.py @@ -37,7 +37,7 @@ def conv2d_nchw_imp(Input, Filter, Output, stride=[1,1], padding=[[0,0],[0,0]]): with hcl.for_(0,Output.shape[1]) as c: with hcl.for_(0,Output.shape[2]) as h: with hcl.for_(0,Output.shape[3]) as w: - partial = hcl.local(0) + partial = hcl.scalar(0) with hcl.for_(0,Filter.shape[-2]) as x: with hcl.for_(0,Filter.shape[-1]) as y: partial.v += Input[n][c][h+x][w+y] * Filter[0][0][x][y] diff --git a/hlib/python/hlib/ppac.py b/hlib/python/hlib/ppac.py deleted file mode 100644 index cbfd473e0..000000000 --- a/hlib/python/hlib/ppac.py +++ /dev/null @@ -1,199 +0,0 @@ -from collections import OrderedDict -import heterocl as hcl -import heterocl.tvm as tvm - -class PPAC_config: - """Wrap PPAC parameters and function names.""" - def __init__(self, multi_bit=False, word_bits=None, elem_bits=None): - """Initialize PPAC configurations - - Parameters - ---------- - multi_bit : Whether to use specialized ppac accelerator - or generalized ppac module. - See hardware implementation for more. - word_bits : Number of bits in a row in ppac. 
- elem_bits : Number of bits in a number in matrix (datatype) - - """ - self.word_bits = (word_bits if word_bits else 256) if multi_bit else 64 - self.elem_bits = (elem_bits if elem_bits else 8) if multi_bit else 1 - self.elem_num = self.word_bits // self.elem_bits - self.depth = self.elem_num - assert self.elem_bits in [1, 2, 4, 8, 16, 32], "elem_bits must be in {1, 2, 4, 8, 16, 32}" - assert (self.word_bits % 64 == 0) and (self.elem_num*self.elem_bits == self.word_bits), \ - "word_bits must be times of 64 and times of elem_bits" - if multi_bit: - self.func_call = ['PPACFunc_GeMMUInt', 'PPACFunc_GeMMSInt'] - else: - self.func_call = ['PPACFunc_HmmSim', 'PPACFunc_GeMMBin'] - - -class PPAC_func_params: - """ - names of PPAC function call parameters - used as annotation key on the stage - """ - - def __init__(self): - self.func_name = '_ppac_func_name' - self.ret = '_ret' - self.arg0 = '_arg0' - self.arg1 = '_arg1' - self.b_n = '_batch_num' - self.i_b_n = '_in_block_num' - self.o_c_n = '_out_channel_num' - -ppac_params = PPAC_func_params() - -def hmm_sim(x, y, name=None): - """Compute hamming-similarity between each element in x and y - Parameters - ---------- - x : 1-d tensor of datatype uint64 - y : 1-d tensor of datatype uint64 - - Returns - ------- - res: 2-d tensor of shape (x.shape[0], y.shape[0]) and datatype uint64 - """ - assert x.dtype == 'uint64' and y.dtype == 'uint64', "only support datatype uint64" - assert len(x.shape) == 1 and len(y.shape) == 1, "only support 1-dim hamming-similarity operation" - - ppac_config = PPAC_config() - - try: - res_shape = x.shape + y.shape - batch_num = x.shape[0] - except: - # x is scalar - res_shape = y.shape - batch_num = 1 - res_name = name if name else 'res' - in_block_num = 1 - out_channel_num = y.shape[0] - - def _assign_val(*args): - temp = hcl.local(0, name='sim_acc', dtype=hcl.UInt(64)) - temp[0] = tvm.popcount(~(x[args[0]] ^ y[args[1]])) - return temp[0] - return hcl.compute( res_shape, _assign_val, res_name, 
dtype=hcl.UInt(64), - attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(ppac_config.func_call[0])), - (ppac_params.ret, tvm.make.StringImm(res_name)), - (ppac_params.arg0, tvm.make.StringImm(x.name)), - (ppac_params.arg1, tvm.make.StringImm(y.name)), - (ppac_params.b_n, batch_num), - (ppac_params.i_b_n, in_block_num), - (ppac_params.o_c_n, out_channel_num)]) ) - -def gemm_binary(d, w, name=None): - """Compute general matrix multiplication of datatype {1, -1} - Parameters - ---------- - d : 2-d tensor of datatype uint1 - w : 2-d tensor of datatype uint1 - - Returns - ------- - res: 2-d tensor of shape (d.shape[0], w.shape[0]) and datatype uint64 - res = dot(d, w.T) (with datatype {1, -1}) - """ - assert d.dtype == 'uint1' and w.dtype == 'uint1', 'only support binary data' - assert len(w.shape) == 2 and len(d.shape) == 2, "only support 2-dim binary gemm" - assert d.shape[1] == w.shape[1] - - ppac_config = PPAC_config() - assert d.shape[1] % ppac_config.elem_num == 0, \ - "input channel should be times of " + str(ppac_config.elem_num) - - res_name = name if name else 'res' - batch_num = d.shape[0] - in_channel_num = w.shape[1] - in_block_num = in_channel_num // ppac_config.elem_num - out_channel_num = w.shape[0] - res_shape = (batch_num, out_channel_num) - block_size = ppac_config.elem_num // 8 - - def _bin_pack_uint8(tensor): - """Pack uint1 to uint8. - uint1 is cast to uint8 in c backend. - This operation squeezes memory 8 times. 
- """ - assert tensor.dtype == 'uint1' - - ishape = tensor.shape - n = len(ishape) - oshape = ishape[:-1] + (ishape[n-1] // 8, ) - - def _assign_val(*args): - temp = hcl.local(0, name='pack_acc', dtype=hcl.UInt(8)) - with hcl.for_(0, 8) as i: - temp[0] = temp[0] | (tensor[args[0], i + args[1]*8] << i) - return temp[0] - - return hcl.compute(oshape, _assign_val, - name=tensor.name+'_packed', dtype=hcl.UInt(8)) - - def _mvpodd_reduce(*args): - """compute {1, -1} dot product on packed data.""" - temp = hcl.local(0, name='mvpodd_acc', dtype=hcl.Int(64)) - with hcl.for_(0, in_block_num) as o: - with hcl.for_(0, block_size) as i: - temp[0] += tvm.popcount(d_packed[args[0], i+block_size*o] ^ w_packed[args[1], i+block_size*o]) - temp[0] = ppac_config.elem_num - temp[0]*2 - return temp[0] - - d_packed = _bin_pack_uint8(d) - w_packed = _bin_pack_uint8(w) - return hcl.compute(res_shape, _mvpodd_reduce, name=res_name, dtype=hcl.Int(64), - attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(ppac_config.func_call[1])), - (ppac_params.ret, tvm.make.StringImm(res_name)), - (ppac_params.arg0, tvm.make.StringImm(d_packed.name)), - (ppac_params.arg1, tvm.make.StringImm(w_packed.name)), - (ppac_params.b_n, batch_num), - (ppac_params.i_b_n, in_block_num), - (ppac_params.o_c_n, out_channel_num)]) ) - - -def gemm_multi_bit(d, w, name=None): - """Compute general matrix multiplication of multi-bit data - Parameters - ---------- - d : 2-d tensor - w : 2-d tensor - - Returns - ------- - res: 2-d tensor of shape (d.shape[0], w.shape[0]) and datatype uint64 - res = dot(d, w.T) - """ - assert w.dtype == d.dtype - assert w.dtype in ['uint8', 'int8', 'uint16', 'int16', 'uint32', 'int32'] - - assert len(w.shape) == 2 and len(d.shape) == 2, "only support 2-dim gemm" - assert d.shape[1] == w.shape[1] - - ppac_config = PPAC_config(multi_bit=True) - assert d.shape[1] % ppac_config.elem_num == 0, \ - "only support data with size of times of " + str(ppac_config.elem_num) - - res_name = name if 
name else 'res' - res_dtype = hcl.UInt(64) if ('u' in d.dtype) else hcl.Int(64) - batch_num = d.shape[0] - in_channel_num = d.shape[1] - in_block_num = in_channel_num // ppac_config.elem_num - out_channel_num = w.shape[0] - res_shape = (batch_num, out_channel_num) - func_name = ppac_config.func_call[0] if ('u' in d.dtype) else ppac_config.func_call[1] - - r = hcl.reduce_axis(0, in_channel_num, name='k') - return hcl.compute(res_shape, - lambda i, j: hcl.sum(d[i, r] * w[j, r], axis=r), - name=res_name, dtype=res_dtype, - attrs=OrderedDict([(ppac_params.func_name, tvm.make.StringImm(func_name)), - (ppac_params.ret, tvm.make.StringImm(res_name)), - (ppac_params.arg0, tvm.make.StringImm(d.name)), - (ppac_params.arg1, tvm.make.StringImm(w.name)), - (ppac_params.b_n, batch_num), - (ppac_params.i_b_n, in_block_num), - (ppac_params.o_c_n, out_channel_num)])) \ No newline at end of file diff --git a/python/heterocl/api.py b/python/heterocl/api.py index 6442691a5..319bf5b1e 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -55,11 +55,11 @@ def app2(A, B, C): # set the configurations config.init_dtype = init_dtype # initialize global variables - Schedule.stage_ops = [] + Schedule.stage_ops = [] Schedule.last_stages = OrderedSet([]) Scheme.current = None -def placeholder(shape, name=None, dtype=None, place=None): +def placeholder(shape, name=None, dtype=None): """Construct a HeteroCL placeholder for inputs/outputs. If the shape is an empty tuple, the returned value is a scalar. 
diff --git a/python/heterocl/config.py b/python/heterocl/config.py index 16ffd96b0..5ea94483b 100644 --- a/python/heterocl/config.py +++ b/python/heterocl/config.py @@ -1,3 +1,2 @@ init_dtype = "int32" -init_device = "fpga_intel" diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index e0e37b54d..1e3d7bbbb 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -33,7 +33,7 @@ def tvm_callback_syn_postproc(code): @register_func def get_util_path(platform): if platform == "aws_f1": - return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/design/" + return "/work/zhang-x1/users/sx233/heterocl/tvm/src/template/sdaccel/" elif platform == "rocket": ppac = "/work/zhang-x1/users/sx233/heterocl/hlib/rocc-ppac" emulator = os.path.join(ppac, "rocket/emulator/emulator-freechips." + \ @@ -419,10 +419,6 @@ def lower(sch, return stmt if kernel_only: - for tensor in remove_args: - for arg in args: - if str(arg) == str(tensor): - args.remove(arg) return ir_pass.MakeKernelAPI(stmt, name, arg_list) else: return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func) @@ -469,29 +465,14 @@ def build_fpga_kernel(sch, args, target, name="default_function"): try: # generate and split code host, xcel = None, None - if "sdaccel" == target.tool.name: + if target.tool.name == "sdaccel": host = target.host.lang.replace("opencl", "aocl") xcel = target.xcel.lang.replace("hlsc", "vhls") - if "vivado_hls" == target.tool.name: + elif target.tool.name == "vivado_hls": host = target.host.lang.replace("hlsc", "vhls") xcel = target.xcel.lang.replace("hlsc", "vhls") - - # generate inline assembly c and invoke - if "rocket" == target.tool.name: + elif target.tool.name == "rocket": host = target.host.lang.replace("c", "rv64_ppac") - - host_code, xcel_code = "", "" - if host: # src mode generate host code - builder = getattr(codegen, "build_{0}".format(host)) - host_code = builder(fdevice) - findex, rindex = 
host_code.find("{host}"), host_code.rfind("{host}") - host_code = host_code[findex + 6 : rindex] - - if xcel: # src mode generate xcel code - builder = getattr(codegen, "build_{0}".format(xcel)) - xcel_code = builder(fdevice) - findex, rindex = xcel_code.find("{device}"), xcel_code.rfind("{device}") - xcel_code = xcel_code[findex + 8 : rindex] # return simulation built function if "emu" in str(target.tool.mode) or "sim" in str(target.tool.mode): @@ -501,8 +482,19 @@ def build_fpga_kernel(sch, args, target, name="default_function"): keys.insert(0, "name") vals.insert(0, target.tool.name) return builder(fdevice, keys, vals) - # return source code only - else: return xcel_code + host_code + else: # return source code only + host_code, xcel_code = "", "" + if host: # src mode generate host code + builder = getattr(codegen, "build_{0}".format(host)) + host_code = builder(fdevice) + findex, rindex = host_code.find("{host}"), host_code.rfind("{host}") + host_code = host_code[findex + 6 : rindex] + if xcel: # src mode generate xcel code + builder = getattr(codegen, "build_{0}".format(xcel)) + xcel_code = builder(fdevice) + findex, rindex = xcel_code.find("{device}"), xcel_code.rfind("{device}") + xcel_code = xcel_code[findex + 8 : rindex] + return xcel_code + host_code except AttributeError: raise AttributeError("Cannot find the target builder %s" % target) diff --git a/samples/digitrec/digitrec_stream.py b/samples/digitrec/digitrec_stream.py new file mode 100644 index 000000000..4c0da096a --- /dev/null +++ b/samples/digitrec/digitrec_stream.py @@ -0,0 +1,150 @@ +import heterocl as hcl +import time +import numpy as np +import math +from digitrec_data import read_digitrec_data + +N = 8 * 8 +max_bit = int(math.ceil(math.log(N, 2))) +test_size = (180, ) +data_size = (10, 1800) + +dtype_image = hcl.UInt(N) +dtype_knnmat = hcl.UInt(max_bit) + +setting = { + "version" : "2019.1", + "clock" : "10" +} +tool = hcl.tool.vivado("csim", setting) +target = hcl.platform.aws_f1 + +def 
knn(test_images, train_images): + + def popcount(num): + out = hcl.local(0, "out") + with hcl.for_(0, train_images.type.bits) as i: + out.v += num[i] + return out.v + + def update_knn(dist, knn_mat, i, j): + max_id = hcl.local(0, "max_id") + with hcl.for_(0, 3) as k: + with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id.v]): + max_id.v = k + with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]): + knn_mat[i][max_id.v] = dist[i][j] + + def sort_knn(knn_mat, i, j): + val = hcl.local(0, "val") + with hcl.if_( j == 1 ): + with hcl.if_( knn_mat[i][1] > knn_mat[i][2] ): + val.v = knn_mat[i][1] + knn_mat[i][1] = knn_mat[i][2] + knn_mat[i][2] = val.v + with hcl.else_(): + with hcl.if_( knn_mat[i][0] > knn_mat[i][1] ): + val.v = knn_mat[i][0] + knn_mat[i][0] = knn_mat[i][1] + knn_mat[i][1] = val.v + + def knn_vote(knn_mat, j): + id0 = hcl.local(0, "id0") + id1 = hcl.local(0, "id1") + id2 = hcl.local(0, "id2") + count = hcl.local(0, "count") + with hcl.for_(0, 10) as n: + with hcl.if_(knn_mat[n][0] < knn_mat[id0.v][0]): + id0.v = n + with hcl.for_(0, 10) as m: + with hcl.if_(knn_mat[m][0] < knn_mat[id1.v][0]): + id1.v = m + with hcl.for_(0, 10) as k: + with hcl.if_(knn_mat[k][0] < knn_mat[id2.v][0]): + id2.v = k + with hcl.if_(j == id0.v): + count.v += 1 + with hcl.elif_(j == id1.v): + count.v += 1 + with hcl.elif_(j == id2.v): + count.v += 1 + with hcl.else_(): + count.v += 0 + return count.v + + # support hcl.compute in hcl def + @hcl.def_([(), data_size, (10,3)]) + def knn_dist(test_image, train_images, pred_matrix): + pass + + with hcl.for_(0, 180) as index: + test_image = test_images[index] + diff = hcl.compute(train_images.shape, + lambda x, y: train_images[x][y] ^ test_image, + "diff") + dist = hcl.compute(diff.shape, + lambda x, y: popcount(diff[x][y]), + "dist") + knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") + hcl.mutate(dist.shape, + lambda x, y: update_knn(dist, knn_mat, x, y), + "knn_update") + hcl.mutate((10, 3), lambda x, y: sort_knn(knn_mat, x, y), "sort") 
+ knn_new = hcl.compute(knn_mat.shape, + lambda x, y: knn_mat[x][y], "copy") + knn_pred = hcl.compute((10,), + lambda x: knn_vote(knn_mat, x), "vote") + return knn_pred + +test_image = hcl.placeholder(test_size, "test_image", dtype_image) +train_images = hcl.placeholder(data_size, "train_images", dtype_image) + +scheme = hcl.create_scheme([test_image, train_images], knn) +scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat) + +s = hcl.create_schedule_from_scheme(scheme) + +diff = knn.diff +dist = knn.dist +vote = knn.copy +knn_update = knn.knn_update + +s.to([test_images, train_images], target.xcel) +s.to(vote, target.host) + +# merge loop nests +s[diff].compute_at(s[dist], dist.axis[1]) +s[dist].compute_at(s[knn_update], knn_update.axis[1]) + +# reorder loop to expose more parallelism +s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) + +# parallel outer loop and pipeline inner loop +s[knn_update].parallel(knn_update.axis[1]) +s[knn_update].pipeline(knn_update.axis[0]) + +# at the end, we build the whole offloaded function. 
+# print(hcl.lower(s)) +f = hcl.build(s, target) + +train_images, _, test_images, test_labels = read_digitrec_data() +total = len(test_images) +total_time = 0 + +# read returned prediction from streaming pipe +hcl_train_images = hcl.asarray(train_images, dtype_image) +hcl_knn_pred = hcl.asarray(np.zeros((total, 10)), dtype_knnmat) + +start = time.time() +f(test_images, hcl_train_images, hcl_knn_pred) +total_time = total_time + (time.time() - start) + +knn_result = hcl_knn_pred.asnumpy() + +correct = 0.0 +for i in range(total): + if np.argmax(knn_result[i]) == test_labels[i]: + correct += 1 + +print("Average kernel time (s): {:.2f}".format(total_time/total)) +print("Accuracy (%): {:.2f}".format(100*correct/1)) diff --git a/samples/digitrec/digitrec_vhls.py b/samples/digitrec/digitrec_vhls.py index 1f219db62..8ba4aa7b5 100644 --- a/samples/digitrec/digitrec_vhls.py +++ b/samples/digitrec/digitrec_vhls.py @@ -1,155 +1,25 @@ -import heterocl as hcl -import time -import numpy as np -import math -from digitrec_data import read_digitrec_data +from digitrec_main import * -N = 8 * 8 -max_bit = int(math.ceil(math.log(N, 2))) -test_size = (180, ) -data_size = (10, 1800) +f = top('vhls_csim') -dtype_image = hcl.UInt(N) -dtype_knnmat = hcl.UInt(max_bit) - -setting = { - "version" : "2019.1", - "clock" : "10" -} -tool = hcl.tool.vivado("csim", setting) -target = hcl.platform.aws_f1 - -def knn(test_images, train_images): - - def popcount(num): - out = hcl.local(0, "out") - with hcl.for_(0, train_images.type.bits) as i: - out.v += num[i] - return out.v - - def update_knn(dist, knn_mat, i, j): - max_id = hcl.local(0, "max_id") - with hcl.for_(0, 3) as k: - with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id.v]): - max_id.v = k - with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]): - knn_mat[i][max_id.v] = dist[i][j] - - def sort_knn(knn_mat, i, j): - val = hcl.local(0, "val") - with hcl.if_( j == 1 ): - with hcl.if_( knn_mat[i][1] > knn_mat[i][2] ): - val.v = knn_mat[i][1] - knn_mat[i][1] = 
knn_mat[i][2] - knn_mat[i][2] = val.v - with hcl.else_(): - with hcl.if_( knn_mat[i][0] > knn_mat[i][1] ): - val.v = knn_mat[i][0] - knn_mat[i][0] = knn_mat[i][1] - knn_mat[i][1] = val.v - - def knn_vote(knn_mat, j): - id0 = hcl.local(0, "id0") - id1 = hcl.local(0, "id1") - id2 = hcl.local(0, "id2") - count = hcl.local(0, "count") - with hcl.for_(0, 10) as n: - with hcl.if_(knn_mat[n][0] < knn_mat[id0.v][0]): - id0.v = n - with hcl.for_(0, 10) as m: - with hcl.if_(knn_mat[m][0] < knn_mat[id1.v][0]): - id1.v = m - with hcl.for_(0, 10) as k: - with hcl.if_(knn_mat[k][0] < knn_mat[id2.v][0]): - id2.v = k - with hcl.if_(j == id0.v): - count.v += 1 - with hcl.elif_(j == id1.v): - count.v += 1 - with hcl.elif_(j == id2.v): - count.v += 1 - with hcl.else_(): - count.v += 0 - return count.v - - # support - @hcl.def_([(), data_size, (10,3)]) - def knn_dist(test_image, train_images, pred_matrix) - - with hcl.for_(0, 180) as index: - - test_image = test_images[index] - - diff = hcl.compute(train_images.shape, - lambda x, y: train_images[x][y] ^ test_image, - "diff") - - dist = hcl.compute(diff.shape, - lambda x, y: popcount(diff[x][y]), - "dist") - - knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat") - hcl.mutate(dist.shape, - lambda x, y: update_knn(dist, knn_mat, x, y), - "knn_update") - - hcl.mutate((10, 3), lambda x, y: sort_knn(knn_mat, x, y), "sort") - knn_new = hcl.compute(knn_mat.shape, - lambda x, y: knn_mat[x][y], "copy") - knn_pred = hcl.compute((10,), - lambda x: knn_vote(knn_mat, x), "vote") - - return knn_pred - -test_image = hcl.placeholder(test_size, "test_image", dtype_image) -train_images = hcl.placeholder(data_size, "train_images", dtype_image) - -scheme = hcl.create_scheme([test_image, train_images], knn) -scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat) - -s = hcl.create_schedule_from_scheme(scheme) - -diff = knn.diff -dist = knn.dist -vote = knn.copy -knn_update = knn.knn_update - -s.to([test_images, train_images], 
target.xcel) -s.to(vote, target.host) - -# merge loop nests -s[diff].compute_at(s[dist], dist.axis[1]) -s[dist].compute_at(s[knn_update], knn_update.axis[1]) - -# reorder loop to expose more parallelism -s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0]) - -# parallel outer loop and pipeline inner loop -s[knn_update].parallel(knn_update.axis[1]) -s[knn_update].pipeline(knn_update.axis[0]) +train_images, _, test_images, test_labels = read_digitrec_data() -# at the end, we build the whole offloaded function. -# print(hcl.lower(s)) -f = hcl.build(s, target) +correct = 0.0 -train_images, _, test_images, test_labels = read_digitrec_data() -total = len(test_images) total_time = 0 +for i in range(0, 180): -# read returned prediction from streaming pipe -hcl_train_images = hcl.asarray(train_images, dtype_image) -hcl_knn_pred = hcl.asarray(np.zeros((total, 10)), dtype_knnmat) + hcl_train_images = hcl.asarray(train_images, dtype_image) + hcl_knn_mat = hcl.asarray(np.zeros((10, 3)), dtype_knnmat) -start = time.time() -f(test_images, hcl_train_images, hcl_knn_pred) -total_time = total_time + (time.time() - start) + start = time.time() + f(test_images[i], hcl_train_images, hcl_knn_mat) + total_time = total_time + (time.time() - start) -knn_result = hcl_knn_pred.asnumpy() + knn_mat = hcl_knn_mat.asnumpy() -correct = 0.0 -for i in range(total): - if np.argmax(knn_result[i]) == test_labels[i]: + if knn_vote(knn_mat) == test_labels[i]: correct += 1 -print("Average kernel time (s): {:.2f}".format(total_time/total)) -print("Accuracy (%): {:.2f}".format(100*correct/1)) +print("Average kernel time (s): {:.2f}".format(total_time/180)) +print("Accuracy (%): {:.2f}".format(100*correct/180)) diff --git a/samples/sobel/sobel.py b/samples/sobel/sobel.py index aee976462..61a6cb9a3 100644 --- a/samples/sobel/sobel.py +++ b/samples/sobel/sobel.py @@ -65,7 +65,7 @@ def dev(gx, gy, org): s.to(kernel.derv, target.cpu) # create stream channel between modules - # print(type(target.fpga), 
hcl.lower(s)) + print(type(target.fpga), hcl.lower(s)) return hcl.build(s, target) # Load sample data diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index 1ffa389da..396bbc7bd 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -401,6 +401,8 @@ class Schedule : public NodeRef { inline ScheduleNode* operator->(); // declare container type using ContainerType = ScheduleNode; + // insertion point for host & xcel separation + size_t split_bound{0}; }; /*! diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index d2efd74f9..dba722852 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -10,7 +10,7 @@ #include #include #include "./build_common.h" -#include "./build_helper.h" +#include "./build_util.h" #include #include @@ -30,12 +30,15 @@ class SimModuleNode final : public ModuleNode { public: SimModuleNode(LoweredFunc func, std::string host_code, - std::vector>> arg_stream_types, - std::string dev_code, std::string platform, std::unordered_map options) + argInfo arg_info, + std::string dev_code, std::string platform, + std::unordered_map options) : func_(func), host_(host_code), - arg_stream_types_(arg_stream_types), - dev_(dev_code), platform_(platform), options_(options) { + arg_info_(arg_info), + dev_(dev_code), + platform_(platform), + options_(options) { } const char* type_key() const { @@ -64,11 +67,12 @@ class SimModuleNode final : public ModuleNode { if (const auto* f = Registry::Get("get_util_path")) path = (*f)(platform_).operator std::string(); system(("cp -r " + path + "/* __tmp__/").c_str()); + LOG(CLEAN) << "Running SW simulation on " + platform_; if (platform_ == "sdaccel") { - GenWrapperCode(args, shmids, arg_types, arg_stream_types_, func_); + GenWrapperCode(args, shmids, arg_types, arg_info_, func_); GenHostCode(args, shmids, arg_types, func_, - host_, arg_stream_types_); + platform_, host_, arg_info_); GenKernelCode(dev_); LOG(CLEAN) << "Running 
SW simulation ..."; @@ -77,7 +81,7 @@ class SimModuleNode final : public ModuleNode { } else if (platform_ == "rocket") { // generate host and run proxy kernel test GenHostCode(args, shmids, arg_types, func_, - host_, arg_stream_types_); + platform_, host_, arg_info_); std::string compile = "cd __tmp__;"; compile += std::string("autoconf; mkdir build; cd build;") + std::string("../configure --with-riscvtools=") + @@ -86,8 +90,9 @@ class SimModuleNode final : public ModuleNode { } else if (platform_ == "vivado_hls") { GenHostCode(args, shmids, arg_types, func_, - host_, arg_stream_types_); + platform_, host_, arg_info_); GenKernelCode(dev_); + // system("cd __tmp__; make csim"); } else { LOG(FATAL) << "unrecognized platform " << platform_; } @@ -105,7 +110,7 @@ class SimModuleNode final : public ModuleNode { private: LoweredFunc func_; std::string host_; - std::vector>> arg_stream_types_; + argInfo arg_info_; std::string dev_; std::string platform_; std::unordered_map options_; @@ -118,11 +123,12 @@ Module CreateSimModule( LoweredFunc func, std::string host_code, std::string dev_code, - std::vector>> arg_type, - std::string platform, std::unordered_map options) { + argInfo arg_types, + std::string platform, + std::unordered_map options) { std::shared_ptr n = std::make_shared(func, host_code, - arg_type, dev_code, + arg_types, dev_code, platform, options); return Module(n); } @@ -154,555 +160,8 @@ class TypeCollector final : public IRVisitor { } }; -// record of vars for top func signature -// vars include passed-in and not registered vars on host -class StreamCollector final : public IRVisitor { - public: - StreamCollector(std::vector& arg_vars, - std::unordered_map& stream_table, - std::string initial_scope) - : arg_vars_(arg_vars), - stream_table_(stream_table), - scope_(initial_scope) {} - - // record alloc on host - void Visit_(const Allocate *op) { - if (!switch_on) - this->HandleDef(op->buffer_var.get()); - IRVisitor::Visit_(op); - } - - void Visit_(const Load 
*op) { - if (!switch_on) { - this->HandleUse(op->buffer_var); - } - IRVisitor::Visit_(op); - } - - // update placeholder status - void Visit_(const Store* op) { - if (switch_on) { - if (auto val = op->value.as()) { - const Variable* v = val->buffer_var.get(); - for (size_t i = 0; i < arg_vars_.size(); i++) { - std::string name = arg_vars_[i]->name_hint; - if (v->name_hint.find(name) != std::string::npos) { - // record in VisitStmt StreamStmt - // LOG(WARNING) << op->buffer_var << ":" << v->name_hint; - } - } - } - } else { // count use on host - this->HandleUse(op->buffer_var); - } - IRVisitor::Visit_(op); - } - - void Visit_(const StreamStmt* op) { - if (switch_on) { // in xcel scope - const Variable* v = op->buffer_var.get(); - // LOG(WARNING) << v->name_hint; - } - IRVisitor::Visit_(op); - } - - void Visit_(const AttrStmt* op) { - if (op->attr_key == attr::device_scope) { - if (op->value.as()->value != scope_) - switch_on = true; - else switch_on = false; - } - IRVisitor::Visit_(op); - } - - // additional data saved into stream table (for streamed - // data we keep the new id for arg_stream in var_idmap, - // and non-streamed using the repalced arg_top_k name) - void HandleDef(const Variable* v) { - CHECK(!host_def_count_.count(v)) - << "variable " << v->name_hint - << " has already been defined, the Stmt is not SSA"; - CHECK(!host_use_count_.count(v)) - << "variable " << v->name_hint - << " has been used before definition!"; - host_use_count_[v] = 0; - host_def_count_[v] = 1; - } - - void HandleUse(const Expr& v) { - CHECK(v.as()); - Var var(v.node_); - auto it = host_use_count_.find(var.get()); - if (it != host_use_count_.end()) { - if (it->second >= 0) { - ++it->second; - } - } else { - if (!stream_table_.count(var.get())) { - host_undefined_.push_back(var); - host_use_count_[var.get()] = -1; - } - } - } - - bool host_scope_{false}; - Array host_undefined_; - std::unordered_map host_use_count_; - std::unordered_map host_def_count_; - - private: - std::vector& 
arg_vars_; - std::unordered_map& stream_table_; - std::string scope_; - bool switch_on{true}; -}; - -// codegen for accelerators -class CodeGenXcel : public CodeGenVivadoHLS { - public: - int arg_top_count{0}; - str2tupleMap map_arg_type_; - LoweredFunc f_; - - void AddFunction(LoweredFunc f, - str2tupleMap map_arg_type) { - map_arg_type_ = map_arg_type; f_ = f; - CodeGenVivadoHLS::AddFunction(f, map_arg_type); - }; - - void VisitStmt_(const AttrStmt* op) { - if (op->attr_key == ir::attr::device_scope) { - // print top( ... in host and enter fpga scope - if (op->value.as()->value == "fpga" && !fpga_scope_) { - fpga_scope_ = true; - PrintIndent(); - - // track the stream usage - StreamCollector collector(arg_vars, stream_table, "cpu"); - collector.Visit(op->body); - - // update data type and name - for (auto k : collector.host_undefined_) { - auto v = k.get(); - arg_vars.push_back(v); - stream_table[v] = true; - auto tuple = arg_top_vars[v]; - arg_top_vars[v] = std::make_tuple(v->name_hint, - std::get<1>(tuple), - std::get<2>(tuple)); - } - TypeCollector visitor(arg_top_vars); - visitor.Visit(op->body); - - // generte function calls - stream << "top("; - int index = 0; - for (size_t i = 0; i < arg_vars.size(); i++) { - auto v = arg_vars[i]; - std::string arg_name; - if (stream_table[v]) - arg_name = std::get<0>(arg_top_vars[v]); - else arg_name = GetVarID(v); - if (index !=0) stream << ", "; - stream << arg_name; - // print kernel func signature - if (index !=0) arg_stream << ", "; - PrintType(std::get<1>(arg_top_vars[v]), arg_stream); - auto shape = std::get<2>(arg_top_vars[v]); - arg_stream << " " << arg_name; - for (size_t k = 0; k < shape.size(); k++) - arg_stream << "[" << shape[k] << "]"; - index++; - } - stream << ");\n"; - - // switch context to device scope - host_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // swtich from device to host - } else if (op->value.as()->value == "cpu" && - fpga_scope_) { - fpga_scope_ = 
false; - device_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - } - } - CodeGenC::VisitStmt_(op); - } - void VisitStmt_(const Store* op) { - std::string vid = GetVarID(op->buffer_var.get()); - if (vid.find("stream_") == std::string::npos) - CodeGenVivadoHLS::VisitStmt_(op); - }; - - void VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - // modify var idmap for passed in args - } else if (value.find("data") != std::string::npos || - value.substr(0, 3) == "arg") { - auto v = op->var.get(); - auto tuple = arg_top_vars[v]; - arg_vars.push_back(v); - stream_table[v] = false; - var_idmap_[v] = "arg_top_" + std::to_string(arg_top_count); - std::string api_name = "arg" + std::to_string(arg_top_count); - auto arg = map_arg_type_[api_name]; - // PrintType(std::get<1>(arg), arg_stream); - std::vector shape; - if (auto buf = f_->api_args[arg_top_count].as()) - for (size_t i = 0; i < buf->shape.size(); i++) - shape.push_back(buf->shape[i].as()->value); - arg_top_vars[v] = std::make_tuple(vid, std::get<1>(arg), shape); - arg_top_count += 1; - } - PrintStmt(op->body); - }; - - void VisitStmt_(const StreamStmt* op) { - //TODO: fix this - // std::string vid = GetVarID(op->buffer_var.get()); - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); - PrintIndent(); - auto load_op = op->value.as(); - auto v = load_op->buffer_var.as(); - // placeholder args using recv name - if (stream_table.count(v)) { - auto tuple = arg_top_vars[v]; - vid.replace(vid.find("stream_send"), 12, "stream_recv"); - 
arg_top_vars[v] = std::make_tuple(vid, std::get<1>(tuple), - std::get<2>(tuple)); - stream_table[v] = true; - } // else: streamed externop defined in analysis - // PrintExpr(op->value, stream); - // stream << vid << ".write()\n"; - }; - - void VisitStmt_(const Allocate* op) { - std::string vid = AllocVarID(op->buffer_var.get()); - CHECK(!is_zero(op->condition)); - int32_t constant_size = op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - var_shape_map_[buffer] = op->extents; - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); - - // initlize hls stream channel - if (arg_top_vars.count(buffer) || - vid.find("stream_") != std::string::npos) { - } else { - this->PrintIndent(); - PrintType(op->type, stream); - stream << ' '<< vid; - if (constant_size > 1) {// Transfer length one array to scalar - for (size_t i = 0; i < op->extents.size(); i++) { - stream << '['; - PrintExpr(op->extents[i], stream); - stream << "]"; - } - } - stream << ";\n"; - } - buf_length_map_[buffer] = constant_size; - RegisterHandleType(op->buffer_var.get(), op->type); - for (size_t i = 0; i < op->attrs.size(); i++) { - this->PrintStmt(op->attrs[i]); - } - this->PrintStmt(op->body); - }; -}; - -// replace host-device interface args with pragma -class CodeGenHost : public CodeGenAOCL { - public: - int arg_top_count{0}; - - void PrintType(Type t, std::ostream &os) { - int lanes = t.lanes(); - - if(t.is_handle()) - { - os << "void*";return; - } - if(t==Bool()) - { - os <<"bool"; return; - } - CHECK_EQ(lanes,1) - << "do not yet support vector types"; - - bool fail = false; - if(t.is_float()) - { - switch(t.bits()) - { - case 16: - os<<"half"; - // enable_fp16_ = true; - break; - case 32: - os<<"float"; - break; - case 64: - os<< "double"; - // enable_fp64_ = true; - break; - default: - fail = true; - break; - } - if(!fail && lanes ==1)return; 
- if(!fail&&(lanes >= 2 && lanes <=16)) - { - os<=2 && lanes <= 16)) - { - os< 64) { - os << "uint" << "64" << "_t"; return; - } else { - std::string str; - if (t.bits() <= 8) str = "8"; - else if (t.bits() <= 16) str = "16"; - else if (t.bits() <= 32) str = "32"; - else str = "64"; - os<< "uint"<< str <<"_t"; return; - } - } - if(t.is_int()) - { - if (t.bits() > 64) { - os << "int" << "64" << "_t"; return; - } else { - std::string str; - if (t.bits() <= 8) str = "8"; - else if (t.bits() <= 16) str = "16"; - else if (t.bits() <= 32) str = "32"; - else str = "64"; - os << "int" << str << "_t"; return; - } - } - } - } - - LOG(FATAL) << "Cannot convert type"<attr_key == ir::attr::device_scope) { - // print top( ... in host and enter fpga scope - if (op->value.as()->value == "fpga" && !fpga_scope_) { - fpga_scope_ = true; - PrintIndent(); - - // track the stream usage - var2nameType unreg_vars; - StreamCollector collector(arg_vars, stream_table, "cpu"); - collector.Visit(op->body); - // update data type and name - for (size_t k = 0; k < arg_vars.size(); k ++) - arg_top_vars[arg_vars[k]]; - for (auto k : collector.host_undefined_) - arg_top_vars[k.get()]; - TypeCollector visitor(arg_top_vars); - visitor.Visit(op->body); - - // generte function calls - stream << "top("; - // int index = 0; - // for (auto op : stream_stmts) { - // if (index !=0) stream << ", "; - // std::string vid; - // if (!var_idmap_.count(op->buffer_var.get())) - // vid = AllocVarID(op->buffer_var.get()); - // else vid = GetVarID(op->buffer_var.get()); - // stream << vid; - // if (vid.find("stream_send") != std::string::npos || - // vid.find("stream_recv") != std::string::npos) { - // if (index !=0) arg_stream << ", "; - // PrintType(op->buffer_var.type(), arg_stream); - // arg_stream << " " << vid; - // } - // index++; - // } - // for (auto op : stream_exprs) { - // if (index !=0) stream << ", "; - // std::string vid; - // if (!var_idmap_.count(op->buffer_var.get())) - // vid = 
AllocVarID(op->buffer_var.get()); - // else vid = GetVarID(op->buffer_var.get()); - // stream << vid; - // // stream << op->buffer_var.get()->name_hint; - // if (vid.find("stream_send") != std::string::npos || - // vid.find("stream_recv") != std::string::npos) { - // if (index !=0) arg_stream << ", "; - // PrintType(op->buffer_var.type(), arg_stream); - // arg_stream << " " << vid; - // } - // index++; - // } - stream << ");\n"; - - // switch context to device scope - host_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - - // swtich from device to host - } else if (op->value.as()->value == "cpu" && - fpga_scope_) { - fpga_scope_ = false; - device_stream << this->stream.str(); - this->stream.str(""); - this->stream.clear(); - } - } - CodeGenC::VisitStmt_(op); - } - - void VisitStmt_(const Allocate* op) { - std::string vid = AllocVarID(op->buffer_var.get()); - if (vid.find("stream_") != std::string::npos) { - // do not print alloc stream - this->PrintStmt(op->body); - } else { - CHECK(!is_zero(op->condition)); - this->PrintIndent(); - int32_t constant_size = op->constant_allocation_size(); - CHECK_GT(constant_size, 0) - << "Can only handle constant size stack allocation for now"; - const Variable* buffer = op->buffer_var.as(); - var_shape_map_[buffer] = op->extents; - std::string scope = alloc_storage_scope_.at(buffer); - PrintStorageScope(scope, stream); - - // initlize hls stream channel - if (vid.find("stream_in") != std::string::npos || - vid.find("stream_out") != std::string::npos) { - stream << "hls::stream<"; - PrintType(op->type, stream); - stream << "> " << vid << ";\n"; - } else { - PrintType(op->type, stream); - stream << ' '<< vid; - if (constant_size > 1) {// Transfer length one array to scalar - for (size_t i = 0; i < op->extents.size(); i++) { - stream << '['; - PrintExpr(op->extents[i], stream); - stream << "]"; - } - } - stream << ";\n"; - } - buf_length_map_[buffer] = constant_size; - 
RegisterHandleType(op->buffer_var.get(), op->type); - for (size_t i = 0; i < op->attrs.size(); i++) { - this->PrintStmt(op->attrs[i]); - } - this->PrintStmt(op->body); - } - }; - - void VisitExpr_(const StreamExpr* op, std::ostream& os) { - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); - // os << vid << ".read()"; - }; - - void VisitStmt_(const Store* op) { - std::string vid = GetVarID(op->buffer_var.get()); - if (vid.find("stream_") == std::string::npos) - CodeGenC::VisitStmt_(op); - }; - - void VisitStmt_(const StreamStmt* op) { - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); - PrintIndent(); - auto load_op = op->value.as(); - auto v = load_op->buffer_var.as(); - // placeholder args using recv name - if (stream_table.count(v)) { - auto tuple = arg_top_vars[v]; - arg_top_vars[v] = std::make_tuple(vid, std::get<1>(tuple), - std::get<2>(tuple)); - stream_table[v] = true; - } // else: streamed externop defined in analysis - // PrintExpr(op->value, stream); - // stream << vid << ".write()\n"; - }; - - void VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - // locate arg data and update arg_top_vars - } else if (value.find("data") != std::string::npos || - value.substr(0, 3) == "arg") { - auto v = op->var.get(); - auto tuple = arg_top_vars[v]; - arg_vars.push_back(v); - stream_table[v] = false; - var_idmap_[v] = "arg_top_" + std::to_string(arg_top_count); - arg_top_vars[v] = std::make_tuple(vid, 
std::get<1>(tuple), - std::get<2>(tuple)); - arg_top_count += 1; - } - PrintStmt(op->body); - }; - -}; +using argInfo = + std::vector>>; // unified simulation function for diff platforms template @@ -720,12 +179,14 @@ runtime::Module BuildSimModule(Array funcs, cg_host.AddFunction(f, map_arg_type); cg_dev.AddFunction(f, map_arg_type); } - // process info: shape type and stream + // vector {vars} auto& arg_vars = cg_dev.arg_vars; + // map {var : is_streamed(bool) } auto& stream_table = cg_dev.stream_table; + // map {var : (vid, Type, shape)} auto& arg_top_vars = cg_dev.arg_top_vars; - std::vector>> arg_type; + argInfo arg_info; for (size_t i = 0 ; i < arg_vars.size(); i++) { auto v = arg_vars[i]; auto nameType = arg_top_vars[v]; @@ -733,9 +194,12 @@ runtime::Module BuildSimModule(Array funcs, if (stream_table[v]) is_stream = true; else is_stream = false; - auto item = std::make_tuple(is_stream, std::get<1>(nameType), - std::get<2>(nameType)); - arg_type.push_back(item); + auto item = std::make_tuple( + /*var name*/std::get<0>(nameType), + /*whether is streamed*/is_stream, + /*data type*/std::get<1>(nameType), + /*shape*/std::get<2>(nameType)); + arg_info.push_back(item); } // tool option mapping and platform std::string platform = values[0].as()->value; @@ -748,7 +212,7 @@ runtime::Module BuildSimModule(Array funcs, return runtime::CreateSimModule(funcs[0], cg_host.GetHost(), cg_dev.GetDevice(), - arg_type, platform, options); + arg_info, platform, options); } TVM_REGISTER_API("codegen.build_sim") @@ -765,7 +229,7 @@ TVM_REGISTER_API("codegen.build_sim") *rv = BuildSimModule (args[0], args[1], args[2]); } else if (type == "sdaccel") { - *rv = BuildSimModule + *rv = BuildSimModule (args[0], args[1], args[2]); } else if (type == "vivado_hls") { *rv = BuildSimModule diff --git a/tvm/src/codegen/build_common.h b/tvm/src/codegen/build_common.h index 1be783c72..f9f42d219 100644 --- a/tvm/src/codegen/build_common.h +++ b/tvm/src/codegen/build_common.h @@ -12,7 +12,6 @@ 
#include "../runtime/meta_data.h" namespace TVM { - namespace codegen { // Extract function information from device function. inline std::unordered_map diff --git a/tvm/src/codegen/build_helper.cc b/tvm/src/codegen/build_util.cc similarity index 84% rename from tvm/src/codegen/build_helper.cc rename to tvm/src/codegen/build_util.cc index c896ba11d..5ea1bf722 100644 --- a/tvm/src/codegen/build_helper.cc +++ b/tvm/src/codegen/build_util.cc @@ -10,6 +10,7 @@ #include #include #include "./build_common.h" +#include "./build_util.h" #include #include @@ -237,6 +238,7 @@ void FreeSharedMem(TVMArgs& args, // copy values from the shared mem to local mem void PrintCopy(TVMArray* arr, + argInfo& arg_info, std::ofstream& stream, int indent, size_t nth_arr) { for (int i = 0; i < arr->ndim; i++) { @@ -245,14 +247,9 @@ void PrintCopy(TVMArray* arr, stream << "i" << i << " < " << arr->shape[i] << "; "; stream << "i" << i << "++) {\n"; indent += 2; - if (i == arr->ndim-1) { + if (i == arr->ndim - 1) { PrintIndent(stream, indent); - // stream << "arg_top_" << nth_arr; - // for (int j = 0; j < arr->ndim; j++) { - // stream << "[i" << j << "]"; - // } - - stream << "arg_top_" << nth_arr; + stream << std::get<0>(arg_info[nth_arr]); stream << "[i" << arr->ndim-1; int mul2 = 1; for (int j = arr->ndim-2; j >= 0; j--) { @@ -261,7 +258,6 @@ void PrintCopy(TVMArray* arr, } stream << "]"; - stream << " = ("; // stream << Type2ExtStr(arr->dtype); stream << Type2Byte(arr->dtype); @@ -288,6 +284,7 @@ void PrintCopy(TVMArray* arr, // copy values from local mem back to shared mem void PrintCopyBack(TVMArray* arr, + argInfo& arg_info, std::ofstream& stream, int indent, size_t nth_arr) { for (int i = 0; i < arr->ndim; i++) { @@ -306,10 +303,9 @@ void PrintCopyBack(TVMArray* arr, stream << " + i" << j << "*" << mul; } stream << "] = ("; - // stream << Type2ExtStr(arr->dtype); stream << Type2Byte(arr->dtype); - stream << ")(arg_top_" << nth_arr; - stream << "[i" << arr->ndim-1; + stream << ")(" << 
std::get<0>(arg_info[nth_arr]); + stream << "[i" << arr->ndim - 1; int mul2 = 1; for (int j = arr->ndim-2; j >= 0; j--) { mul2 *= arr->shape[j+1]; @@ -317,11 +313,6 @@ void PrintCopyBack(TVMArray* arr, } stream << "])"; - - // for (int j = 0; j < arr->ndim; j++) { - // stream << "[i" << j << "]"; - // } - // stream << ")"; if (arr->dtype.fracs > 0) stream << " << " << static_cast(arr->dtype.fracs); stream << ";\n"; @@ -345,7 +336,7 @@ void GenKernelCode(std::string test_file) { void GenWrapperCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, - const std::vector>>& arg_stream_types, + argInfo& arg_stream_types, LoweredFunc func) { std::ofstream stream; int indent = 0; @@ -371,7 +362,7 @@ void GenWrapperCode(TVMArgs& args, } for (size_t k = 0; k < ex_arg_count; k++) { if (k != ex_arg_count) stream << ", "; - stream << PrintHalideType(std::get<1>(arg_stream_types[k + arg_types.size()])); + stream << PrintHalideType(std::get<2>(arg_stream_types[k + arg_types.size()])); stream << "*"; stream << " source_wrapper_" << k + arg_types.size(); } @@ -380,7 +371,7 @@ void GenWrapperCode(TVMArgs& args, // memeory and control pragma for (size_t i = 0; i < arg_stream_types.size(); i++) { std::string interface; - if (std::get<0>(arg_stream_types[i])) interface = " m_axi "; + if (std::get<1>(arg_stream_types[i])) interface = " m_axi "; else interface = " m_axi "; PrintIndent(stream, indent); stream << "#pragma HLS INTERFACE" + interface + "port="; @@ -389,7 +380,7 @@ void GenWrapperCode(TVMArgs& args, } for (size_t i = 0; i < arg_stream_types.size(); i++) { std::string interface; - if (std::get<0>(arg_stream_types[i])) interface = " s_axilite "; + if (std::get<1>(arg_stream_types[i])) interface = " s_axilite "; else interface = " s_axilite "; PrintIndent(stream, indent); stream << "#pragma HLS INTERFACE" + interface + "port="; @@ -403,9 +394,9 @@ void GenWrapperCode(TVMArgs& args, // intermediate vars init alloc for (size_t i = 0; i < 
arg_stream_types.size(); i++) { PrintIndent(stream, indent); - stream << PrintHalideType(std::get<1>(arg_stream_types[i])); + stream << PrintHalideType(std::get<2>(arg_stream_types[i])); stream << " source_wrapper_temp_" << i; - auto shape = std::get<2>(arg_stream_types[i]); + auto shape = std::get<3>(arg_stream_types[i]); for (size_t j = 0; j < shape.size(); j++) stream << "[" << shape[j] << "]"; if (shape.size() == 0) stream << "[1]"; @@ -414,7 +405,7 @@ void GenWrapperCode(TVMArgs& args, // vars init for values for (size_t i = 0; i < arg_stream_types.size(); i++) { - auto shape = std::get<2>(arg_stream_types[i]); + auto shape = std::get<3>(arg_stream_types[i]); for (size_t j = 0; j < shape.size(); j++) { PrintIndent(stream, indent); stream << "for (int i" << j << " = 0; "; @@ -469,7 +460,7 @@ void GenWrapperCode(TVMArgs& args, // read back return val for (int k = arg_stream_types.size() - 1; k > args.size() - 2; k--) { - auto shape = std::get<2>(arg_stream_types[k]); + auto shape = std::get<3>(arg_stream_types[k]); for (size_t i = 0; i < shape.size(); i++) { PrintIndent(stream, indent); stream << "for (int i" << i << " = 0; "; @@ -507,130 +498,37 @@ void GenWrapperCode(TVMArgs& args, stream.close(); } -// generate opencl - -// generate opencl kernel and mem obj -void GenHostCode(TVMArgs& args, - const std::vector& shmids, - const std::vector& arg_types, - LoweredFunc func, - std::string host_code, - std::vector>>& arg_stream_types) { - int indent = 0; - std::ofstream stream; - stream.open("__tmp__/host.cpp"); +// generate opencl wrapper for sdaccel sim +void GenHostHeaders(std::ofstream& stream, + std::string platform) { stream << "#include \n"; - stream << "#include \n"; - stream << "\n"; + stream << "#include \n\n"; stream << "// standard C/C++ headers\n"; stream << "#include \n"; stream << "#include \n"; stream << "#include \n"; stream << "#include \n"; stream << "#include \n"; - stream << "#include \n"; - stream << "\n"; - stream << "// opencl harness 
headers\n"; - stream << "#include \"CLWorld.h\"\n"; - stream << "#include \"CLKernel.h\"\n"; - stream << "#include \"CLMemObj.h\"\n"; - stream << "// harness namespace\n"; - stream << "using namespace rosetta;\n"; - stream << "\n"; - stream << "//other headers\n"; - stream << "#include \"utils.h\"\n"; - // stream << "#include \"typedefs.h\"\n"; - stream << "int main(int argc, char ** argv) {\n"; - indent += 2; - - int cnt = 0; // label the constant value - for (int i = 0; i < args.size(); i++) { - if (args[i].type_code() == kArrayHandle) { - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << "* "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << "*)"; - stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - PrintIndent(stream, indent); - - stream << Type2Byte(arg_types[i]) << " "; - // stream << Type2Str(arg_types[i]) << " "; - stream << "arg_top_" << i; - TVMArray* arr = args[i]; - - stream << "["; - for (int j = 0; j < arr->ndim; j++) { - //stream << "[" << arr->shape[j] << "]"; - if (j == arr->ndim-1) { - stream << arr->shape[j]; - } else { - stream << arr->shape[j]; - stream << " * "; - } - } - stream << "];\n"; - // copy from shared mem - PrintCopy(arr, stream, indent, i); - - } else { - // directly assign the value to the variable - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Byte(arg_types[i]) << ")"; - if (args[i].type_code() == kDLInt || - args[i].type_code() == kDLUInt) { - stream << int64_t(args[i]); - } - stream << ";\n"; - PrintIndent(stream, indent); - stream << Type2Byte(arg_types[i]) << " "; - stream << "arg_top_" << i; - stream << "[1] = { "; - - stream << "arg_" << i << " }"; - if (arg_types[i].fracs > 0) - stream << " >> " << static_cast(arg_types[i].fracs); - stream << ";\n"; - - // PrintIndent(stream, indent); - // stream << Type2Byte(arg_types[i]) << " "; - // stream << 
"fool_" << cnt << "[1] = { arg_top_" << i << " };\n"; - cnt += 1; - } - stream << "\n"; - } - // allocate mem for stream vars - for (size_t k = args.size(); k < arg_stream_types.size(); k++) { - auto type = std::get<1>(arg_stream_types[k]); - auto shape = std::get<2>(arg_stream_types[k]); - PrintIndent(stream, indent); - stream << Type2Byte(Type2TVMType(type)) << " " << "knn_mat["; - if (shape.size() > 0) { - for (size_t i = 0; i < shape.size(); i++) { - if (i != shape.size() - 1) - stream << shape[i] << " * "; - else stream << shape[i]; - } - } else { - stream << "1"; - } - stream << "];\n"; - } - - // generate host side (before) on arg_top_k - PrintIndent(stream,indent); - stream << "printf(\"Host Side Application\\n\");\n"; - stream << "\n"; - PrintIndent(stream, indent); - stream << "// compute bofore kernel function"; - // stream being axis interface host, channel for kernel - size_t pos = host_code.find("top("); - std::string pre_kernel = host_code.substr(0, pos -1); - std::string post_kernel = host_code.substr(host_code.find('\n', pos) + 1); - stream << pre_kernel; + stream << "#include \n\n"; + + if (platform == "sdaccel") { + stream << "// opencl harness headers\n"; + stream << "#include \"CLWorld.h\"\n"; + stream << "#include \"CLKernel.h\"\n"; + stream << "#include \"CLMemObj.h\"\n"; + stream << "#include \"utils.h\"\n"; + stream << "// harness namespace\n"; + stream << "using namespace rosetta;\n"; + } // harness headers +} +// initialization before executing kernel +void KernelInit(std::ofstream& stream, + std::string platform, + TVMArgs& args, + const std::vector& arg_types, + argInfo& arg_stream_types) { + int indent = 2; stream << "\n"; PrintIndent(stream, indent); stream << "// parse command line arguments for opencl version\n"; @@ -642,36 +540,26 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "// create OpenCL world\n"; PrintIndent(stream, indent); - stream << "CLWorld digit_rec_world = CLWorld(TARGET_DEVICE, 
CL_DEVICE_TYPE_ACCELERATOR);\n"; + stream << "CLWorld world = CLWorld(TARGET_DEVICE, CL_DEVICE_TYPE_ACCELERATOR);\n"; stream << "\n"; PrintIndent(stream, indent); stream << "// add the bitstream file\n"; PrintIndent(stream, indent); - stream << "digit_rec_world.addProgram(kernelFile);\n"; + stream << "dworld.addProgram(kernelFile);\n"; stream << "\n\n"; PrintIndent(stream, indent); stream << "// create kernels\n"; PrintIndent(stream, indent); - stream << "CLKernel App(digit_rec_world.getContext(), digit_rec_world.getProgram(), \"App\", digit_rec_world.getDevice());\n"; + stream << "CLKernel App(world.getContext(), world.getProgram(), \"App\", world.getDevice());\n"; stream << "\n\n"; PrintIndent(stream, indent); stream << "// create mem objects\n"; - for (int i = 0;i < args.size();i++) { + for (int i = 0;i < args.size(); i++) { PrintIndent(stream, indent); - // if (cnt!=0) { - // stream << "CLMemObj source_" << i; - // stream << "((void*)fool_" << cnt - 1; - // stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; - // stream << "1, "; - // stream << "CL_MEM_READ_WRITE);\n"; - // cnt--; - // continue; - // } stream << "CLMemObj source_" << i; stream << "((void*)arg_top_" << i; stream << ", sizeof(" << Type2Byte(arg_types[i]) << "), "; - // stream << ", sizeof(" << Type2ExtStr(arg_types[i]) << "), "; if (args[i].type_code() == kArrayHandle) { TVMArray* arr = args[i]; @@ -688,10 +576,10 @@ void GenHostCode(TVMArgs& args, stream << ", "; stream << "CL_MEM_READ_WRITE);\n"; } - // addiion streamed data + // additional streamed data for (size_t k = args.size(); k < arg_stream_types.size(); k++) { - auto type = std::get<1>(arg_stream_types[k]); - auto shape = std::get<2>(arg_stream_types[k]); + auto type = std::get<2>(arg_stream_types[k]); + auto shape = std::get<3>(arg_stream_types[k]); PrintIndent(stream, indent); stream << "CLMemObj source_" << k; stream << "((void*)knn_mat"; @@ -716,7 +604,7 @@ void GenHostCode(TVMArgs& args, stream << "// add them to the 
world\n"; for (size_t i = 0;i < arg_stream_types.size();i++) { PrintIndent(stream, indent); - stream << "digit_rec_world.addMemObj(source_" << i; + stream << "world.addMemObj(source_" << i; stream << ");\n"; } @@ -741,15 +629,13 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "// add them to the world\n"; PrintIndent(stream, indent); - stream << "digit_rec_world.addKernel(App);\n"; + stream << "world.addKernel(App);\n"; stream << "\n"; PrintIndent(stream, indent); stream << "// set kernel arguments\n"; - // PrintIndent(stream, indent); - // stream << "digit_rec_world.setConstKernelArg(0, 0, arg_top_0);\n"; for (size_t i = 0; i < arg_stream_types.size(); i++) { PrintIndent(stream, indent); - stream << "digit_rec_world.setMemKernelArg(0, "<< i << ", " << i; + stream << "world.setMemKernelArg(0, "<< i << ", " << i; stream << ");\n"; } @@ -757,27 +643,152 @@ void GenHostCode(TVMArgs& args, PrintIndent(stream, indent); stream << "// run\n"; PrintIndent(stream, indent); - stream << "digit_rec_world.runKernels();\n\n"; + stream << "world.runKernels();\n\n"; PrintIndent(stream, indent); stream << "// read the data back\n"; for (size_t i = args.size() - 1; i < arg_stream_types.size(); i++) { PrintIndent(stream, indent); - stream << "digit_rec_world.readMemObj(" << i << ");\n"; + stream << "world.readMemObj(" << i << ");\n"; + } +} + +// generate host code according to platform type +void GenHostCode(TVMArgs& args, + const std::vector& shmids, + const std::vector& arg_types, + LoweredFunc lowered_func, + std::string platform, + std::string host_code, + argInfo& arg_info) { + int indent = 0; + std::ofstream stream; + stream.open("__tmp__/host.cpp"); + GenHostHeaders(stream, platform); + + stream << "int main(int argc, char ** argv) {\n"; + indent += 2; + + int cnt = 0; // label the constant value + for (int i = 0; i < args.size(); i++) { + if (args[i].type_code() == kArrayHandle) { + // read from the shared memory + PrintIndent(stream, indent); + 
stream << Type2Byte(arg_types[i]) << "* "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << "*)"; + stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + PrintIndent(stream, indent); + + stream << Type2Byte(arg_types[i]) << " "; + stream << std::get<0>(arg_info[i]); + TVMArray* arr = args[i]; + + stream << "["; + for (int j = 0; j < arr->ndim; j++) { + if (j == arr->ndim - 1) { + stream << arr->shape[j]; + } else { + stream << arr->shape[j]; + stream << " * "; + } + } + stream << "];\n"; + PrintCopy(arr, arg_info, stream, indent, i); + + } else { + // directly assign the value to the variable + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Byte(arg_types[i]) << ")"; + if (args[i].type_code() == kDLInt || + args[i].type_code() == kDLUInt) { + stream << int64_t(args[i]); + } + stream << ";\n"; + PrintIndent(stream, indent); + stream << Type2Byte(arg_types[i]) << " "; + stream << "arg_top_" << i; + stream << "[1] = { "; + + stream << "arg_" << i << " }"; + if (arg_types[i].fracs > 0) + stream << " >> " << static_cast(arg_types[i].fracs); + stream << ";\n"; + cnt += 1; + } + stream << "\n"; + } + + // allocate mem for stream vars + for (size_t k = args.size(); k < arg_info.size(); k++) { + auto type = std::get<2>(arg_info[k]); + auto shape = std::get<3>(arg_info[k]); + PrintIndent(stream, indent); + stream << Type2Byte(Type2TVMType(type)) << " " << "name["; + if (shape.size() > 0) { + for (size_t i = 0; i < shape.size(); i++) { + if (i != shape.size() - 1) + stream << shape[i] << " * "; + else stream << shape[i]; + } + } else { + stream << "1"; + } + stream << "];\n"; + } + + // generate host side (before kernel) + PrintIndent(stream, indent); + stream << "printf(\"Finished setting up shared memory\\n\");\n"; + PrintIndent(stream, indent); + stream << "// compute bofore kernel function\n"; + size_t pos = host_code.find("top("); + std::string pre_kernel 
= host_code.substr(0, pos -1); + std::string post_kernel = host_code.substr(host_code.find('\n', pos) + 1); + pre_kernel = pre_kernel.substr(pre_kernel.find_first_not_of("\n")); + pre_kernel = pre_kernel.substr(pre_kernel.find_first_not_of(" ")); + PrintIndent(stream, indent); + stream << pre_kernel << "\n"; + + if (platform == "sdaccel") + KernelInit(stream, platform, args, + arg_types, arg_info); + + else if (platform == "vivado_hls") { + // init hls stream channels + for (size_t k = 0; k < arg_info.size(); k++) { + auto info = arg_info[k]; + if (std::get<1>(info)) { + PrintIndent(stream, indent); + stream << "int fd_" << std::get<0>(info) + << " = open(\"" << "/dev/xillybus_read_32" + << "\", O_WRONLY);" << "\n"; + } + } + stream << "\n"; + PrintIndent(stream, indent); + // create kernel call from host + stream << "top("; + for (size_t i = 0; i < arg_info.size(); i++) { + auto info = arg_info[i]; + auto name = std::get<0>(info); + if (i != 0) stream << ", "; + stream << name; + } + stream << ");\n"; } // generate host (post-kernel) - stream << "\n"; PrintIndent(stream, indent); stream << "// compute after kernel function\n"; - // stream being axis interface host, channel for kernel stream << post_kernel; // copy to shared mem for (int i = 0; i < args.size(); i++) { if (args[i].type_code() == kArrayHandle) { TVMArray* arr = args[i]; - PrintCopyBack(arr, stream, indent, i); - // PrintCopyBack2(arr, stream, indent, i); + PrintCopyBack(arr, arg_info, stream, indent, i); PrintIndent(stream, indent); stream << "shmdt("; stream << "arg_" << i << ");\n"; diff --git a/tvm/src/codegen/build_helper.h b/tvm/src/codegen/build_util.h similarity index 87% rename from tvm/src/codegen/build_helper.h rename to tvm/src/codegen/build_util.h index ed30efd49..ca95364c1 100644 --- a/tvm/src/codegen/build_helper.h +++ b/tvm/src/codegen/build_util.h @@ -1,7 +1,7 @@ /*! 
- * Copyright (c) 2017 by Contributors + * Copyright (c) 2019 by Contributors * Common build utilities - * \file build_common.h + * \file build_util.h */ #ifndef TVM_CODEGEN_BUILD_HELPER_H_ #define TVM_CODEGEN_BUILD_HELPER_H_ @@ -14,6 +14,9 @@ namespace TVM { namespace runtime { +using argInfo = + std::vector>>; + // get current work directory std::string getpath(void); void PrintIndent(std::ofstream& stream, int indent); @@ -52,15 +55,16 @@ void GenKernelCode(std::string test_file); void GenWrapperCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, - const std::vector>>& arg_stream_types, + argInfo& arg_info, LoweredFunc func); void GenHostCode(TVMArgs& args, const std::vector& shmids, const std::vector& arg_types, LoweredFunc func, + std::string platform, std::string host_code, - std::vector>>& arg_stream_types); + argInfo& arg_info); } // namespace runtime } // namespace TVM #endif // TVM_CODEGEN_BUILD_HELPER_H_ diff --git a/tvm/src/codegen/codegen.cc b/tvm/src/codegen/codegen.cc index 996d40744..36d3f39e9 100644 --- a/tvm/src/codegen/codegen.cc +++ b/tvm/src/codegen/codegen.cc @@ -89,6 +89,5 @@ std::string PackImportsToC(const runtime::Module& mod, bool system_lib) { << "#endif\n"; return os.str(); } - } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index fc656601d..23ba899ca 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -8,6 +8,7 @@ #include #include #include "./codegen_c.h" +#include "./merlinc/codeanalys_merlinc.h" #include "../arithmetic/compute_expr.h" namespace TVM { @@ -15,6 +16,121 @@ namespace codegen { using namespace ir; +// collect type info for vars +class TypeCollector final : public IRVisitor { + public: + var2nameType& top_args_; + TypeCollector(var2nameType& top_args) + : top_args_(top_args) {} + void Visit_(const Allocate *op) { + auto v = op->buffer_var.get(); + + // record type and shape + if (top_args_.count(v)) { + 
std::vector shape; + for (size_t i = 0; i < op->extents.size(); i++) + shape.push_back(op->extents[i].as()->value); + top_args_[v] = std::make_tuple( + std::get<0>(top_args_[v]), + op->type, shape); + } + IRVisitor::Visit_(op); + } +}; + +// record of vars for top func signature +// vars include passed-in and not registered vars on host +class StreamCollector final : public IRVisitor { + public: + StreamCollector(std::vector& arg_vars, + std::unordered_map& stream_table, + std::string initial_scope) + : arg_vars_(arg_vars), + stream_table_(stream_table), + scope_(initial_scope) {} + + // record alloc on host + void Visit_(const Allocate *op) { + if (!switch_on) + this->HandleDef(op->buffer_var.get()); + IRVisitor::Visit_(op); + } + + void Visit_(const Load *op) { + if (!switch_on) { + this->HandleUse(op->buffer_var); + } + IRVisitor::Visit_(op); + } + + // update placeholder status + void Visit_(const Store* op) { + if (!switch_on) { // count use on host + if (auto val = op->value.as()) + this->HandleDef(op->buffer_var.get()); + this->HandleUse(op->buffer_var); + } + IRVisitor::Visit_(op); + } + + void Visit_(const StreamStmt* op) { + if (!switch_on) { // in host scope + this->HandleDef(op->buffer_var.get()); + } + IRVisitor::Visit_(op); + } + + void Visit_(const AttrStmt* op) { + if (op->attr_key == attr::device_scope) { + if (op->value.as()->value != scope_) + switch_on = true; + else switch_on = false; + } + IRVisitor::Visit_(op); + } + + // additional data saved into stream table (for streamed + // data we keep the new id for arg_stream in var_idmap, + // and non-streamed using the repalced arg_top_k name) + void HandleDef(const Variable* v) { + CHECK(!host_def_count_.count(v)) + << "variable " << v->name_hint + << " has already been defined, the Stmt is not SSA"; + CHECK(!host_use_count_.count(v)) + << "variable " << v->name_hint + << " has been used before definition!"; + host_use_count_[v] = 0; + host_def_count_[v] = 1; + } + + void HandleUse(const Expr& v) 
{ + CHECK(v.as()); + Var var(v.node_); + auto it = host_use_count_.find(var.get()); + if (it != host_use_count_.end()) { + if (it->second >= 0) { + ++it->second; + } + } else { + if (!stream_table_.count(var.get())) { + host_undefined_.push_back(var); + host_use_count_[var.get()] = -1; + } + } + } + + bool host_scope_{false}; + Array host_undefined_; + std::unordered_map host_use_count_; + std::unordered_map host_def_count_; + + private: + std::vector& arg_vars_; + std::unordered_map& stream_table_; + std::string scope_; + bool switch_on{true}; +}; + void CodeGenC::Init(bool output_ssa) { print_ssa_form_ = output_ssa; } @@ -27,41 +143,45 @@ void CodeGenC::InitFuncState(LoweredFunc f) { CodeGenSourceBase::ClearFuncState(); } -void CodeGenC::AddFunction(LoweredFunc f) { +void CodeGenC::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { // clear previous generated state. this->InitFuncState(f); + map_arg_type_ = map_arg_type; // add to alloc buffer type. for (const auto & kv : f->handle_data_type) { RegisterHandleType(kv.first.get(), kv.second.type()); } - // second move to generate + // generate function signature this->stream << "void " << f->name << "("; for (size_t i = 0; i < f->args.size(); ++i) { Var v = f->args[i]; std::string vid = AllocVarID(v.get()); if (i != 0) stream << ", "; - if (v.type().is_handle()) { - auto it = alloc_storage_scope_.find(v.get()); - if (it != alloc_storage_scope_.end()) - PrintStorageScope(it->second, stream); - stream << ' '; - - if (handle_data_type_.count(v.get())) { - PrintType(handle_data_type_.at(v.get()), stream); - } else { - stream << "void"; - } - stream << "*"; - - if (f->is_restricted && restrict_keyword_.length() != 0) { - stream << ' ' << restrict_keyword_; - } + // check type in the arg map + if (map_arg_type.find(vid) == map_arg_type.end()) { + LOG(WARNING) << vid << " type not found\n"; + PrintType(v.type(), this->stream); + this->stream << ' ' << vid; } else { - PrintType(v.type(), stream); + auto arg = 
map_arg_type[vid]; + PrintType(std::get<1>(arg), this->stream); + this->stream << "* " << std::get<0>(arg); + const BufferNode* buf = f->api_args[i].as(); + if (v.type().is_handle() && buf) { + std::vector shape; + for (size_t i = 0; i < buf->shape.size(); i++) + shape.push_back(buf->shape[i].as()->value); + arg_shapes.push_back(shape); + var_shape_map_[buf->data.get()] = buf->shape; + auto it = alloc_storage_scope_.find(v.get()); + if (it != alloc_storage_scope_.end()) + PrintStorageScope(it->second, stream); + } } - stream << ' ' << vid; } + stream << ") {\n"; int func_scope = this->BeginScope(); this->PrintStmt(f->body); @@ -95,17 +215,6 @@ std::string CodeGenC::Finish() { << "){\n" << device_stream.str(); if (fpga_scope_) device << stream.str(); else host_stream << stream.str(); - // finish host call stmt - // if (top_data_type_.size() > 0) { - // int i = 0; - // for (const auto & kv : top_data_type_) { - // // PrintType(kv.second, host_stream); - // if (i != 0) host_stream << ", "; - // host_stream << kv.first; - // i++; - // } - // host_stream << ");\n"; - // } device << "}\n"; return decl_stream.str() + "\n{device}\n" + module_stream.str() + device.str() + "\n{device}\n" + @@ -756,19 +865,22 @@ void CodeGenC::VisitExpr_(const GetSlice *op, std::ostream& os) { // NOLINT(*) } void CodeGenC::VisitExpr_(const SetBit *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "SetBit is not implemented yet"; + LOG(FATAL) << "SetBit is not implemented yet in C"; } void CodeGenC::VisitExpr_(const SetSlice *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "SetSlice is not implemented yet"; + LOG(FATAL) << "SetSlice is not implemented yet in C"; } void CodeGenC::VisitExpr_(const Quantize *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "Quantize is not yet support"; + LOG(FATAL) << "Quantize is not yet support in C"; } void CodeGenC::VisitExpr_(const StreamExpr *op, std::ostream& os) { // NOLINT(*) - LOG(FATAL) << "StreamExpr is not implemented yet"; + auto v = 
op->buffer_var.get(); + auto it = var_idmap_.find(v); + CHECK(it != var_idmap_.end()) + << "variable " << v->name_hint << " not decalred"; } void CodeGenC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) @@ -781,32 +893,56 @@ void CodeGenC::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) } void CodeGenC::VisitStmt_(const StreamStmt *op) { // NOLINT(*) - LOG(FATAL) << "StreamStmt is not implemented yet"; + CHECK(!var_idmap_.count(op->buffer_var.get())); + std::string vid = AllocVarID(op->buffer_var.get()); + vid = GetVarID(op->value.as()->buffer_var.get()); + PrintIndent(); + auto load_op = op->value.as(); + auto v = load_op->buffer_var.as(); + // placeholder args using recv name + if (stream_table.count(v)) { + auto tuple = arg_top_vars[v]; + arg_top_vars[v] = std::make_tuple(vid, std::get<1>(tuple), + std::get<2>(tuple)); + stream_table[v] = true; + } // else: streamed externop defined in analysis + // PrintExpr(op->value, stream); + // stream << vid << ".write()\n"; } void CodeGenC::VisitStmt_(const LetStmt* op) { std::string value = PrintExpr(op->value); + // Skip the argument retrieving assign statement + std::string vid = AllocVarID(op->var.get()); if (print_ssa_form_) { CHECK(!var_idmap_.count(op->var.get())); var_idmap_[op->var.get()] = value; } else { PrintIndent(); - if (op->var.type() == Handle() && - handle_data_type_.count(op->var.get())) { - PrintType(handle_data_type_.at(op->var.get()), stream); - stream << "* " - << AllocVarID(op->var.get()) - << " = ("; - PrintType(handle_data_type_.at(op->var.get()), stream); - stream << "*)" << value << ";\n"; - } else { + if (op->var.type() != Handle() && + value.find("TVMArray") == std::string::npos && + value.find("arg") != 0) { + PrintIndent(); PrintType(op->var.type(), this->stream); this->stream << ' ' - << AllocVarID(op->var.get()) + << vid << " = " << value << ";\n"; + // modify var idmap for passed in args + } else if (value.find("data") != std::string::npos || + 
value.substr(0, 3) == "arg") { + auto v = op->var.get(); + arg_vars.push_back(v); + stream_table[v] = false; + std::string api_name = "arg" + std::to_string(arg_count); + auto arg = map_arg_type_[api_name]; + // PrintType(std::get<1>(arg), arg_stream); + CHECK(arg_count < arg_shapes.size()); + auto shape = arg_shapes[arg_count]; + arg_top_vars[v] = std::make_tuple(vid, std::get<1>(arg), shape); + arg_count += 1; } + PrintStmt(op->body); } - PrintStmt(op->body); } void CodeGenC::VisitStmt_(const Allocate* op) { @@ -841,44 +977,6 @@ void CodeGenC::VisitStmt_(const Allocate* op) { this->PrintStmt(op->body); } -// record vars transferred between xcel and host -// collect info of needed args & streamed args (types) -class StreamCollector final : public IRVisitor { - public: - StreamCollector(std::vector& stream_stmt_list, - std::vector& stream_expr_list, - std::string initial_scope) - : stream_stmt_list_(stream_stmt_list), - stream_expr_list_(stream_expr_list), - scope_(initial_scope) {} - - void Visit_(const StreamExpr* op) { - if (switch_on) { - stream_expr_list_.push_back(op); - } - } - - void Visit_(const StreamStmt* op) { - if (switch_on) - stream_stmt_list_.push_back(op); - } - - void Visit_(const AttrStmt* op) { - if (op->attr_key == attr::device_scope) { - if (op->value.as()->value != scope_) - switch_on = true; - else switch_on = false; - } - this->Visit(op->body); - } - - private: - std::vector& stream_stmt_list_; - std::vector& stream_expr_list_; - std::string scope_; - bool switch_on{false}; -}; - void CodeGenC::VisitStmt_(const AttrStmt* op) { if (op->attr_key == ir::attr::thread_extent) { IterVar iv(op->node.node_); @@ -895,6 +993,65 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { const Variable* v = op->node.as(); CHECK(v); volatile_buf_.insert(v); + } else if (op->attr_key == ir::attr::device_scope) { + // print top( ... 
in host and enter fpga scope + if (op->value.as()->value == "fpga" && !fpga_scope_) { + fpga_scope_ = true; + PrintIndent(); + + // track the stream usage + StreamCollector collector(arg_vars, stream_table, "cpu"); + collector.Visit(op->body); + + // update data type and name + for (auto k : collector.host_undefined_) { + auto v = k.get(); + arg_vars.push_back(v); + stream_table[v] = true; + LOG(WARNING) << v->name_hint; + auto tuple = arg_top_vars[v]; + arg_top_vars[v] = std::make_tuple(v->name_hint, + std::get<1>(tuple), + std::get<2>(tuple)); + } + TypeCollector visitor(arg_top_vars); + visitor.Visit(op->body); + + // generte function calls + stream << "top("; + int index = 0; + for (size_t i = 0; i < arg_vars.size(); i++) { + auto v = arg_vars[i]; + std::string arg_name; + if (stream_table[v]) + arg_name = std::get<0>(arg_top_vars[v]); + else arg_name = GetVarID(v); + if (index !=0) stream << ", "; + stream << arg_name; + // print kernel func signature + if (index != 0) arg_stream << ", "; + PrintType(std::get<1>(arg_top_vars[v]), arg_stream); + auto shape = std::get<2>(arg_top_vars[v]); + arg_stream << " " << arg_name; + for (size_t k = 0; k < shape.size(); k++) + arg_stream << "[" << shape[k] << "]"; + index++; + } + stream << ");\n"; + + // switch context to device scope + host_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + + // swtich from device to host + } else if (op->value.as()->value == "cpu" && + fpga_scope_) { + fpga_scope_ = false; + device_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + } } this->PrintStmt(op->body); } diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index 03c8b2292..c7260d83a 100644 --- a/tvm/src/codegen/codegen_c.h +++ b/tvm/src/codegen/codegen_c.h @@ -15,6 +15,7 @@ #include #include #include "./codegen_source_base.h" +#include "./merlinc/codeanalys_merlinc.h" #include "../runtime/thread_storage_scope.h" namespace TVM { @@ -50,7 +51,7 @@ class 
CodeGenC : * \brief Add the function to the generated module. * \param f The function to be compiled. */ - virtual void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); /*! * \brief Finalize the compilation and return the code. * \return The code. @@ -189,9 +190,11 @@ class CodeGenC : std::map > var_shape_map_save; std::unordered_map range_save; - // map for generating wrapper + // streaming vars information + size_t arg_count{0}; var2nameType arg_top_vars; std::vector arg_vars; + std::vector> arg_shapes; std::unordered_map stream_table; protected: diff --git a/tvm/src/codegen/codegen_cuda.cc b/tvm/src/codegen/codegen_cuda.cc index badbf2849..3c675ad06 100644 --- a/tvm/src/codegen/codegen_cuda.cc +++ b/tvm/src/codegen/codegen_cuda.cc @@ -25,9 +25,10 @@ void CodeGenCUDA::Init(bool output_ssa) { CHECK_EQ(vid_global_barrier_state_, runtime::symbol::tvm_global_barrier_state); } -void CodeGenCUDA::AddFunction(LoweredFunc f) { +void CodeGenCUDA::AddFunction(LoweredFunc f, + str2tupleMap map_arg_type) { this->stream << "extern \"C\" __global__ "; - CodeGenC::AddFunction(f); + CodeGenC::AddFunction(f, map_arg_type); } void CodeGenCUDA::VisitStmt_(const ir::For* op) { diff --git a/tvm/src/codegen/codegen_cuda.h b/tvm/src/codegen/codegen_cuda.h index e49a47ae3..e0c4f1a41 100644 --- a/tvm/src/codegen/codegen_cuda.h +++ b/tvm/src/codegen/codegen_cuda.h @@ -10,6 +10,7 @@ #include #include #include "./codegen_c.h" +#include "./merlinc/codeanalys_merlinc.h" namespace TVM { namespace codegen { @@ -18,7 +19,8 @@ class CodeGenCUDA final : public CodeGenC { public: CodeGenCUDA(); void Init(bool output_ssa); - void AddFunction(LoweredFunc f); + void AddFunction(LoweredFunc f, + str2tupleMap map_arg_type); // override behavior void VisitStmt_(const ir::For* op) final; void PrintStorageSync(const Call* op) final; diff --git a/tvm/src/codegen/hlsc/build_hlsc.cc b/tvm/src/codegen/hlsc/build_hlsc.cc index e373371a2..2494ee66f 100644 --- 
a/tvm/src/codegen/hlsc/build_hlsc.cc +++ b/tvm/src/codegen/hlsc/build_hlsc.cc @@ -46,7 +46,6 @@ std::string BuildHLSC(Array funcs) { cg.AddFunction(f, map_arg_type); } std::string code = cg.Finish(); - LOG(WARNING) << "HLS C doesn't have runtime, return kernel code"; return code; } diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index 7007f7e1c..880c258f9 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -15,49 +15,50 @@ namespace codegen { void CodeGenHLSC::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { - // Write header files - // TODO: Insert header files here - // Clear previous generated state - this->InitFuncState(f); - // Register alloc buffer type - for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.type()); - } - // Write entry function name - this->stream << "void " << f->name << "("; - // Write arguments - for (size_t i = 0; i < f->args.size(); ++i) { - Var v = f->args[i]; - std::string vid = AllocVarID(v.get()); - if (i != 0) this->stream << ", "; - if (map_arg_type.find(vid) == map_arg_type.end()) { - LOG(WARNING) << vid << " type not found\n"; - PrintType(v.type(), this->stream); - this->stream << ' ' << vid; - } - else { - auto arg = map_arg_type[vid]; - PrintType(std::get<1>(arg), this->stream); - this->stream << ' ' << std::get<0>(arg); - const BufferNode* buf = f->api_args[i].as(); - if (v.type().is_handle() && buf) { - var_shape_map_[buf->data.get()] = buf->shape; - for (size_t i = 0; i < buf->shape.size(); i++) { - this->stream << '['; - this->PrintExpr(buf->shape[i], this->stream); - this->stream << ']'; - } - } - // this->stream << "*"; TODO: create an option for this - } - } - stream << ") {\n"; - int func_scope = this->BeginScope(); - range_ = CollectIterRange(f->body); - this->PrintStmt(f->body); - this->EndScope(func_scope); - this->PrintIndent(); - this->stream << "}\n\n"; + CodeGenC::AddFunction(f, 
map_arg_type); + // // Write header files + // // TODO: Insert header files here + // // Clear previous generated state + // this->InitFuncState(f); + // // Register alloc buffer type + // for (const auto & kv : f->handle_data_type) { + // RegisterHandleType(kv.first.get(), kv.second.type()); + // } + // // Write entry function name + // this->stream << "void " << f->name << "("; + // // Write arguments + // for (size_t i = 0; i < f->args.size(); ++i) { + // Var v = f->args[i]; + // std::string vid = AllocVarID(v.get()); + // if (i != 0) this->stream << ", "; + // if (map_arg_type.find(vid) == map_arg_type.end()) { + // LOG(WARNING) << vid << " type not found\n"; + // PrintType(v.type(), this->stream); + // this->stream << ' ' << vid; + // } + // else { + // auto arg = map_arg_type[vid]; + // PrintType(std::get<1>(arg), this->stream); + // this->stream << ' ' << std::get<0>(arg); + // const BufferNode* buf = f->api_args[i].as(); + // if (v.type().is_handle() && buf) { + // var_shape_map_[buf->data.get()] = buf->shape; + // for (size_t i = 0; i < buf->shape.size(); i++) { + // this->stream << '['; + // this->PrintExpr(buf->shape[i], this->stream); + // this->stream << ']'; + // } + // } + // // this->stream << "*"; TODO: create an option for this + // } + // } + // stream << ") {\n"; + // int func_scope = this->BeginScope(); + // range_ = CollectIterRange(f->body); + // this->PrintStmt(f->body); + // this->EndScope(func_scope); + // this->PrintIndent(); + // this->stream << "}\n\n"; } std::string CodeGenHLSC::GetBufferRef(Type t, const Variable* buffer, Expr index) { @@ -98,19 +99,20 @@ void CodeGenHLSC::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) } void CodeGenHLSC::VisitStmt_(const LetStmt* op) { - std::string value = PrintExpr(op->value); - // Skip the argument retrieving assign statement - std::string vid = AllocVarID(op->var.get()); - if (op->var.type() != Handle() && - value.find("TVMArray") == std::string::npos && - value.find("arg") != 0) { - 
PrintIndent(); - PrintType(op->var.type(), this->stream); - this->stream << ' ' - << vid - << " = " << value << ";\n"; - } - PrintStmt(op->body); + CodeGenC::VisitStmt_(op); + // std::string value = PrintExpr(op->value); + // // Skip the argument retrieving assign statement + // std::string vid = AllocVarID(op->var.get()); + // if (op->var.type() != Handle() && + // value.find("TVMArray") == std::string::npos && + // value.find("arg") != 0) { + // PrintIndent(); + // PrintType(op->var.type(), this->stream); + // this->stream << ' ' + // << vid + // << " = " << value << ";\n"; + // } + // PrintStmt(op->body); } void CodeGenHLSC::GenForStmt(const For* op, std::string pragma, bool before) { diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.h b/tvm/src/codegen/hlsc/codegen_hlsc.h index c3dd0740d..fdd1747fa 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.h +++ b/tvm/src/codegen/hlsc/codegen_hlsc.h @@ -16,20 +16,20 @@ namespace TVM { namespace codegen { class CodeGenHLSC : public CodeGenC { - public: - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + public: + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - void VisitExpr_(const Min* op, std::ostream& os) override; - void VisitExpr_(const Max* op, std::ostream& os) override; + void VisitExpr_(const Min* op, std::ostream& os) override; + void VisitExpr_(const Max* op, std::ostream& os) override; - void VisitStmt_(const LetStmt* op) override; - void VisitStmt_(const IfThenElse* op) override; - void VisitStmt_(const Allocate* op) override; + void VisitStmt_(const LetStmt* op) override; + void VisitStmt_(const IfThenElse* op) override; + void VisitStmt_(const Allocate* op) override; - void GenForStmt(const For* op, std::string pragma, bool before); - - protected: - std::string GetBufferRef(Type t, const Variable* buffer, Expr index); + void GenForStmt(const For* op, std::string pragma, bool before); + + protected: + std::string GetBufferRef(Type t, const Variable* buffer, Expr index); }; } // namespace 
codegen diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index 9e447488a..42f466a98 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -144,22 +144,16 @@ void CodeGenVivadoHLS::VisitStmt_(const Partition* op) { } void CodeGenVivadoHLS::VisitExpr_(const StreamExpr* op, std::ostream& os) { - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); - // std::string vid = GetVarID(op->buffer_var.get()); - os << vid << ".read()"; + CodeGenC::VisitExpr_(op, os); + std::string vid = GetVarID(op->buffer_var.get()); + os << "read(fd_" << vid << ", (void*)&output, sizeof(output);"; } void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { - std::string vid; - if (!var_idmap_.count(op->buffer_var.get())) - vid = AllocVarID(op->buffer_var.get()); - else vid = GetVarID(op->buffer_var.get()); + CodeGenC::VisitStmt_(op); + std::string vid = GetVarID(op->buffer_var.get()); // std::string vid = GetVarID(op->buffer_var.get()); PrintIndent(); - stream << vid; switch (op->stream_type) { case StreamType::Channel: break; @@ -168,8 +162,11 @@ void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { case StreamType::Pipe: break; } - stream << ".write("; + vid = vid.substr(0, vid.find("_stream_send")); + stream << "write(" << "fd_" << vid + << ", " << "(void*)&"; PrintExpr(op->value, stream); + stream << ", sizeof(" << vid; stream << ");\n"; } diff --git a/tvm/src/codegen/hlsc/codegen_vhls.h b/tvm/src/codegen/hlsc/codegen_vhls.h index b6d8dbd39..a2dd5fa0e 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.h +++ b/tvm/src/codegen/hlsc/codegen_vhls.h @@ -16,7 +16,7 @@ namespace TVM { namespace codegen { -class CodeGenVivadoHLS : public CodeGenHLSC { +class CodeGenVivadoHLS final : public CodeGenHLSC { public: void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); void PrintType(Type t, std::ostream& os) 
override; diff --git a/tvm/src/codegen/hlsc/vhls_module.cc b/tvm/src/codegen/hlsc/vhls_module.cc index 7355c7894..1addcf58b 100644 --- a/tvm/src/codegen/hlsc/vhls_module.cc +++ b/tvm/src/codegen/hlsc/vhls_module.cc @@ -349,7 +349,7 @@ class VivadoHLSModuleNode final : public ModuleNode { LOG(CLEAN) << "Running C simulation ..."; system("./out"); LOG(CLEAN) << "Finished C simulation"; - // system("rm out main.cpp"); + system("rm out main.cpp"); FreeSharedMem(args, shmids, arg_sizes); }); } diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index 1ca8f6058..ddc1131f8 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -237,6 +237,5 @@ void CodeGenOpenCL::GenForStmt(const For* op, std::string pragma, bool before) { stream << "}\n"; } - } // namespace codegen } // namespace TVM diff --git a/tvm/src/codegen/opencl/codegen_opencl.h b/tvm/src/codegen/opencl/codegen_opencl.h index 9dd69842d..4f9a15fe5 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.h +++ b/tvm/src/codegen/opencl/codegen_opencl.h @@ -4,7 +4,6 @@ # include # include # include -// # include "./codeanalys_openclc.h" # include "../codegen_c.h" namespace TVM{ diff --git a/tvm/src/codegen/opencl/codegen_sdaccel.cc b/tvm/src/codegen/opencl/codegen_sdaccel.cc index 601e566f0..cba08fa2d 100644 --- a/tvm/src/codegen/opencl/codegen_sdaccel.cc +++ b/tvm/src/codegen/opencl/codegen_sdaccel.cc @@ -57,64 +57,6 @@ void CodeGenSDACCEL::AddFunction(LoweredFunc f, this->stream << "}\n\n"; } - -// void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) -// int lanes = t.lanes(); -// if (t.is_handle()) { -// CHECK_EQ(lanes, 1) -// << "do not yet support vector types"; -// os << "void*"; return; -// } -// if ( t== Bool() ) { -// os << "bool"; return; -// } -// bool fail = false; -// if (t.is_float()) { -// switch (t.bits()) { -// case 16: -// os << "half"; -// enable_fp16_ = true; -// break; -// case 32: -// os << 
"float"; -// break; -// case 64: -// os << "double"; -// enable_fp64_ = true; -// break; -// default: -// fail = true; -// break; -// } -// if (!fail && lanes == 1) return; -// if (!fail && (lanes >= 2 && lanes <= 16)) { -// os << lanes; return; -// } -// } else if (t.is_uint() || t.is_int()) { -// if (t.is_uint()) { -// os << 'u'; -// } -// if (t.bits() == 8 && t.lanes() == 4) { -// // directly 4 8 bit int in integer. -// os << "int"; return; -// } -// switch (t.bits()) { -// case 8: os << "char"; break; -// case 16: os << "short"; break; -// case 32: os << "int"; break; -// case 64: os << "long"; break; -// case 1: os << "int"; break; -// default: fail = true; break; -// } -// if (!fail && lanes == 1) return; -// if (!fail && (lanes >= 2 && lanes <= 16)) { -// os << lanes; return; -// } -// } -// LOG(FATAL) << "Cannot convert type " << t << " to SDAccel type"; -// } - - void CodeGenSDACCEL::PrintType(Type t, std::ostream& os) { // NOLINT(*) int lanes = t.lanes(); if (t.is_handle()) { @@ -194,10 +136,9 @@ void CodeGenSDACCEL::VisitStmt_(const For* op) { if (unroll_factor > 0) { os << "__attribute__((opencl_unroll_hint("; os << unroll_factor << ")))\n"; - } - else + } else { os << "\n"; - + } } else if (op->for_type == ForType::Pipelined) { int II = 1, i = 0; @@ -237,7 +178,7 @@ void CodeGenSDACCEL::VisitStmt_(const Partition* op) { } stream << op->factor << ","; stream << op->dim << ")))\n"; - }else { + } else { if (op->dim == 0) { stream << "__attribute__((xcl_array_partition))\n"; } else { diff --git a/tvm/src/codegen/opencl/sdaccel_module.cc b/tvm/src/codegen/opencl/sdaccel_module.cc index 3876f14d2..63f12e86b 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.cc +++ b/tvm/src/codegen/opencl/sdaccel_module.cc @@ -53,45 +53,12 @@ inline TVMType Type2TVMType(Type t) { return tt; } - -// inline std::string Type2Str(TVMType t) { -// std::string str = ""; -// if (t.code == kDLInt) { -// if (t.fracs > 0) str += "ap_fixed<"; -// else str += "ap_int<"; -// str += 
std::to_string(static_cast(t.bits)); -// if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; -// else str += ">"; -// } else if (t.code == kDLUInt) { -// if (t.fracs > 0) str += "ap_ufixed<"; -// else str += "ap_uint<"; -// str += std::to_string(static_cast(t.bits)); -// if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; -// else str += ">"; -// } else if (t.code == kDLFloat) { -// str += "float"; -// } else { -// LOG(FATAL) << "Unknown type"; -// } -// return str; -// } - inline std::string Type2Str(TVMType t) { std::string str = ""; if (t.code == kDLInt) { str += "int"; - // if (t.fracs > 0) str += "ap_fixed<"; - // else str += "ap_int<"; - // str += std::to_string(static_cast(t.bits)); - // if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - // else str += ">"; } else if (t.code == kDLUInt) { str += "unsigned int"; - // if (t.fracs > 0) str += "ap_ufixed<"; - // else str += "ap_uint<"; - // str += std::to_string(static_cast(t.bits)); - // if (t.fracs > 0) str += ", " + std::to_string(static_cast(t.bits - t.fracs)) + ">"; - // else str += ">"; } else if (t.code == kDLFloat) { str += "float"; } else { @@ -180,14 +147,6 @@ void FreeSharedMem(TVMArgs& args, const std::vector& shmids, std::vector& arg_sizes) { for (size_t i = 0; i < shmids.size(); i++) { - // if (args[i].type_code() == kArrayHandle) { - // TVMArray* arr = args[i]; - // int shmid = shmids[i]; - // void* mem = shmat(shmid, nullptr, 0); - // memcpy(arr->data, mem, arg_sizes[i]); - // shmdt(mem); - // shmctl(shmid, IPC_RMID, nullptr); - // } TVMArray* arr = args[i]; int shmid = shmids[i]; void* mem = shmat(shmid, nullptr, 0); @@ -461,20 +420,6 @@ void GenHostCode(TVMArgs& args, stream << "char* xclbinFilename = argv[1];\n"; stream << "\n"; - - // Source Memories - // std::vector source_a(LENGTH); - // for (int i = 0;i < args.size();i++) { - // PrintIndent(stream, indent); - // stream << 
Type2Str(arg_types[i]) << " "; - // stream << arg_types[i] << " "; - // stream << "arg_" << i; - // TVMArray* arr = args[i]; - // for (int j = 0;j < arr->ndim;j++) { - // stream << "[" << arr->shape[j] << "]"; - // } - // stream << ";\n"; - // } for (int i = 0;i < args.size();i++) { PrintIndent(stream, indent); stream << "std::vector<" << Type2Str(arg_types[i]); @@ -507,30 +452,17 @@ void GenHostCode(TVMArgs& args, stream << "\n"; for (int i = 0;i < args.size();i++ ) { - // if (args[i].type_code() == kArrayHandle) { - // // read from the shared memory - // PrintIndent(stream, indent); - // stream << Type2Str(arg_types[i]) << "* "; - // stream << "arg_" << i << " = "; - // stream << "(" << Type2Str(arg_types[i]) << "*)"; - // stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - // TVMArray* arr = args[i]; - // // copy from shared mem - // PrintCopy(arr, stream, indent, i); - // } - // read from the shared memory - PrintIndent(stream, indent); - stream << Type2Str(arg_types[i]) << "* "; - stream << "arg_" << i << " = "; - stream << "(" << Type2Str(arg_types[i]) << "*)"; - stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; - TVMArray* arr = args[i]; - // copy from shared mem - PrintCopy(arr, stream, indent, i); + // read from the shared memory + PrintIndent(stream, indent); + stream << Type2Str(arg_types[i]) << "* "; + stream << "arg_" << i << " = "; + stream << "(" << Type2Str(arg_types[i]) << "*)"; + stream << "shmat(" << shmids[i] << ", nullptr, 0);\n"; + TVMArray* arr = args[i]; + // copy from shared mem + PrintCopy(arr, stream, indent, i); } - - // Getting First Platform PrintIndent(stream, indent); stream << "std::vector platforms;\n"; @@ -540,7 +472,6 @@ void GenHostCode(TVMArgs& args, stream << "cl::Platform platform = platforms[0];\n"; stream << "\n"; - // Getting ACCELERATOR Devices and selecting 1st such device PrintIndent(stream, indent); stream << "std::vector devices;\n"; @@ -557,7 +488,6 @@ void GenHostCode(TVMArgs& args, stream << 
"cl::CommandQueue q(context, device);\n"; stream << "\n"; - // Loading XCL Bin into char buffer PrintIndent(stream, indent); stream << "std::ifstream bin_file(xclbinFilename, std::ifstream::binary);\n"; @@ -573,7 +503,6 @@ void GenHostCode(TVMArgs& args, stream << "bin_file.read(buf, nb);\n"; stream << "\n"; - // Creating Program from Binary File PrintIndent(stream, indent); stream << "cl::Program::Binaries bins;\n"; @@ -585,7 +514,6 @@ void GenHostCode(TVMArgs& args, stream << "cl::Program program(context, devices, bins);\n"; stream << "\n"; - // Creating Kernel and Functor of Kernel PrintIndent(stream, indent); stream << "int err1;\n"; @@ -600,13 +528,9 @@ void GenHostCode(TVMArgs& args, stream << "cl::Buffer&, "; } } - // stream << "auto default_function = cl::KernelFunctor(kernel);\n"; stream << "\n"; - // Creating Buffers inside Device - // cl::Buffer buffer_a(context, CL_MEM_READ_ONLY, vector_size_bytes); - // cl::Buffer buffer_b(context, CL_MEM_WRITE_ONLY, vector_size_bytes); for (int i = 0;i < args.size();i++) { PrintIndent(stream, indent); stream << "cl::Buffer buffer_" << i; @@ -615,7 +539,6 @@ void GenHostCode(TVMArgs& args, stream << "\n"; // Copying input data to Device buffer from host memory - // q.enqueueWriteBuffer(buffer_a, CL_TRUE, 0, vector_size_bytes, source_a.data()); for (int i = 0;i < args.size();i++) { PrintIndent(stream, indent); stream << "q.enqueueWriteBuffer(buffer_" << i; @@ -634,14 +557,11 @@ void GenHostCode(TVMArgs& args, stream << ", "; } stream << ");\n"; - PrintIndent(stream, indent); stream << "q.finish();\n"; stream << "\n"; - // Copying Device result data to Host memory - // q.enqueueReadBuffer(buffer_c, CL_TRUE, 0, vector_size_bytes, result_krnl.data()); for (int i = 0;i < args.size(); i++) { PrintIndent(stream, indent); stream << "q.enqueueReadBuffer(buffer_" << i; @@ -674,7 +594,6 @@ class SDAccelModuleNode final : public ModuleNode { const char* type_key() const { return "sdaccel_sw_emu"; - } PackedFunc GetFunction( diff 
--git a/tvm/src/codegen/opencl/sdaccel_module.h b/tvm/src/codegen/opencl/sdaccel_module.h index 313f08214..01f361dba 100644 --- a/tvm/src/codegen/opencl/sdaccel_module.h +++ b/tvm/src/codegen/opencl/sdaccel_module.h @@ -1,7 +1,3 @@ -/* - Yang.Bai - yb269@cornell.edu -*/ #ifndef SDACCEL_MODULE_H #define SDACCEL_MODULE_H diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index 17b6b71bf..a4caac6ef 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -469,7 +469,6 @@ Tensor Schedule::move_to(const Tensor& target, consumer_input_placeholders.push_back(target_buffer); consumer_output_placeholders.push_back(consumer_buffer); - // stream statement // std::vector csm_indices; // std::vector csm_loop_vars; // for (size_t i = 0; i < target->shape.size(); i++) { @@ -486,18 +485,29 @@ Tensor Schedule::move_to(const Tensor& target, load_expr, stream_type, channel_depth); - // handle placeholder back to host case + + Expr sender_scope, receiver_scope; size_t consumer_pos = min_pos; switch (device_type) { case DeviceType::CPU: consumer_pos = num_stage; + sender_scope = StringImm::make("fpga"); + receiver_scope = StringImm::make("cpu"); break; case DeviceType::FPGA: + sender_scope = StringImm::make("cpu"); + receiver_scope = StringImm::make("fpga"); break; case DeviceType::GPU: + sender_scope = StringImm::make("cpu"); + receiver_scope = StringImm::make("gpu"); break; } + consumer_body = AttrStmt::make( + consumer_buffer->data, + "device_scope", sender_scope, consumer_body); + // for (size_t j = 0; j < target->shape.size(); j++) { // consumer_body = For::make( // VarExpr(csm_loop_vars[j]), @@ -515,6 +525,7 @@ Tensor Schedule::move_to(const Tensor& target, // n->input_placeholders = consumer_input_placeholders; // n->output_placeholders = consumer_output_placeholders; // Operation consumer_op(n); + Operation consumer_op = ExternOpNode::make(consumer_name, "", Array(), 
@@ -523,6 +534,15 @@ Tensor Schedule::move_to(const Tensor& target, consumer_output_placeholders, consumer_body); Stage consumer_stage = Stage(consumer_op); + // insert sender before bound for (host,xcel <- host) case + if (device_type == DeviceType::FPGA) { + if (split_bound == 0) + split_bound = consumer_pos + 1; + else { // insert host sender before bound + consumer_pos = split_bound; + split_bound += 1; + } + } stages->data.insert(stages->data.begin() + consumer_pos, consumer_stage.node_); (*this)->stage_map.Set(consumer_op, consumer_stage); @@ -568,22 +588,11 @@ Tensor Schedule::move_to(const Tensor& target, // DeviceAPI::None, // for_stmt); // } - Expr device; - switch (device_type) { - case DeviceType::CPU: - device = StringImm::make("cpu"); - break; - case DeviceType::FPGA: - device = StringImm::make("fpga"); - break; - case DeviceType::GPU: - device = StringImm::make("gpu"); - break; - } + // attr annotates new scope Stmt body = AttrStmt::make( target_buffer->data, - "device_scope", device, for_stmt); + "device_scope", receiver_scope, for_stmt); Tensor producer = ExternOpNode::make(producer_buffer->name, "", Array(), @@ -592,10 +601,13 @@ Tensor Schedule::move_to(const Tensor& target, producer_output_placeholders, body).output(0); - // create new stage and return stream tensors + // recv stage creation + return tensor Stage producer_stage = Stage(producer->op); size_t pos = FindNodeRef(stages, consumer_stage); - stages->data.insert(stages->data.begin() + pos + 1, producer_stage.node_); + if (split_bound == 0 || device_type == DeviceType::CPU) + pos = pos + 1; + else pos = split_bound + 1; // insert to xcel range + stages->data.insert(stages->data.begin() + pos, producer_stage.node_); (*this)->stage_map.Set(producer->op, producer_stage); // update consumer stages with new tensor and buffer @@ -618,7 +630,7 @@ Tensor Schedule::move_to(const Tensor& target, Stmt new_body = AttrStmt::make( target_buffer->data, "device_scope", - device, + receiver_scope, 
op->body); s->op = ExternOpNode::make( op->name, diff --git a/tvm/src/template/design/CLKernel.cpp b/tvm/src/template/sdaccel/CLKernel.cpp similarity index 100% rename from tvm/src/template/design/CLKernel.cpp rename to tvm/src/template/sdaccel/CLKernel.cpp diff --git a/tvm/src/template/design/CLKernel.h b/tvm/src/template/sdaccel/CLKernel.h similarity index 100% rename from tvm/src/template/design/CLKernel.h rename to tvm/src/template/sdaccel/CLKernel.h diff --git a/tvm/src/template/design/CLMemObj.cpp b/tvm/src/template/sdaccel/CLMemObj.cpp similarity index 100% rename from tvm/src/template/design/CLMemObj.cpp rename to tvm/src/template/sdaccel/CLMemObj.cpp diff --git a/tvm/src/template/design/CLMemObj.h b/tvm/src/template/sdaccel/CLMemObj.h similarity index 100% rename from tvm/src/template/design/CLMemObj.h rename to tvm/src/template/sdaccel/CLMemObj.h diff --git a/tvm/src/template/design/CLWorld.cpp b/tvm/src/template/sdaccel/CLWorld.cpp similarity index 100% rename from tvm/src/template/design/CLWorld.cpp rename to tvm/src/template/sdaccel/CLWorld.cpp diff --git a/tvm/src/template/design/CLWorld.h b/tvm/src/template/sdaccel/CLWorld.h similarity index 100% rename from tvm/src/template/design/CLWorld.h rename to tvm/src/template/sdaccel/CLWorld.h diff --git a/tvm/src/template/design/Makefile b/tvm/src/template/sdaccel/Makefile similarity index 100% rename from tvm/src/template/design/Makefile rename to tvm/src/template/sdaccel/Makefile diff --git a/tvm/src/template/design/harness.mk b/tvm/src/template/sdaccel/harness.mk similarity index 100% rename from tvm/src/template/design/harness.mk rename to tvm/src/template/sdaccel/harness.mk diff --git a/tvm/src/template/design/run.tcl b/tvm/src/template/sdaccel/run.tcl similarity index 100% rename from tvm/src/template/design/run.tcl rename to tvm/src/template/sdaccel/run.tcl diff --git a/tvm/src/template/design/run_hw.sh b/tvm/src/template/sdaccel/run_hw.sh similarity index 100% rename from 
tvm/src/template/design/run_hw.sh rename to tvm/src/template/sdaccel/run_hw.sh diff --git a/tvm/src/template/design/run_sw.sh b/tvm/src/template/sdaccel/run_sw.sh similarity index 100% rename from tvm/src/template/design/run_sw.sh rename to tvm/src/template/sdaccel/run_sw.sh diff --git a/tvm/src/template/design/utils.cpp b/tvm/src/template/sdaccel/utils.cpp similarity index 100% rename from tvm/src/template/design/utils.cpp rename to tvm/src/template/sdaccel/utils.cpp diff --git a/tvm/src/template/design/utils.h b/tvm/src/template/sdaccel/utils.h similarity index 100% rename from tvm/src/template/design/utils.h rename to tvm/src/template/sdaccel/utils.h From 280ae2f8ef7e0fabc0489dbdccff841ff993fc74 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Fri, 6 Dec 2019 23:53:14 -0500 Subject: [PATCH 097/103] [update] codegen construct for streaming --- hlib/python/hlib/__init__.py | 2 +- samples/conv/conv.py | 70 +++++ samples/sobel/sobel.py | 4 +- tvm/HalideIR/src/ir/IR.cpp | 2 +- tvm/HalideIR/src/ir/IR.h | 4 +- tvm/src/codegen/build_common.cc | 22 -- tvm/src/codegen/build_util.cc | 6 +- tvm/src/codegen/codegen_c.cc | 244 +++++++++--------- tvm/src/codegen/codegen_c.h | 58 ++++- tvm/src/codegen/hlsc/codegen_vhls.cc | 201 ++++++++++++++- tvm/src/codegen/hlsc/codegen_vhls.h | 6 + tvm/src/codegen/opencl/codegen_aocl.cc | 38 ++- tvm/src/codegen/opencl/codegen_aocl.h | 28 +- tvm/src/schedule/compute_primitive.cc | 109 -------- tvm/src/schedule/schedule_dataflow_rewrite.cc | 98 +++---- 15 files changed, 535 insertions(+), 357 deletions(-) create mode 100644 samples/conv/conv.py diff --git a/hlib/python/hlib/__init__.py b/hlib/python/hlib/__init__.py index 3d0ef336b..416239f3a 100644 --- a/hlib/python/hlib/__init__.py +++ b/hlib/python/hlib/__init__.py @@ -1 +1 @@ -from . import nn, ppac +from . 
import nn diff --git a/samples/conv/conv.py b/samples/conv/conv.py new file mode 100644 index 000000000..f350f4d03 --- /dev/null +++ b/samples/conv/conv.py @@ -0,0 +1,70 @@ +import heterocl as hcl +import hlib +import numpy as np +from PIL import Image +from urllib.request import urlopen + +batch_size = 1 +hcl.init(hcl.UInt(32)) +dtype = hcl.UInt(32) +image_size = () +kernel_size = 3 + +# setup target using vivado +tool = hcl.tool.vivado("csim") +target = hcl.platform.zc706 + +def conv(): + image = hcl.placeholder((batch_size, 1, 256, 256), "input_image") + k1 = hcl.placeholder((1, 1, 3, 3), "kernel_1") + k2 = hcl.placeholder((1, 1, 3, 3), "kernel_2") + + def kernel(input_image, kernel_1, kernel_2): + + # return tensor required (cannot do def_()) + interm_shape = (1,1,254,254) + output_shape = (1,1,252,252) + + # make compute wrapped in hcl def + module1 = hcl.def_([input_image.shape, kernel_1.shape, interm_shape], name="conv1")(hlib.nn.conv2d_nchw_imp) + module2 = hcl.def_([interm_shape, kernel_2.shape, output_shape], name="conv2")(hlib.nn.conv2d_nchw_imp) + conv1 = hcl.compute(interm_shape, lambda *args: 0) + conv2 = hcl.compute(output_shape, lambda *args: 0) + module1(input_image, kernel_1, conv1) + module2(conv1, kernel_2, conv2) + + # derivative module for normalization + return hcl.compute(output_shape, lambda *args: conv2[args], name="derv") + + s = hcl.create_schedule([image, k1, k2], kernel) + + # data moved to local + i0, k10 = s.to([image, k1], target.fpga) + s.to([i0, k10], s[kernel.conv1]) + s.to(kernel.derv, target.cpu) + + # create stream channel between modules + print(type(target.fpga), hcl.lower(s)) + return hcl.build(s, target) + +# Load sample data +img = Image.open(urlopen('http://i.stack.imgur.com/8zINU.gif')) +kernel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) +kernel_y = np.flip(kernel_x.T.T, axis=0) +img = np.array(img) + +img = img[np.newaxis, ...] +img = img[np.newaxis, ...] +kernel_x = kernel_x[np.newaxis, ...] 
+kernel_x = kernel_x[np.newaxis, ...] +kernel_y = kernel_y[np.newaxis, ...] +kernel_y = kernel_y[np.newaxis, ...] + +hcl_input = hcl.asarray(img, dtype) +kernel_x = hcl.asarray(kernel_x, dtype) +kernel_y = hcl.asarray(kernel_y, dtype) +hcl_output = hcl.asarray(np.zeros((1,1,254,254)), dtype) + +f = conv() +f(hcl_input, kernel_x, kernel_y, hcl_output) + diff --git a/samples/sobel/sobel.py b/samples/sobel/sobel.py index 61a6cb9a3..a4299d8ae 100644 --- a/samples/sobel/sobel.py +++ b/samples/sobel/sobel.py @@ -5,8 +5,8 @@ from urllib.request import urlopen batch_size = 1 -hcl.init(hcl.Float()) -dtype = hcl.Float() +hcl.init(hcl.UInt(32)) +dtype = hcl.UInt(32) image_size = () kernel_size = 3 diff --git a/tvm/HalideIR/src/ir/IR.cpp b/tvm/HalideIR/src/ir/IR.cpp index 783dd5377..a604b6fd2 100644 --- a/tvm/HalideIR/src/ir/IR.cpp +++ b/tvm/HalideIR/src/ir/IR.cpp @@ -694,7 +694,7 @@ Expr Quantize::make(Expr body, Expr bitwidth) { Stmt KernelDef::make(Array args, Array> api_args, Array api_types, Stmt body, Expr ret_void, - Type ret_type, std::string name, Array channels) { + Type ret_type, std::string name, Array channels) { internal_assert(api_args.size() == api_types.size()) << "KernelDef of unmatched args\n"; for (size_t i = 0; i < args.size(); i++) { internal_assert(args[i].defined()) << "KernelDef of undefined arg\n"; diff --git a/tvm/HalideIR/src/ir/IR.h b/tvm/HalideIR/src/ir/IR.h index 7d1429200..e8a8835bf 100644 --- a/tvm/HalideIR/src/ir/IR.h +++ b/tvm/HalideIR/src/ir/IR.h @@ -1056,12 +1056,12 @@ struct KernelDef : public StmtNode { Type ret_type; std::string name; // args to stream data - Array channels; + Array channels; EXPORT static Stmt make(Array args, Array> api_args, Array api_types, Stmt body, Expr ret_void, Type ret_type, std::string name, - Array channels); + Array channels); void VisitAttrs(IR::AttrVisitor* v) final { v -> Visit("args", &args); diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index dba722852..f7c5ce9cd 100644 
--- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -138,28 +138,6 @@ namespace codegen { using var2nameType = std::unordered_map>>; -// collect type info for vars -class TypeCollector final : public IRVisitor { - public: - var2nameType& top_args_; - TypeCollector(var2nameType& top_args) - : top_args_(top_args) {} - void Visit_(const Allocate *op) { - auto v = op->buffer_var.get(); - - // record type and shape - if (top_args_.count(v)) { - std::vector shape; - for (size_t i = 0; i < op->extents.size(); i++) - shape.push_back(op->extents[i].as()->value); - top_args_[v] = std::make_tuple( - std::get<0>(top_args_[v]), - op->type, shape); - } - IRVisitor::Visit_(op); - } -}; - using argInfo = std::vector>>; diff --git a/tvm/src/codegen/build_util.cc b/tvm/src/codegen/build_util.cc index 5ea1bf722..29a5d49ec 100644 --- a/tvm/src/codegen/build_util.cc +++ b/tvm/src/codegen/build_util.cc @@ -761,9 +761,9 @@ void GenHostCode(TVMArgs& args, auto info = arg_info[k]; if (std::get<1>(info)) { PrintIndent(stream, indent); - stream << "int fd_" << std::get<0>(info) - << " = open(\"" << "/dev/xillybus_read_32" - << "\", O_WRONLY);" << "\n"; + stream << "hls::stream<" + << PrintHalideType(std::get<2>(info)) + << "> " << "fd_" << std::get<0>(info) << ";\n"; } } stream << "\n"; diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 23ba899ca..8a0b5e29e 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -3,7 +3,6 @@ * \file codegen_c.cc */ #include -#include #include #include #include @@ -16,120 +15,122 @@ namespace codegen { using namespace ir; +Type String2Type(std::string& s) { + if (s.front() == '\"' && s.back() == '\"') { + s.erase(0, 1); + s.pop_back(); + } + std::istringstream is(s); + halideir_type_code_t code = Type::Int; + if (s.substr(0, 3) == "int") { + code = Type::Int; s = s.substr(3); + } else if (s.substr(0, 4) == "uint") { + code = Type::UInt; s = s.substr(4); + } else if (s.substr(0, 5) 
== "float") { + code = Type::Float; s = s.substr(5); + } else if (s.substr(0, 5) == "float") { + code = Type::Float; s = s.substr(5); + } else if (s == "handle") { + return Handle(); + } else { + LOG(FATAL) << "unknown type " << s; + } + int bits = 32, lanes = 1; + if (sscanf(s.c_str(), "%dx%d", &bits, &lanes) == 0) { + LOG(FATAL) << "unknown type " << s; + } + return Type(code, bits, lanes); +} + +// generate row major index +std::string getIndex(std::vector shape) { + std::string str; + int mul = 1; + for (size_t i = shape.size(); i > 0; i--) { + mul = mul * shape[i-1]; + str += "i" + std::to_string(i-1) + + "*" + std::to_string(mul); + if (i != 1) str += "+ "; + } + return str; +} + // collect type info for vars -class TypeCollector final : public IRVisitor { - public: - var2nameType& top_args_; - TypeCollector(var2nameType& top_args) - : top_args_(top_args) {} - void Visit_(const Allocate *op) { - auto v = op->buffer_var.get(); - - // record type and shape - if (top_args_.count(v)) { - std::vector shape; - for (size_t i = 0; i < op->extents.size(); i++) - shape.push_back(op->extents[i].as()->value); - top_args_[v] = std::make_tuple( - std::get<0>(top_args_[v]), - op->type, shape); - } - IRVisitor::Visit_(op); - } -}; - -// record of vars for top func signature -// vars include passed-in and not registered vars on host -class StreamCollector final : public IRVisitor { - public: - StreamCollector(std::vector& arg_vars, - std::unordered_map& stream_table, - std::string initial_scope) - : arg_vars_(arg_vars), - stream_table_(stream_table), - scope_(initial_scope) {} - - // record alloc on host - void Visit_(const Allocate *op) { - if (!switch_on) - this->HandleDef(op->buffer_var.get()); - IRVisitor::Visit_(op); - } - - void Visit_(const Load *op) { - if (!switch_on) { - this->HandleUse(op->buffer_var); - } - IRVisitor::Visit_(op); - } +void TypeCollector::Visit_(const Allocate *op) { + auto v = op->buffer_var.get(); + if (top_args_.count(v)) { + std::vector shape; 
+ for (size_t i = 0; i < op->extents.size(); i++) + shape.push_back(op->extents[i].as()->value); + top_args_[v] = std::make_tuple(std::get<0>(top_args_[v]), op->type, shape); + } + IRVisitor::Visit_(op); +} - // update placeholder status - void Visit_(const Store* op) { - if (!switch_on) { // count use on host - if (auto val = op->value.as()) - this->HandleDef(op->buffer_var.get()); - this->HandleUse(op->buffer_var); - } - IRVisitor::Visit_(op); - } +void StreamCollector::Visit_(const Allocate *op) { + this->HandleDef(op->buffer_var.get()); + IRVisitor::Visit_(op); +} + +void StreamCollector::Visit_(const Load *op) { + this->HandleUse(op->buffer_var); + IRVisitor::Visit_(op); +} - void Visit_(const StreamStmt* op) { - if (!switch_on) { // in host scope - this->HandleDef(op->buffer_var.get()); - } - IRVisitor::Visit_(op); - } +// update placeholder status +void StreamCollector::Visit_(const Store* op) { + if (auto val = op->value.as()) { + this->HandleDef(op->buffer_var.get()); + } + this->HandleUse(op->buffer_var); + IRVisitor::Visit_(op); +} - void Visit_(const AttrStmt* op) { - if (op->attr_key == attr::device_scope) { - if (op->value.as()->value != scope_) - switch_on = true; - else switch_on = false; - } - IRVisitor::Visit_(op); - } +void StreamCollector::Visit_(const StreamStmt* op) { + this->HandleDef(op->buffer_var.get()); + IRVisitor::Visit_(op); +} - // additional data saved into stream table (for streamed - // data we keep the new id for arg_stream in var_idmap, - // and non-streamed using the repalced arg_top_k name) - void HandleDef(const Variable* v) { - CHECK(!host_def_count_.count(v)) - << "variable " << v->name_hint - << " has already been defined, the Stmt is not SSA"; - CHECK(!host_use_count_.count(v)) - << "variable " << v->name_hint - << " has been used before definition!"; - host_use_count_[v] = 0; - host_def_count_[v] = 1; - } +void StreamCollector::Visit_(const AttrStmt* op) { + if (op->attr_key == attr::device_scope) { + if 
(op->value.as()->value != scope_) + switch_on = true; + else switch_on = false; + } + IRVisitor::Visit_(op); +} + +// additional data saved into stream table +void StreamCollector::HandleDef(const Variable* v) { + if (!switch_on) { // def on host scope + CHECK(!host_def_count_.count(v)) + << "variable " << v->name_hint + << " has already been defined, the Stmt is not SSA"; + CHECK(!host_use_count_.count(v)) + << "variable " << v->name_hint + << " has been used before definition!"; + host_use_count_[v] = 0; + host_def_count_[v] = 1; + } +} - void HandleUse(const Expr& v) { - CHECK(v.as()); - Var var(v.node_); - auto it = host_use_count_.find(var.get()); - if (it != host_use_count_.end()) { - if (it->second >= 0) { - ++it->second; - } - } else { - if (!stream_table_.count(var.get())) { - host_undefined_.push_back(var); - host_use_count_[var.get()] = -1; - } +void StreamCollector::HandleUse(const Expr& v) { + CHECK(v.as()); + Var var(v.node_); + auto it = host_use_count_.find(var.get()); + if (!switch_on) { // def on host scope + if (it != host_use_count_.end()) { + if (it->second >= 0) { + ++it->second; + } + } else { + if (!stream_table_.count(var.get())) { + host_undefined_.push_back(var); + host_use_count_[var.get()] = -1; } } - - bool host_scope_{false}; - Array host_undefined_; - std::unordered_map host_use_count_; - std::unordered_map host_def_count_; - - private: - std::vector& arg_vars_; - std::unordered_map& stream_table_; - std::string scope_; - bool switch_on{true}; -}; + } +} void CodeGenC::Init(bool output_ssa) { print_ssa_form_ = output_ssa; @@ -202,8 +203,13 @@ std::string CodeGenC::GetHost() { std::string CodeGenC::GetDevice() { std::ostringstream device; - device << "void top(" << arg_stream.str() - << "){\n" << device_stream.str(); + device << "void top(" << arg_stream.str() << "){\n"; + + // process device code + PreProcess(device); + device << device_stream.str(); + PostProcess(device); + if (fpga_scope_) device << stream.str(); return 
decl_stream.str() + module_stream.str() + device.str() + "}\n\n"; @@ -1000,7 +1006,7 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { PrintIndent(); // track the stream usage - StreamCollector collector(arg_vars, stream_table, "cpu"); + StreamCollector collector(stream_table, "cpu"); collector.Visit(op->body); // update data type and name @@ -1008,7 +1014,6 @@ void CodeGenC::VisitStmt_(const AttrStmt* op) { auto v = k.get(); arg_vars.push_back(v); stream_table[v] = true; - LOG(WARNING) << v->name_hint; auto tuple = arg_top_vars[v]; arg_top_vars[v] = std::make_tuple(v->name_hint, std::get<1>(tuple), @@ -1161,18 +1166,26 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { // print function signature PrintType(op->ret_type, stream); stream << " " << op->name << "("; + for (size_t k = 0; k < op->channels.size(); k+=2) { + int pos = op->channels[k].as()->value; + stream_arg_pos[op->name].insert(pos); + } for (size_t i = 0; i < op->args.size(); ++i) { VarExpr v = op->args[i]; var_shape_map_[v.get()] = op->api_args[i]; std::string vid = AllocVarID(v.get()); if (i != 0) stream << ", "; - this->stream << vid; + std::string str = PrintExpr(op->api_types[i]); + Type type = String2Type(str); + PrintType(type, stream); + this->stream << " " << vid << "["; if (v.type().is_handle()) { for (size_t j = 0; j < op->api_args[i].size(); j++) { - this->stream << '['; - this->PrintExpr(op->api_args[i][j], this->stream); - this->stream << ']'; + if (j != 0) stream << "* "; + auto dim = op->api_args[i][j].as()->value; + this->stream << dim; } + this->stream << ']'; } } stream << ") {\n"; @@ -1191,7 +1204,6 @@ void CodeGenC::VisitStmt_(const KernelDef* op) { } void CodeGenC::VisitStmt_(const KernelStmt *op) { - // kernel stmt (call module func) PrintIndent(); stream << op->name << "("; for (size_t i = 0; i < op->args.size(); i++) { diff --git a/tvm/src/codegen/codegen_c.h b/tvm/src/codegen/codegen_c.h index c7260d83a..d7292b38f 100644 --- a/tvm/src/codegen/codegen_c.h +++ 
b/tvm/src/codegen/codegen_c.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -27,6 +28,52 @@ using str2tupleMap = std::unordered_map>; using var2nameType = std::unordered_map>>; +Type String2Type(std::string& s); +std::string getIndex(std::vector shape); + +/*! + * \brief A data type collector + * + * CodeGenC TypeCollector gathers information + * of different types of each variable + * + */ +class TypeCollector final : public IRVisitor { + public: + var2nameType& top_args_; + TypeCollector(var2nameType& top_args) : top_args_(top_args) {}; + void Visit_(const Allocate *op); +}; + +/*! + * \brief An undefined variable collector + * + * CodeGenC stream data collector detects undefined + * variable and create channels for them + * + * */ +class StreamCollector final : public IRVisitor { + public: + Array host_undefined_; + std::unordered_map host_use_count_; + std::unordered_map host_def_count_; + StreamCollector(std::unordered_map& stream_table, + std::string initial_scope) + : stream_table_(stream_table), + scope_(initial_scope) {}; + void Visit_(const Allocate *op); + void Visit_(const Load *op); + void Visit_(const Store *op); + void Visit_(const StreamStmt *op); + void Visit_(const AttrStmt *op); + void HandleDef(const Variable* v); + void HandleUse(const Expr& v); + private: + std::unordered_map& stream_table_; + std::string scope_; + bool switch_on{true}; +}; + /*! * \brief A base class to generate C code. 
* @@ -190,12 +237,21 @@ class CodeGenC : std::map > var_shape_map_save; std::unordered_map range_save; - // streaming vars information + // index into ap_arg_type size_t arg_count{0}; + // map {var : (vid, Type, shape)} var2nameType arg_top_vars; + // vector {vars} in top function std::vector arg_vars; + // vector of top function arg dimension std::vector> arg_shapes; + // whether the function arg is streamed std::unordered_map stream_table; + // map from kernel name to set of streamed arg position index + std::unordered_map> stream_arg_pos; + // pre and post processing device code + virtual void PreProcess(std::ostringstream& os) {}; + virtual void PostProcess(std::ostringstream& os) {}; protected: void SaveFuncState(LoweredFunc f); diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index 42f466a98..db634b752 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -21,11 +21,59 @@ namespace TVM { namespace codegen { +void CodeGenVivadoHLS::PreProcess(std::ostringstream& os) { + return; + os << "\n"; + int indent = 2; + for (size_t i = 0; i < arg_vars.size(); i++) { + auto v = arg_vars[i]; + std::string arg_name; + if (stream_table[v]) + arg_name = std::get<0>(arg_top_vars[v]); + else arg_name = GetVarID(v); + + // create local buffer saving result + auto shape = std::get<2>(arg_top_vars[v]); + for (size_t j = 0; j < shape.size(); j++) { + for (int k = 0; k < indent; k++) os << ' '; + os << "for (int i" << j << " = 0; i" + << j << "< " << shape[j] << "; i" + << j << "++) {\n"; + if (j == shape.size() - 1) { + for (int k = 0; k < indent; k++) os << ' '; + os << " " << arg_name << "[" + << getIndex(shape) << "] = " + << "fd_" << arg_name << ".read();\n"; + } + indent += 2; + } + for (size_t m = 0; m < shape.size(); m++) { + indent -= 2; + for (int k = 0; k < indent; k++) os << ' '; + os << "}\n"; + } + } +} + +void CodeGenVivadoHLS::PostProcess(std::ostringstream& os) { +// os << "\n"; +// 
int indent = 2; +// for (size_t i = 0; i < arg_vars.size(); i++) { +// auto v = arg_vars[i]; +// std::string arg_name; +// if (stream_table[v]) +// arg_name = std::get<0>(arg_top_vars[v]); +// else arg_name = GetVarID(v); +// os << arg_name << " = " << "fd_" +// << arg_name << ".write();\n"; +} + void CodeGenVivadoHLS::AddFunction(LoweredFunc f, str2tupleMap map_arg_type) { // Write header files this->decl_stream << "#include \n"; this->decl_stream << "#include \n"; + this->decl_stream << "#include \n"; this->decl_stream << "#include \n\n"; CodeGenHLSC::AddFunction(f, map_arg_type); if (soda_header_.is_open()) @@ -77,6 +125,17 @@ void CodeGenVivadoHLS::VisitStmt_(const Store* op) { this->stream << ref << "[" << PrintExpr(sb->index) << "] = " << PrintExpr(sb->value) << ";\n"; + } else if (const StreamExpr* se = op->value.as()) { + if (!fpga_scope_) { + std::string vid = GetVarID(se->buffer_var.get()); + vid = vid.substr(0, vid.find("_stream_send")); + PrintIndent(); + this->stream << vid << "[" + << op->index << "] = " + << "fd_" << vid << ".read();\n"; + } else { + CodeGenC::VisitStmt_(op); + } } else { CodeGenC::VisitStmt_(op); } @@ -146,7 +205,8 @@ void CodeGenVivadoHLS::VisitStmt_(const Partition* op) { void CodeGenVivadoHLS::VisitExpr_(const StreamExpr* op, std::ostream& os) { CodeGenC::VisitExpr_(op, os); std::string vid = GetVarID(op->buffer_var.get()); - os << "read(fd_" << vid << ", (void*)&output, sizeof(output);"; + vid = vid.substr(0, vid.find("_stream_send")); + os << vid << ".read()"; } void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { @@ -163,11 +223,8 @@ void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { break; } vid = vid.substr(0, vid.find("_stream_send")); - stream << "write(" << "fd_" << vid - << ", " << "(void*)&"; - PrintExpr(op->value, stream); - stream << ", sizeof(" << vid; - stream << ");\n"; + stream << "fd_" << vid << ".write(" + << vid << ");\n"; } class AllocateCollector final : public IRVisitor { @@ -187,6 +244,138 @@ 
class AllocateCollector final : public IRVisitor { VarExprUnorderedSet& outputs_; }; +void CodeGenVivadoHLS::VisitStmt_(const AttrStmt* op) { + if (op->attr_key == ir::attr::device_scope) { + // print top( ... in host and enter fpga scope + if (op->value.as()->value == "fpga" && !fpga_scope_) { + fpga_scope_ = true; + PrintIndent(); + + // track the stream usage + StreamCollector collector(stream_table, "cpu"); + collector.Visit(op->body); + + // update data type and name + for (auto k : collector.host_undefined_) { + auto v = k.get(); + arg_vars.push_back(v); + stream_table[v] = true; + auto tuple = arg_top_vars[v]; + arg_top_vars[v] = std::make_tuple(v->name_hint, + std::get<1>(tuple), + std::get<2>(tuple)); + } + TypeCollector visitor(arg_top_vars); + visitor.Visit(op->body); + + // generte function calls + stream << "top("; + for (size_t i = 0; i < arg_vars.size(); i++) { + auto v = arg_vars[i]; + std::string arg_name; + if (stream_table[v]) + arg_name = std::get<0>(arg_top_vars[v]); + else arg_name = GetVarID(v); + if (i != 0) stream << ", "; + stream << "fd_" << arg_name; + + // generate kernel func definition + if (i != 0) arg_stream << ", "; + arg_stream << "hls::stream<"; + PrintType(std::get<1>(arg_top_vars[v]), arg_stream); + auto shape = std::get<2>(arg_top_vars[v]); + arg_stream << "> fd_" << arg_name; + } + stream << ");\n"; + + // switch context to device scope + host_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + + // swtich from device to host + } else if (op->value.as()->value == "cpu" && + fpga_scope_) { + fpga_scope_ = false; + device_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + } + this->PrintStmt(op->body); + } else { + CodeGenC::VisitStmt_(op); + } +} + +void CodeGenVivadoHLS::VisitStmt_(const KernelStmt *op) { + PrintIndent(); + stream << op->name << "("; + for (size_t i = 0; i < op->args.size(); i++) { + if (stream_arg_pos[op->name].count(i)) + stream << "fd_"; + 
PrintExpr(op->args[i], stream); + if (i < op->args.size() -1) stream << ", "; + } + stream << ");\n"; +} + +void CodeGenVivadoHLS::VisitStmt_(const KernelDef* op) { + LoweredFunc f; + // save func states + CodeGenC::SaveFuncState(f); + CodeGenC::InitFuncState(f); + std::ostringstream save; + save << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + + // skip the first underscore + GetUniqueName("_"); + // add to alloc buffer : type. + for (const auto & k : op->args) { + RegisterHandleType(k.get(), k.get()->type); + } + // print function signature + PrintType(op->ret_type, stream); + stream << " " << op->name << "("; + for (size_t k = 0; k < op->channels.size(); k+=2) { + int pos = op->channels[k].as()->value; + stream_arg_pos[op->name].insert(pos); + } + for (size_t i = 0; i < op->args.size(); ++i) { + VarExpr v = op->args[i]; + var_shape_map_[v.get()] = op->api_args[i]; + std::string vid = AllocVarID(v.get()); + if (i != 0) stream << ", "; + std::string str = PrintExpr(op->api_types[i]); + Type type = String2Type(str); + + // pass the stream channel reference + // TODO: broadcast in hlsc (one wr multi read) + if (stream_arg_pos[op->name].count(i)) { + stream << "hls::stream<"; + PrintType(type, stream); + stream << ">& " << vid; + } else { + PrintType(type, stream); + this->stream << "* " << vid; + } + } + stream << ") {\n"; + int func_scope = BeginScope(); + range_ = CollectIterRange(op->body); + PrintStmt(op->body); + EndScope(func_scope); + stream << "}\n\n"; + + // restore default stream + module_stream << this->stream.str(); + this->stream.str(""); + this->stream.clear(); + this->stream << save.str(); + RestoreFuncState(f); +} + void CodeGenVivadoHLS::VisitStmt_(const Stencil* op) { // Use SODA codegen for stencil analysis CodeGenSODA cg_soda; diff --git a/tvm/src/codegen/hlsc/codegen_vhls.h b/tvm/src/codegen/hlsc/codegen_vhls.h index a2dd5fa0e..6462251db 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.h +++ 
b/tvm/src/codegen/hlsc/codegen_vhls.h @@ -30,6 +30,12 @@ class CodeGenVivadoHLS final : public CodeGenHLSC { void VisitStmt_(const Partition* op) override; void VisitStmt_(const Stencil* op) override; void VisitStmt_(const StreamStmt* op) override; + void VisitStmt_(const AttrStmt* op) override; + void VisitStmt_(const KernelDef* op) override; + void VisitStmt_(const KernelStmt* op) override; + + void PreProcess(std::ostringstream& os); + void PostProcess(std::ostringstream& os); private: std::ofstream soda_header_; }; diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index bf6e2a07f..8d2329e98 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -94,13 +94,10 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, void CodeGenAOCL::PrintType(Type t, std::ostream &os) { int lanes = t.lanes(); - - if(t.is_handle()) - { + if(t.is_handle()) { os << "void*";return; } - if(t==Bool()) - { + if(t==Bool()) { os <<"bool"; return; } CHECK_EQ(lanes,1) @@ -266,27 +263,28 @@ void CodeGenAOCL::VisitStmt_(const KernelDef* op) { else PrintType(op->ret_type, stream); stream << " " << op->name << "("; - // check channel and create function signature - std::unordered_set stream_vars; - for (size_t j = 0; j < op->channels.size(); j++) { - stream_vars.insert(op->channels[j]); - stream_exprs.insert(op->channels[j].get()->name_hint); + // streamed arg position to channel index + std::unordered_map stream_args; + for (size_t j = 0; j < op->channels.size(); j=j+2) { + int pos = op->channels[j].as()->value; + int idx = op->channels[j+1].as()->value; + stream_args[pos] = idx; } for (size_t i = 0; i < op->args.size(); ++i) { VarExpr v = op->args[i]; var_shape_map_[v.get()] = op->api_args[i]; std::string vid = AllocVarID(v.get()); - if (stream_vars.count(v)) { - // define channel out of scope + if (stream_args.count(i)) { + stream_arg_pos[op->name].insert(i); if (!stream_pragma) { decl_stream << "#pragma OPENCL 
EXTENSION cl_intel_channels : enable\n"; stream_pragma = true; } } else { if (i != 0) { - if (stream_vars.count(op->args[i-1])) void(0); + if (stream_args.count(i-1)) void(0); else stream << ", "; - } + } // un-streamed argument this->stream << "__global "; std::string str = PrintExpr(op->api_types[i]); Type type = String2Type(str); @@ -314,10 +312,9 @@ void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { stream << op->name << "("; for (size_t i = 0; i < op->args.size(); i++) { std::string str = op->name + "." + PrintExpr(op->args[i]); - if (!stream_exprs.count(str)) { + if (!stream_arg_pos[op->name].count(i)) { if (i != 0) { - std::string pre = op->name + "." + PrintExpr(op->args[i-1]); - if (stream_exprs.count(pre)) void(0); + if (stream_arg_pos[op->name].count(i-1)) void(0); else stream << ", "; } PrintExpr(op->args[i], stream); @@ -329,12 +326,9 @@ void CodeGenAOCL::VisitStmt_(const KernelStmt *op) { void CodeGenAOCL::VisitExpr_(const KernelExpr *op, std::ostream& os) { // NOLINT(*) os << op->name << "("; for (size_t i = 0; i < op->args.size(); ++i) { - std::string str = op->name + "." + PrintExpr(op->args[i]); - // skip printing if arg is treamed - if (!stream_exprs.count(str)) { + if (!stream_arg_pos[op->name].count(i)) { if (i != 0) { - std::string pre = op->name + "." 
+ PrintExpr(op->args[i-1]); - if (stream_exprs.count(pre)) void(0); + if (stream_arg_pos[op->name].count(i-1)) void(0); else stream << ", "; } PrintExpr(op->args[i], stream); diff --git a/tvm/src/codegen/opencl/codegen_aocl.h b/tvm/src/codegen/opencl/codegen_aocl.h index 9f3d8c379..5778b70ec 100755 --- a/tvm/src/codegen/opencl/codegen_aocl.h +++ b/tvm/src/codegen/opencl/codegen_aocl.h @@ -9,22 +9,24 @@ namespace TVM { namespace codegen { class CodeGenAOCL : public CodeGenOpenCL { - public: - CodeGenAOCL(){} - void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); - void PrintType(Type t, std::ostream& os) override; //NOLINT(*) + public: + CodeGenAOCL(){} + void AddFunction(LoweredFunc f, str2tupleMap map_arg_type); + void PrintType(Type t, std::ostream& os) override; //NOLINT(*) - void VisitStmt_(const For* op) override; //NOLINT(*) - void VisitStmt_(const StreamStmt* op) override; //NOLINT(*) - void VisitStmt_(const KernelDef* op) override; //NOLINT(*) - void VisitStmt_(const KernelStmt* op) override; //NOLINT(*) + void VisitStmt_(const For* op) override; //NOLINT(*) + void VisitStmt_(const StreamStmt* op) override; //NOLINT(*) + void VisitStmt_(const KernelDef* op) override; //NOLINT(*) + void VisitStmt_(const KernelStmt* op) override; //NOLINT(*) - void VisitExpr_(const StreamExpr* op, std::ostream& os) override; //NOLINT(*) - void VisitExpr_(const KernelExpr* op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const StreamExpr* op, std::ostream& os) override; //NOLINT(*) + void VisitExpr_(const KernelExpr* op, std::ostream& os) override; //NOLINT(*) - private: - bool stream_pragma{false}; - std::unordered_set stream_exprs; + private: + // whether to enable streaming + bool stream_pragma{false}; + // map from kernel name to set of streamed arg position index + std::unordered_map> stream_arg_pos; }; } // namespace codegen } // namespace TVM diff --git a/tvm/src/schedule/compute_primitive.cc b/tvm/src/schedule/compute_primitive.cc index 
3b3fa9734..ae59872b3 100644 --- a/tvm/src/schedule/compute_primitive.cc +++ b/tvm/src/schedule/compute_primitive.cc @@ -147,65 +147,6 @@ class LoopFuser final : public IRMutator { std::unordered_map& sub_; }; -class StreamConsumer final : public IRMutator { - public: - VarExpr stream_data; - StreamConsumer( - const std::string& target, - const ir::StreamType& type) - : target_(target), type_(type) {} - - // Replace with StreamExpr e.g. var.read(op. index) - Expr Mutate_(const Load* op, const Expr& e) { - Expr index = op->index; - std::string target_name = op->buffer_var.get()->name_hint; - if (has_suffix(target_name, "." + target_)) { - stream_data = op->buffer_var; - return StreamExpr::make(op->type, op->buffer_var, type_, 10); - } else { - return Load::make(op->type, op->buffer_var, index, op->predicate); - } - } - - private: - const std::string target_; - const ir::StreamType type_; - bool has_suffix(const std::string &str, const std::string &suffix) { - return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; - } -}; - -class StreamProducer final : public IRMutator { - public: - VarExpr stream_data; - StreamProducer( - const std::string& target, - const ir::StreamType& type) - : target_(target), type_(type) {} - - // Replace with StreamStmt e.g. var.write(value) - Stmt Mutate_(const Store* op, const Stmt& s) { - Expr index = op->index; - Expr value = this->Mutate(op->value); - std::string target_name = op->buffer_var.get()->name_hint; - if (has_suffix(target_name, "." 
+ target_)) { - stream_data = op->buffer_var; - return StreamStmt::make(op->buffer_var, value, type_, 10); - } else { - return Store::make(op->buffer_var, value, index, op->predicate); - } - } - - private: - const std::string target_; - const ir::StreamType type_; - bool has_suffix(const std::string &str, const std::string &suffix) { - return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; - } -}; - class LoopReorderer final : public IRMutator { public: LoopReorderer(const Array& order) : order_(order) { @@ -248,38 +189,6 @@ class LoopReorderer final : public IRMutator { } }; -class KernelUpdater final : public IRMutator { - public: - KernelUpdater( - const std::string& target, - const ir::StreamType& type, - const bool is_producer) - : target_(target), type_(type), is_producer_(is_producer){} - - Stmt Mutate_(const KernelDef* op, const Stmt& s) { - // mutate target load - Stmt stmt = op->body; - Array arr; - if (is_producer_) { - StreamProducer mutator(target_, type_); - stmt = mutator.Mutate(stmt); - arr.push_back(mutator.stream_data); - } else { // replace load consumer - StreamConsumer mutator(target_, type_); - stmt = mutator.Mutate(stmt); - arr.push_back(mutator.stream_data); - } - // update kernel arg signature - return KernelDef::make(op->args, op->api_args, - op->api_types, stmt, op->ret_void, - op->ret_type, op->name, arr); - } - private: - const std::string target_; - const ir::StreamType type_; - const bool is_producer_; -}; - class IterVarAttrUpdater final : public IRMutator { public: IterVarAttrUpdater(const IterVar& var, const IterVarAttrNode* node) @@ -594,24 +503,6 @@ Stmt ReorderLoop(Stmt& stmt, const Array& order) { return stmt; } -Stmt StreamFromProducer(Stmt& stmt, - Buffer& producer_buf, - ir::StreamType& type) { - std::string target_name = producer_buf.operator->()->name; - KernelUpdater mutator(target_name, type, true); - stmt = mutator.Mutate(stmt); - return stmt; -} - -Stmt 
StreamToConsumer(Stmt& stmt, - Buffer& producer_buf, - ir::StreamType& type) { - std::string target_name = producer_buf.operator->()->name; - KernelUpdater mutator(target_name, type, false); - stmt = mutator.Mutate(stmt); - return stmt; -} - Stmt UpdateIterVarAttr(Stmt& stmt, const IterVar& var, const IterVarAttrNode* node) { diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index a4caac6ef..f32bd0a05 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -109,27 +109,20 @@ void ReplaceDataFlow(const Array& stages, class StreamConsumer final : public IRMutator { public: - VarExpr stream_data; StreamConsumer( const std::string& target, const ir::StreamType& type, - const bool kernel_channel, - const std::string& common_name) - : target_(target), type_(type), - kernel_channel_(kernel_channel), - common_name_(common_name) {} + int channel_index) + : target_(target), type_(type), + channel_index_(channel_index) {} - // Replace with StreamExpr e.g. var.read(op. 
index) Expr Mutate_(const Load* op, const Expr& e) { Expr index = op->index; std::string target_name = op->buffer_var.get()->name_hint; if (target_ == target_name) { - stream_data = op->buffer_var; Array keys, values; - if (kernel_channel_) { - keys.push_back(StringImm::make("name")); - values.push_back(StringImm::make(common_name_)); - } + keys.push_back(StringImm::make("index")); + values.push_back(IntImm::make(Int(32), channel_index_)); return StreamExpr::make(op->type, op->buffer_var, type_, 10, keys, values); } else { @@ -141,36 +134,28 @@ class StreamConsumer final : public IRMutator { private: const std::string target_; const ir::StreamType type_; - const bool kernel_channel_; - const std::string common_name_; + const int channel_index_; }; class StreamProducer final : public IRMutator { public: - VarExpr stream_data; StreamProducer( const std::string& target, - const ir::StreamType& type, - const bool kernel_channel, - const std::string& common_name) - : target_(target), type_(type), - kernel_channel_(kernel_channel), - common_name_(common_name) {} - - // Replace with StreamStmt e.g. 
var.write(value) + const ir::StreamType& type, + int channel_index) + : target_(target), type_(type), + channel_index_(channel_index) {} + Stmt Mutate_(const Store* op, const Stmt& s) { Expr index = op->index; Expr value = this->Mutate(op->value); std::string target_name = op->buffer_var.get()->name_hint; if (target_name == target_) { - stream_data = op->buffer_var; Array keys, values; - if (kernel_channel_) { - keys.push_back(StringImm::make("name")); - values.push_back(StringImm::make(common_name_)); - } + keys.push_back(StringImm::make("index")); + values.push_back(IntImm::make(Int(32), channel_index_)); return StreamStmt::make(op->buffer_var, value, - type_, 10, keys, values); + type_, 10, keys, values); } else { return Store::make(op->buffer_var, value, index, op->predicate); @@ -180,8 +165,7 @@ class StreamProducer final : public IRMutator { private: const std::string target_; const ir::StreamType type_; - const bool kernel_channel_; - const std::string common_name_; + const int channel_index_; }; class KernelUpdater final : public IRMutator { @@ -194,30 +178,25 @@ class KernelUpdater final : public IRMutator { const bool kernel_channel) : arg_pos_(arg_pos), type_(type), is_producer_(is_producer), - // setup common channel name kernel_channel_(kernel_channel) { - if (kernel_channel_) common_name = getName(); + if (kernel_channel_) channel_index_ = getIndex(); } Stmt Mutate_(const KernelDef* op, const Stmt& s) { - // mutate target load Stmt stmt = op->body; - Array arr = op->channels; + // arr saves arg_pos and common channel idx + Array arr = op->channels; + CHECK(op->channels.size() % 2 == 0) + << "arg_pos, index pair number mismatch"; + arr.push_back(IntImm::make(Int(32), arg_pos_)); + arr.push_back(IntImm::make(Int(32), channel_index_)); std::string target_ = op->args[arg_pos_].get()->name_hint; - if (is_producer_) { - StreamProducer mutator(target_, type_, - kernel_channel_, - common_name); + if (is_producer_) { // mutate target load + StreamProducer 
mutator(target_, type_, channel_index_); stmt = mutator.Mutate(stmt); - if (kernel_channel_) - arr.push_back(mutator.stream_data); } else { // replace load consumer - StreamConsumer mutator(target_, type_, - kernel_channel_, - common_name); + StreamConsumer mutator(target_, type_, channel_index_); stmt = mutator.Mutate(stmt); - if (kernel_channel_) - arr.push_back(mutator.stream_data); } // update kernel arg signature return KernelDef::make(op->args, op->api_args, @@ -229,12 +208,13 @@ class KernelUpdater final : public IRMutator { const ir::StreamType type_; const bool is_producer_; const bool kernel_channel_; - std::string common_name; - std::string getName() { + int channel_index_{0}; + int getIndex() { channelCount += 1; int channel_num = channelCount; - if (channelCount % 2 == 0) channel_num = channelCount - 1; - return std::string("channel_" + std::to_string(channel_num)); + if (channelCount % 2 == 0) + channel_num = channelCount - 1; + return channel_num; } }; @@ -575,19 +555,19 @@ Tensor Schedule::move_to(const Tensor& target, indices.push_back(iter); loop_vars.push_back(iter); } - Expr index = Expr(0); //getIndex(indices, target->shape); + Expr index = getIndex(indices, target->shape); // store op initialized with variable node Stmt for_stmt = Store::make(producer_buffer->data, stream, index, UIntImm::make(UInt(1), 1)); - // for (size_t j = 0; j < target->shape.size(); j++) { - // for_stmt = For::make( - // VarExpr(loop_vars[j]), - // 0, target->shape[j], - // ForType::Serial, - // DeviceAPI::None, - // for_stmt); - // } + for (size_t j = 0; j < target->shape.size(); j++) { + for_stmt = For::make( + VarExpr(loop_vars[j]), + 0, target->shape[j], + ForType::Serial, + DeviceAPI::None, + for_stmt); + } // attr annotates new scope Stmt body = AttrStmt::make( From 96e388fafe95b04062a50d81046b4f694db7b1ed Mon Sep 17 00:00:00 2001 From: Hecmay Date: Sun, 8 Dec 2019 11:45:16 -0500 Subject: [PATCH 098/103] [update] code post-processing --- samples/conv/conv.py | 3 
+- tvm/include/tvm/schedule.h | 2 +- tvm/src/codegen/build_util.cc | 23 +++++--- tvm/src/codegen/hlsc/codegen_vhls.cc | 58 +++++++++++++------ tvm/src/schedule/schedule_dataflow_rewrite.cc | 49 ++++++++-------- 5 files changed, 84 insertions(+), 51 deletions(-) diff --git a/samples/conv/conv.py b/samples/conv/conv.py index f350f4d03..9502ffa6c 100644 --- a/samples/conv/conv.py +++ b/samples/conv/conv.py @@ -39,8 +39,9 @@ def kernel(input_image, kernel_1, kernel_2): s = hcl.create_schedule([image, k1, k2], kernel) # data moved to local - i0, k10 = s.to([image, k1], target.fpga) + i0, k10, k20 = s.to([image, k1, k2], target.fpga) s.to([i0, k10], s[kernel.conv1]) + s.to([k20], s[kernel.conv2]) s.to(kernel.derv, target.cpu) # create stream channel between modules diff --git a/tvm/include/tvm/schedule.h b/tvm/include/tvm/schedule.h index 396bbc7bd..faacc7d96 100644 --- a/tvm/include/tvm/schedule.h +++ b/tvm/include/tvm/schedule.h @@ -402,7 +402,7 @@ class Schedule : public NodeRef { // declare container type using ContainerType = ScheduleNode; // insertion point for host & xcel separation - size_t split_bound{0}; + static int split_bound; }; /*! 
diff --git a/tvm/src/codegen/build_util.cc b/tvm/src/codegen/build_util.cc index 29a5d49ec..e0a5f8b2d 100644 --- a/tvm/src/codegen/build_util.cc +++ b/tvm/src/codegen/build_util.cc @@ -519,7 +519,13 @@ void GenHostHeaders(std::ofstream& stream, stream << "#include \"utils.h\"\n"; stream << "// harness namespace\n"; stream << "using namespace rosetta;\n"; - } // harness headers + } else if (platform == "vivado_hls") { + stream << "// vivado hls headers\n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \n"; + stream << "#include \"kernel.cpp\"\n\n"; + } } // initialization before executing kernel @@ -749,13 +755,13 @@ void GenHostCode(TVMArgs& args, pre_kernel = pre_kernel.substr(pre_kernel.find_first_not_of("\n")); pre_kernel = pre_kernel.substr(pre_kernel.find_first_not_of(" ")); PrintIndent(stream, indent); - stream << pre_kernel << "\n"; - if (platform == "sdaccel") + if (platform == "sdaccel") { + // create variable wrapper + stream << pre_kernel << "\n"; KernelInit(stream, platform, args, arg_types, arg_info); - - else if (platform == "vivado_hls") { + } else if (platform == "vivado_hls") { // init hls stream channels for (size_t k = 0; k < arg_info.size(); k++) { auto info = arg_info[k]; @@ -764,9 +770,10 @@ void GenHostCode(TVMArgs& args, stream << "hls::stream<" << PrintHalideType(std::get<2>(info)) << "> " << "fd_" << std::get<0>(info) << ";\n"; - } + } } - stream << "\n"; + PrintIndent(stream, indent); + stream << pre_kernel << "\n"; PrintIndent(stream, indent); // create kernel call from host stream << "top("; @@ -774,7 +781,7 @@ void GenHostCode(TVMArgs& args, auto info = arg_info[i]; auto name = std::get<0>(info); if (i != 0) stream << ", "; - stream << name; + stream << "fd_" << name; } stream << ");\n"; } diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index db634b752..e9c915576 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -22,7 
+22,6 @@ namespace TVM { namespace codegen { void CodeGenVivadoHLS::PreProcess(std::ostringstream& os) { - return; os << "\n"; int indent = 2; for (size_t i = 0; i < arg_vars.size(); i++) { @@ -34,23 +33,47 @@ void CodeGenVivadoHLS::PreProcess(std::ostringstream& os) { // create local buffer saving result auto shape = std::get<2>(arg_top_vars[v]); - for (size_t j = 0; j < shape.size(); j++) { + auto dtype = std::get<1>(arg_top_vars[v]); + if (!stream_table[v]) { // unstreamed args + // allocate local buffer for (int k = 0; k < indent; k++) os << ' '; - os << "for (int i" << j << " = 0; i" - << j << "< " << shape[j] << "; i" - << j << "++) {\n"; - if (j == shape.size() - 1) { + PrintType(dtype, os); + os << " " << arg_name << "["; + for (size_t n = 0; n < shape.size(); n++) { + os << shape[n]; + if (n != shape.size() - 1) os << "* "; + } + os << "];\n"; + + for (size_t j = 0; j < shape.size(); j++) { for (int k = 0; k < indent; k++) os << ' '; - os << " " << arg_name << "[" - << getIndex(shape) << "] = " - << "fd_" << arg_name << ".read();\n"; + os << "for (int i" << j << " = 0; i" + << j << "< " << shape[j] << "; i" + << j << "++) {\n"; + // pass stream reference + if (j == shape.size() - 1) { + for (int k = 0; k < indent; k++) os << ' '; + os << " " << arg_name << "[" + << getIndex(shape) << "] = " + << "fd_" << arg_name << ".read();\n"; + } + indent += 2; } - indent += 2; - } - for (size_t m = 0; m < shape.size(); m++) { - indent -= 2; + for (size_t m = 0; m < shape.size(); m++) { + indent -= 2; + for (int k = 0; k < indent; k++) os << ' '; + os << "}\n"; + } + } else if (i == arg_vars.size() - 1) { + // allocate for return variable for (int k = 0; k < indent; k++) os << ' '; - os << "}\n"; + PrintType(dtype, os); + os << " " << arg_name << "["; + for (size_t n = 0; n < shape.size(); n++) { + os << shape[n]; + if (n != shape.size() - 1) os << "* "; + } + os << "];\n"; } } } @@ -212,8 +235,6 @@ void CodeGenVivadoHLS::VisitExpr_(const StreamExpr* op, std::ostream& 
os) { void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { CodeGenC::VisitStmt_(op); std::string vid = GetVarID(op->buffer_var.get()); - // std::string vid = GetVarID(op->buffer_var.get()); - PrintIndent(); switch (op->stream_type) { case StreamType::Channel: break; @@ -223,8 +244,9 @@ void CodeGenVivadoHLS::VisitStmt_(const StreamStmt* op) { break; } vid = vid.substr(0, vid.find("_stream_send")); + auto load = op->value.as(); stream << "fd_" << vid << ".write(" - << vid << ");\n"; + << vid << "["<< load->index << "]);\n"; } class AllocateCollector final : public IRVisitor { @@ -284,7 +306,7 @@ void CodeGenVivadoHLS::VisitStmt_(const AttrStmt* op) { arg_stream << "hls::stream<"; PrintType(std::get<1>(arg_top_vars[v]), arg_stream); auto shape = std::get<2>(arg_top_vars[v]); - arg_stream << "> fd_" << arg_name; + arg_stream << ">& fd_" << arg_name; } stream << ");\n"; diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index f32bd0a05..6db9f542b 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -264,6 +264,9 @@ class ParentStmtCollector final : public IRMutator { const IterVar& axis_; }; +// initialize static split bound +int Schedule::split_bound = 0; + // stream buffer data to kernel stage void Schedule::to_stage(const Tensor& target, /*kernel def stage*/ Stage dest, @@ -432,8 +435,7 @@ Tensor Schedule::move_to(const Tensor& target, } } - // build consumer (sender) stage which consumes data from original source - // and write into the streaming channel + // create sender and write into streaming channel Array consumer_inputs; Array consumer_input_placeholders; Array consumer_output_placeholders; @@ -449,14 +451,15 @@ Tensor Schedule::move_to(const Tensor& target, consumer_input_placeholders.push_back(target_buffer); consumer_output_placeholders.push_back(consumer_buffer); - // std::vector csm_indices; - // std::vector csm_loop_vars; - // for 
(size_t i = 0; i < target->shape.size(); i++) { - // VarExpr iter("i" + std::to_string(i)); - // csm_indices.push_back(iter); - // csm_loop_vars.push_back(iter); - // } - Expr csm_index = Expr(0); //getIndex(csm_indices, target->shape); + // create statement index + std::vector csm_indices; + std::vector csm_loop_vars; + for (size_t i = 0; i < target->shape.size(); i++) { + VarExpr iter(target_buffer->name + std::to_string(i)); + csm_indices.push_back(iter); + csm_loop_vars.push_back(iter); + } + Expr csm_index = getIndex(csm_indices, target->shape); Expr load_expr = Load::make(target->dtype, target_buffer->data, csm_index, @@ -484,19 +487,19 @@ Tensor Schedule::move_to(const Tensor& target, break; } + for (size_t j = 0; j < target->shape.size(); j++) { + consumer_body = For::make( + VarExpr(csm_loop_vars[j]), + 0, target->shape[j], + ForType::Serial, + DeviceAPI::None, + consumer_body); + } + consumer_body = AttrStmt::make( consumer_buffer->data, "device_scope", sender_scope, consumer_body); - // for (size_t j = 0; j < target->shape.size(); j++) { - // consumer_body = For::make( - // VarExpr(csm_loop_vars[j]), - // 0, target->shape[j], - // ForType::Serial, - // DeviceAPI::None, - // consumer_body); - // } - // create new stage and return stream tensors // auto n = std::make_shared(); // n->name = consumer_name; @@ -516,9 +519,9 @@ Tensor Schedule::move_to(const Tensor& target, Stage consumer_stage = Stage(consumer_op); // insert sender before bound for (host,xcel <- host) case if (device_type == DeviceType::FPGA) { - if (split_bound == 0) + if (split_bound == 0) { split_bound = consumer_pos + 1; - else { // insert host sender before bound + } else { // insert host sender before bound consumer_pos = split_bound; split_bound += 1; } @@ -551,7 +554,7 @@ Tensor Schedule::move_to(const Tensor& target, std::vector indices; std::vector loop_vars; for (size_t i = 0; i < target->shape.size(); i++) { - VarExpr iter("i" + std::to_string(i)); + VarExpr 
iter(target_buffer->name + std::to_string(i)); indices.push_back(iter); loop_vars.push_back(iter); } @@ -586,7 +589,7 @@ Tensor Schedule::move_to(const Tensor& target, size_t pos = FindNodeRef(stages, consumer_stage); if (split_bound == 0 || device_type == DeviceType::CPU) pos = pos + 1; - else pos = split_bound + 1; // insert to xcel range + else pos = split_bound + 1; stages->data.insert(stages->data.begin() + pos, producer_stage.node_); (*this)->stage_map.Set(producer->op, producer_stage); From 1aabf4e0ef8101d37e885a75b8ab088cffcc5eaa Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 9 Dec 2019 18:07:35 -0500 Subject: [PATCH 099/103] [fix] test cases --- HISTORY | 11 +++ python/heterocl/api.py | 2 +- python/heterocl/dsl.py | 1 + python/heterocl/tvm/build_module.py | 27 +++-- python/heterocl/util.py | 26 ----- samples/conv/conv.py | 5 +- tests/test_codegen_aocl.py | 29 ++---- tests/test_codegen_ppac.py | 99 ------------------- tests/test_codegen_soda.py | 2 + tests/test_codegen_vhls.py | 6 +- tvm/src/codegen/build_common.cc | 2 +- tvm/src/codegen/codegen_c.cc | 11 ++- tvm/src/codegen/hlsc/codegen_hlsc.cc | 27 +++-- tvm/src/codegen/hlsc/codegen_vhls.cc | 26 ++--- tvm/src/codegen/opencl/codegen_aocl.cc | 47 +++------ tvm/src/codegen/opencl/codegen_opencl.cc | 2 - tvm/src/schedule/schedule_dataflow_rewrite.cc | 2 +- tvm/src/template/vivado/Makefile | 2 +- 18 files changed, 106 insertions(+), 221 deletions(-) create mode 100644 HISTORY delete mode 100644 tests/test_codegen_ppac.py diff --git a/HISTORY b/HISTORY new file mode 100644 index 000000000..e08d564bc --- /dev/null +++ b/HISTORY @@ -0,0 +1,11 @@ +### 2019-12-09 + * fixed issue of zc706 simulation + * remove kernel-name variable allocation before KernelDef + * change multi-dimension array access to row-major single-dimension access + * create local buffer for each on-device variable + * updated the `KernelUpdater` class (using position index instead of name) + * added `stream_arg_pos` map in `CodeGenC` to 
facilitate codegen with streaming + * fixed test cases + * changed tvm `build` function to support legacy string type target + * fixed opencl aocl data type mismatching issue + * fixed kernel def data type conversion issue diff --git a/python/heterocl/api.py b/python/heterocl/api.py index 319bf5b1e..f3e2151c8 100644 --- a/python/heterocl/api.py +++ b/python/heterocl/api.py @@ -270,7 +270,7 @@ def lower(schedule): new_inputs.append(i.var) return _lower(schedule.sch, new_inputs, simple_mode=True) -def build(schedule, target=None, name="host_function", stmt=None): +def build(schedule, target=None, name="default_function", stmt=None): """Build the executable according to the schedule and target. The default target is `llvm` (i.e., CPU execution). If stmt is specified, diff --git a/python/heterocl/dsl.py b/python/heterocl/dsl.py index 1dce1c25e..b226cb0ab 100644 --- a/python/heterocl/dsl.py +++ b/python/heterocl/dsl.py @@ -405,6 +405,7 @@ def decorator(fmodule, shapes=shapes, dtypes=dtypes, ret_dtype=ret_dtype, name=n raise APIError("The number of data types does not match the of arguments") for (name_, dtype_) in zip(new_names, dtypes): dtypes.append(util.get_dtype(dtype_, name_)) + dtypes = dtypes[int(len(dtypes)/2):] else: dtype = util.get_dtype(dtypes) dtypes = [] diff --git a/python/heterocl/tvm/build_module.py b/python/heterocl/tvm/build_module.py index 1e3d7bbbb..47b4e31ae 100755 --- a/python/heterocl/tvm/build_module.py +++ b/python/heterocl/tvm/build_module.py @@ -463,6 +463,17 @@ def build_fpga_kernel(sch, args, target, name="default_function"): flist = [flist] fdevice = [ir_pass.LowerIntrin(x, str(target)) for x in flist] + if isinstance(target, str): # string type + builder = getattr(codegen, "build_{0}".format(target)) + ret = builder(fdevice) + if isinstance(ret, str): + decl = ret[:ret.find("{device}")] + start = ret.find("{host}") + end = ret.rfind("{host}") + ret = decl + "\n" + ret[start+6:end] + ret = ret.strip("\n").lstrip("\n") + "\n\n" + return ret 
+ try: # generate and split code host, xcel = None, None if target.tool.name == "sdaccel": @@ -475,13 +486,16 @@ def build_fpga_kernel(sch, args, target, name="default_function"): host = target.host.lang.replace("c", "rv64_ppac") # return simulation built function - if "emu" in str(target.tool.mode) or "sim" in str(target.tool.mode): + mode = str(target.tool.mode) + if "emu" in mode or "sim" in mode: builder = getattr(codegen, "build_{0}".format("sim")) keys = [k for k in target.tool.options.keys()] vals = [v for v in target.tool.options.values()] keys.insert(0, "name") vals.insert(0, target.tool.name) return builder(fdevice, keys, vals) + elif mode != "debug": # impl mode + pass else: # return source code only host_code, xcel_code = "", "" if host: # src mode generate host code @@ -504,7 +518,7 @@ def build(sch, args=None, target=None, target_host=None, - name="host_function", + name="default_function", binds=None, stmt=None): """Build a function with arguments as signiture. @@ -545,12 +559,13 @@ def build(sch, ---- See the note on :any:`tvm.target` on target string format. 
""" - if target and isinstance(target, str): + if isinstance(target, platform): + return build_fpga_kernel(sch, args, target, name=name) + else: # default string type target target = _target.current_target() if target is None else target target = _target.create(target) if target else _target.create("llvm") - else: # platform target - assert isinstance(target, platform), "unsupported target type" - return build_fpga_kernel(sch, args, target, name=name) + if "fpga" in target.keys: + return build_fpga_kernel(sch, args, target.target_name, name=name) BuildConfig.current = build_config() if isinstance(sch, schedule._Schedule): diff --git a/python/heterocl/util.py b/python/heterocl/util.py index fac15fed1..704b774cb 100644 --- a/python/heterocl/util.py +++ b/python/heterocl/util.py @@ -51,32 +51,6 @@ def get_name(var_type, name=None): VarName.name_dict[var_type] = counter return var_type + str(counter) -def get_device(device, name=None): - """Get the data type by default or from a value. - - Device type of a variable needs to be specified before - the scheduling. - - Parameters - ---------- - dtype: Type or str or None - The specified data type. - - name: str, optional - The name of the variable that will be given a data type. - - Returns - ------- - dtype: str - A data type represented in str. - """ - if Scheme.current is not None: - device_ = Scheme.current.device_dict.get(name) - device = device if device_ is None else device_ - device = config.init_device if device is None else device - return devices.device_to_str(device) - - def get_dtype(dtype, name=None): """Get the data type by default or from a value. 
diff --git a/samples/conv/conv.py b/samples/conv/conv.py index 9502ffa6c..ca41a50a1 100644 --- a/samples/conv/conv.py +++ b/samples/conv/conv.py @@ -40,8 +40,8 @@ def kernel(input_image, kernel_1, kernel_2): # data moved to local i0, k10, k20 = s.to([image, k1, k2], target.fpga) - s.to([i0, k10], s[kernel.conv1]) - s.to([k20], s[kernel.conv2]) + # s.to([i0, k10], s[kernel.conv1]) + # s.to([k20], s[kernel.conv2]) s.to(kernel.derv, target.cpu) # create stream channel between modules @@ -68,4 +68,3 @@ def kernel(input_image, kernel_1, kernel_2): f = conv() f(hcl_input, kernel_x, kernel_y, hcl_output) - diff --git a/tests/test_codegen_aocl.py b/tests/test_codegen_aocl.py index 8359e9a08..a72d364f2 100644 --- a/tests/test_codegen_aocl.py +++ b/tests/test_codegen_aocl.py @@ -1,6 +1,5 @@ import heterocl as hcl - def test_ap_int(): hcl.init(); A = hcl.placeholder((1, 32), dtype=hcl.Int(3)) @@ -10,10 +9,9 @@ def test_ap_int(): code = hcl.build(s, target='aocl') print (code) assert "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable" in code - assert "ap_int<3>intd_t" in code - assert "ap_uint<3>uintd_t" in code - assert "ap_int<8>intd_t" in code - + assert "ap_int<3> intd_t" in code + assert "ap_uint<3> uintd_t" in code + assert "ap_int<8> intd_t" in code def test_pragma(): hcl.init() @@ -35,7 +33,6 @@ def test_pragma(): print (code2) assert "#pragma ii 2" in code2 - def test_reorder(): hcl.init() A = hcl.placeholder((10, 100), "A") @@ -53,8 +50,6 @@ def two_stage(A): code2 = hcl.build(s, target='aocl') print (code2) - - def test_split_fuse(): hcl.init() A = hcl.placeholder((10, 100), "A") @@ -75,8 +70,6 @@ def two_stage(A): code2 = hcl.build(s2, target='aocl') print (code2) - - def test_binary_conv(): hcl.init() A = hcl.placeholder((1, 32, 14, 14), dtype=hcl.UInt(1), name="A") @@ -92,17 +85,15 @@ def test_binary_conv(): s[C].split(C.axis[1], factor=5) code = hcl.build(s, target='aocl') print (code) - assert "for (ap_int<32>intd_t ff_outer = 0; ff_outer 
< 13; ++ff_outer)" in code - assert "for (ap_int<32>intd_t ff_inner = 0; ff_inner < 5; ++ff_inner)" in code + assert "for (ap_int<32> intd_t ff_outer = 0; ff_outer < 13; ++ff_outer)" in code + assert "for (ap_int<32> intd_t ff_inner = 0; ff_inner < 5; ++ff_inner)" in code assert "if (ff_inner < (64 - (ff_outer * 5)))" in code - - if __name__ == '__main__': - test_ap_int() - test_pragma() - test_reorder() - test_split_fuse() - test_binary_conv() + test_ap_int() + test_pragma() + test_reorder() + test_split_fuse() + test_binary_conv() diff --git a/tests/test_codegen_ppac.py b/tests/test_codegen_ppac.py deleted file mode 100644 index 43b7e04cc..000000000 --- a/tests/test_codegen_ppac.py +++ /dev/null @@ -1,99 +0,0 @@ -import heterocl as hcl -import hlib - -def test_func_print(): - def test_hmm_sim(): - hcl.init() - x = hcl.placeholder((1,), 'x', dtype=hcl.UInt(64)) - y = hcl.placeholder((64,), 'y', dtype=hcl.UInt(64)) - def kernel(X, Y): - return hlib.ppac.hmm_sim(X, Y, name='Z') - s = hcl.create_schedule([x, y], kernel) - f = hcl.build(s, target='rv64_ppac') - code = str(f) - assert 'PPACFunc_HmmSim' in code - - def test_gemm_binary(): - hcl.init() - data = hcl.placeholder((64, 64), 'd', dtype=hcl.UInt(1)) - weight = hcl.placeholder((64, 64), 'w', dtype=hcl.UInt(1)) - def kernel(d, w): - return hlib.ppac.gemm_binary(d, w, 'res') - s = hcl.create_schedule([data, weight], kernel) - f = hcl.build(s, target='rv64_ppac') - code = str(f) - assert 'PPACFunc_GeMMBin' in code - - def test_gemm_multi_bit_unsigned(): - hcl.init() - data = hcl.placeholder((32, 32), 'd', dtype=hcl.UInt(8)) - weight = hcl.placeholder((32, 32), 'w', dtype=hcl.UInt(8)) - def kernel(d, w): - return hlib.ppac.gemm_multi_bit(d, w, 'res') - s = hcl.create_schedule([data, weight], kernel) - f = hcl.build(s, target='rv64_ppac') - code = str(f) - assert 'PPACFunc_GeMMUInt' in code - - def test_gemm_multi_bit_signed(): - hcl.init() - data = hcl.placeholder((32, 32), 'd', dtype=hcl.Int(8)) - weight = 
hcl.placeholder((32, 32), 'w', dtype=hcl.Int(8)) - def kernel(d, w): - return hlib.ppac.gemm_multi_bit(d, w, 'res') - s = hcl.create_schedule([data, weight], kernel) - f = hcl.build(s, target='rv64_ppac') - code = str(f) - assert 'PPACFunc_GeMMSInt' in code - - test_hmm_sim() - test_gemm_binary() - test_gemm_multi_bit_unsigned() - test_gemm_multi_bit_signed() - -def test_tile(): - def test_hmm_sim(): - hcl.init() - b_n = 10 - d_n = 256 - X = hcl.placeholder((b_n,), 'X', dtype=hcl.UInt(64)) - Y = hcl.placeholder((d_n,), 'Y', dtype=hcl.UInt(64)) - def kernel(X, Y): - return hlib.ppac.hmm_sim(X, Y, name='Z') - s = hcl.create_schedule([X, Y], kernel) - ir = str(hcl.lower(s)) - assert ('\"_batch_num\"=' + str(b_n)) in ir - assert ('\"_in_block_num\"=' + str(1)) in ir - assert ('\"_out_channel_num\"=' + str(d_n)) in ir - - def test_gemm_binary(): - hcl.init() - b_n, i_c, o_c = 64, 256, 256 - ppac_config = hlib.ppac.PPAC_config(multi_bit=False) - data = hcl.placeholder((b_n, i_c), 'd', dtype=hcl.UInt(1)) - weight = hcl.placeholder((o_c, i_c), 'w', dtype=hcl.UInt(1)) - def kernel(d, w): - return hlib.ppac.gemm_binary(d, w, 'res') - s = hcl.create_schedule([data, weight], kernel) - ir = str(hcl.lower(s)) - assert ('\"_batch_num\"=' + str(b_n)) in ir - assert ('\"_in_block_num\"=' + str(i_c // ppac_config.elem_num)) in ir - assert ('\"_out_channel_num\"=' + str(o_c)) in ir - - def test_gemm_multi_bit(): - hcl.init() - b_n, i_c, o_c = 64, 256, 256 - ppac_config = hlib.ppac.PPAC_config(multi_bit=True) - data = hcl.placeholder((b_n, i_c), 'd', dtype=hcl.Int(8)) - weight = hcl.placeholder((o_c, i_c), 'w', dtype=hcl.Int(8)) - def kernel(d, w): - return hlib.ppac.gemm_multi_bit(d, w, 'res') - s = hcl.create_schedule([data, weight], kernel) - ir = str(hcl.lower(s)) - assert ('\"_batch_num\"=' + str(b_n)) in ir - assert ('\"_in_block_num\"=' + str(i_c // ppac_config.elem_num)) in ir - assert ('\"_out_channel_num\"=' + str(o_c)) in ir - - test_hmm_sim() - test_gemm_binary() - 
test_gemm_multi_bit() \ No newline at end of file diff --git a/tests/test_codegen_soda.py b/tests/test_codegen_soda.py index 56fb8df77..492ee6146 100644 --- a/tests/test_codegen_soda.py +++ b/tests/test_codegen_soda.py @@ -52,6 +52,7 @@ def test_blur(self): img_t(0, 0) = uint16((int32((uint18((uint17(img_i(-1, 0)) + uint17(img_i(0, 0)))) + uint18(img_i(1, 0)))) / 3)) output uint16: img_o(0, 0) = uint16((int32((uint18((uint17(img_t(0, -1)) + uint17(img_t(0, 0)))) + uint18(img_t(0, 1)))) / 3)) + ''') def test_gaussian(self): @@ -76,6 +77,7 @@ def test_gaussian(self): reduce_ssa3 = float32(((float64(img_i(-1, 0)) * 3699.65) + float64(reduce_ssa2))) reduce_ssa4 = float32(((float64(img_i(0, 0)) * 4620.30) + float64(reduce_ssa3))) img_o(0, 0) = reduce_ssa4 + ''' ) diff --git a/tests/test_codegen_vhls.py b/tests/test_codegen_vhls.py index dadae5068..a6385975b 100644 --- a/tests/test_codegen_vhls.py +++ b/tests/test_codegen_vhls.py @@ -85,7 +85,7 @@ def test_index_split(): s = hcl.create_schedule([A, B]) s[B].split(B.axis[0], 5) code = hcl.build(s, target="vhls") - assert "B[(y_inner + (y_outer * 5))][x]" in code + assert "B[(x + ((y_inner + (y_outer * 5)) * 10))]" in code def test_index_split_reshape(): hcl.init() @@ -95,7 +95,7 @@ def test_index_split_reshape(): s[B].split(B.axis[0], 5) s.reshape(B, (2, 5, 10)) code = hcl.build(s, target="vhls") - assert "B[y_outer][y_inner][x]" in code + assert "B[(x + ((y_inner + (y_outer * 5)) * 10))]" in code def test_index_fuse(): hcl.init() @@ -104,7 +104,7 @@ def test_index_fuse(): s = hcl.create_schedule([A, B]) s[B].fuse(B.axis[0], B.axis[1]) code = hcl.build(s, target="vhls") - assert "B[(y_x_fused / 10)][(y_x_fused % 10)]" in code + assert "B[y_x_fused]" in code def test_binary_conv(): hcl.init() diff --git a/tvm/src/codegen/build_common.cc b/tvm/src/codegen/build_common.cc index f7c5ce9cd..8bdbf7e98 100644 --- a/tvm/src/codegen/build_common.cc +++ b/tvm/src/codegen/build_common.cc @@ -92,7 +92,7 @@ class SimModuleNode final : 
public ModuleNode { GenHostCode(args, shmids, arg_types, func_, platform_, host_, arg_info_); GenKernelCode(dev_); - // system("cd __tmp__; make csim"); + system("cd __tmp__; make csim"); } else { LOG(FATAL) << "unrecognized platform " << platform_; } diff --git a/tvm/src/codegen/codegen_c.cc b/tvm/src/codegen/codegen_c.cc index 8a0b5e29e..006edf933 100644 --- a/tvm/src/codegen/codegen_c.cc +++ b/tvm/src/codegen/codegen_c.cc @@ -207,7 +207,16 @@ std::string CodeGenC::GetDevice() { // process device code PreProcess(device); - device << device_stream.str(); + // remove the kernel name alloc + auto text = device_stream.str(); + for (auto const& m : stream_arg_pos) { + std::string alloc = m.first + ";"; + size_t nFPos = text.find(alloc); + size_t secondNL = text.find('\n', nFPos); + size_t firstNL = text.rfind('\n', nFPos); + text.erase(firstNL, secondNL - firstNL); + } + device << text; PostProcess(device); if (fpga_scope_) device << stream.str(); diff --git a/tvm/src/codegen/hlsc/codegen_hlsc.cc b/tvm/src/codegen/hlsc/codegen_hlsc.cc index 880c258f9..d7fc610d7 100644 --- a/tvm/src/codegen/hlsc/codegen_hlsc.cc +++ b/tvm/src/codegen/hlsc/codegen_hlsc.cc @@ -70,13 +70,15 @@ std::string CodeGenHLSC::GetBufferRef(Type t, const Variable* buffer, Expr index if (is_scalar) { os << vid; } else { - os << vid; - std::vector indices = ExtractIndices(index, var_shape_map_[buffer], range_); - for (size_t i = 0; i < indices.size(); i++) { - os << '['; - PrintExpr(indices[i], os); - os << ']'; - } + os << vid << "["; + PrintExpr(index, os); + os << "]"; + // std::vector indices = ExtractIndices(index, var_shape_map_[buffer], range_); + // for (size_t i = 0; i < indices.size(); i++) { + // os << '['; + // PrintExpr(indices[i], os); + // os << ']'; + // } } } return os.str(); @@ -180,21 +182,18 @@ void CodeGenHLSC::VisitStmt_(const Allocate* op) { std::string scope = alloc_storage_scope_.at(buffer); PrintStorageScope(scope, stream); - // initlize hls stream channel if 
(vid.find("stream_") != std::string::npos) { - void(0); - // stream << "hls::stream<"; - // PrintType(op->type, stream); - // stream << "> " << vid << ";\n"; + void(0); // alloc stream channel in pre-processing } else { PrintType(op->type, stream); stream << ' '<< vid; if (constant_size > 1) {// Transfer length one array to scalar + stream << "["; for (size_t i = 0; i < op->extents.size(); i++) { - stream << '['; PrintExpr(op->extents[i], stream); - stream << "]"; + if (i != op->extents.size()-1) stream << "*"; } + stream << "]"; } stream << ";\n"; } diff --git a/tvm/src/codegen/hlsc/codegen_vhls.cc b/tvm/src/codegen/hlsc/codegen_vhls.cc index e9c915576..f944bef83 100644 --- a/tvm/src/codegen/hlsc/codegen_vhls.cc +++ b/tvm/src/codegen/hlsc/codegen_vhls.cc @@ -64,7 +64,7 @@ void CodeGenVivadoHLS::PreProcess(std::ostringstream& os) { for (int k = 0; k < indent; k++) os << ' '; os << "}\n"; } - } else if (i == arg_vars.size() - 1) { + } else if (i == arg_vars.size() - 1 || true) { // allocate for return variable for (int k = 0; k < indent; k++) os << ' '; PrintType(dtype, os); @@ -149,16 +149,12 @@ void CodeGenVivadoHLS::VisitStmt_(const Store* op) { << "[" << PrintExpr(sb->index) << "] = " << PrintExpr(sb->value) << ";\n"; } else if (const StreamExpr* se = op->value.as()) { - if (!fpga_scope_) { - std::string vid = GetVarID(se->buffer_var.get()); - vid = vid.substr(0, vid.find("_stream_send")); - PrintIndent(); - this->stream << vid << "[" - << op->index << "] = " - << "fd_" << vid << ".read();\n"; - } else { - CodeGenC::VisitStmt_(op); - } + std::string vid = GetVarID(se->buffer_var.get()); + vid = vid.substr(0, vid.find("_stream_send")); + PrintIndent(); + this->stream << vid << "[" + << op->index << "] = " + << "fd_" << vid << ".read();\n"; } else { CodeGenC::VisitStmt_(op); } @@ -380,7 +376,13 @@ void CodeGenVivadoHLS::VisitStmt_(const KernelDef* op) { stream << ">& " << vid; } else { PrintType(type, stream); - this->stream << "* " << vid; + this->stream << " " 
<< vid << "["; + int mul = 1; + for (size_t j = 0; j < op->api_args[i].size(); j++) { + auto dim = op->api_args[i][j].as()->value; + mul = mul * dim; + } + this->stream << mul << "]"; } } stream << ") {\n"; diff --git a/tvm/src/codegen/opencl/codegen_aocl.cc b/tvm/src/codegen/opencl/codegen_aocl.cc index 8d2329e98..6d3247d02 100644 --- a/tvm/src/codegen/opencl/codegen_aocl.cc +++ b/tvm/src/codegen/opencl/codegen_aocl.cc @@ -55,6 +55,7 @@ void CodeGenAOCL::AddFunction(LoweredFunc f, } this->decl_stream << "#include \"ihc_apint.h\"" << "\n"; + this->decl_stream << "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable\n"; this->stream << "__kernel " << "void " << f->name << "("; // Write arguments @@ -97,15 +98,14 @@ void CodeGenAOCL::PrintType(Type t, std::ostream &os) if(t.is_handle()) { os << "void*";return; } - if(t==Bool()) { + if(t == Bool()) { os <<"bool"; return; } - CHECK_EQ(lanes,1) + CHECK_EQ(lanes, 1) << "do not yet support vector types"; bool fail = false; - if(t.is_float()) - { + if(t.is_float()) { switch(t.bits()) { case 16: @@ -123,47 +123,30 @@ void CodeGenAOCL::PrintType(Type t, std::ostream &os) fail = true; break; } - if(!fail && lanes ==1)return; + if(!fail && lanes ==1) return; if(!fail&&(lanes >= 2 && lanes <=16)) { os<=2 && lanes <= 16)) { + os << lanes; return; } - if(!fail && lanes == 1)return; - if(!fail && (lanes >=2 && lanes <= 16)) - { - os< 64) { os << "uint" << "64" << "_t"; return; } else { - os<< "uint"< uintd_t"; return; } } - if(t.is_int()) - { + if(t.is_int()) { if (t.bits() > 64) { os << "int" << "64" << "_t"; return; } else { - os << "int" << t.bits() << "_t"; return; + os << "ap_int<" << t.bits() << "> intd_t"; return; } } } diff --git a/tvm/src/codegen/opencl/codegen_opencl.cc b/tvm/src/codegen/opencl/codegen_opencl.cc index ddc1131f8..979a19e0f 100755 --- a/tvm/src/codegen/opencl/codegen_opencl.cc +++ b/tvm/src/codegen/opencl/codegen_opencl.cc @@ -160,8 +160,6 @@ void CodeGenOpenCL::VisitStmt_(const LetStmt* 
op) { this->stream << ' ' << vid << " = " << value << ";\n"; - } else if (value.find(".data)") !=0) { - var_idmap_[op->var.get()] = "arg_top_" + vid; } PrintStmt(op->body); } diff --git a/tvm/src/schedule/schedule_dataflow_rewrite.cc b/tvm/src/schedule/schedule_dataflow_rewrite.cc index 6db9f542b..a7fc8ee72 100644 --- a/tvm/src/schedule/schedule_dataflow_rewrite.cc +++ b/tvm/src/schedule/schedule_dataflow_rewrite.cc @@ -70,8 +70,8 @@ Expr getIndex(std::vector indices, const Array shape) { Expr ret = indices[0]; Expr mul = 1; for (size_t i = 1; i < indices.size(); i++) { - mul = Simplify(mul * shape[i]); ret = Simplify(ret + indices[i] * mul); + mul = Simplify(mul * shape[i]); } return ret; } diff --git a/tvm/src/template/vivado/Makefile b/tvm/src/template/vivado/Makefile index 2afd05040..1d84baead 100644 --- a/tvm/src/template/vivado/Makefile +++ b/tvm/src/template/vivado/Makefile @@ -16,7 +16,7 @@ CFLAGS = -g -I${VHLS_INC} all: csim -csim: kernel.cpp host.cpp +csim: host.cpp @echo "Compiling & simulating on amdpool ..." 
g++ ${CFLAGS} $^ -o out -lrt ./out From dd3e2a831a6b96e4278a12c38e7d39bd6e5d3696 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 9 Dec 2019 18:48:14 -0500 Subject: [PATCH 100/103] [fix] python compatibility --- python/heterocl/devices.py | 3 +- samples/kmeans/kmeans_main.py | 28 ++------- samples/lenet/lenet_main.py | 54 ++++++---------- samples/smith_waterman/smith_waterman_main.py | 61 ++----------------- 4 files changed, 30 insertions(+), 116 deletions(-) diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 734c45b8c..6b7148df0 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -2,6 +2,7 @@ #pylint: disable=too-few-public-methods, too-many-return-statements from .debug import DeviceError from .tools import option_table, model_table +from future.utils import with_metaclass class tooling(type): def __getattr__(cls, key): @@ -10,7 +11,7 @@ def __getattr__(cls, key): else: # unsupported device raise DeviceError("not supported") -class tool(metaclass=tooling): +class tool(with_metaclass(tooling, object)): """The base class for all device tooling mode (sim/impl) is decided by tool configuration diff --git a/samples/kmeans/kmeans_main.py b/samples/kmeans/kmeans_main.py index cbb634a4d..d648ce31c 100644 --- a/samples/kmeans/kmeans_main.py +++ b/samples/kmeans/kmeans_main.py @@ -13,19 +13,12 @@ ############################################################################## # Define the number of the clustering means as K, the number of points as N, # the number of dimensions as dim, and the number of iterations as niter -# K = 16 -# N = 320 -# dim = 32 -# niter = 200 - -K = 6 -N = 32 -dim = 3 -niter = 10 +K = 16 +N = 320 +dim = 32 +niter = 200 hcl.init() -#hcl.init(hcl.Float()) - ############################################################################## # Main Algorithm @@ -73,19 +66,6 @@ def calc_sum(n): return hcl.build(s, target=target) f = top() -# code = top('merlinc') -# with open('merlinc_code.cl', 'w') as f: -# 
f.write(code) -# code2 = top('sdaccel') -# with open('sdaccel_code.cl', 'w') as f: -# f.write(code2) -# code3 = top('vhls') -# with open('vhls_code.cl', 'w') as f: -# f.write(code3) -code = top('aocl') -with open('kmeans_aocl.cl', 'w') as f: - f.write(code) -assert 1==2 points_np = np.random.randint(100, size=(N, dim)) labels_np = np.zeros(N) diff --git a/samples/lenet/lenet_main.py b/samples/lenet/lenet_main.py index b3f29c42e..a3bdc8282 100644 --- a/samples/lenet/lenet_main.py +++ b/samples/lenet/lenet_main.py @@ -67,8 +67,7 @@ def build_lenet(input_image, weight_conv1, weight_conv2, qtype1 = hcl.Fixed(16, 14) qtype2 = hcl.Fixed(16, 14) correct_sum = 0 -# batch_size = 1000 -batch_size = 1 +batch_size = 1000 mnist = mx.test_utils.get_mnist() ############################################################################### @@ -77,38 +76,23 @@ def build_lenet(input_image, weight_conv1, weight_conv2, # the internal tensors, we use `hcl.quantize` API. def build_lenet_inf(batch_size=batch_size, target=None): # set up input/output placeholders - #input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image") - input_image = hcl.placeholder((batch_size, 1, 3, 3), "input_image") - # weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1", qtype1) - weight_conv1 = hcl.placeholder((1, 1, 5, 5), "weight_conv1") - # weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1") - weight_conv2 = hcl.placeholder((10, 1, 5, 5), "weight_conv2") - # weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2") - # weight_fc1 = hcl.placeholder((500, 800), "weight_fc1", qtype1) - weight_fc1 = hcl.placeholder((25, 40), "weight_fc1") - # weight_fc1 = hcl.placeholder((500, 800), "weight_fc1") - # weight_fc2 = hcl.placeholder((10, 500), "weight_fc2", qtype1) - weight_fc2 = hcl.placeholder((10, 25), "weight_fc2") - # weight_fc2 = hcl.placeholder((10, 500), "weight_fc2") + input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image") + weight_conv1 = hcl.placeholder((20, 
1, 5, 5), "weight_conv1", qtype1) + weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2", qtype1) + weight_fc1 = hcl.placeholder((500, 800), "weight_fc1", qtype1) + weight_fc2 = hcl.placeholder((10, 500), "weight_fc2", qtype1) lenet = hcl.placeholder((batch_size, 10), "lenet") # create a quantization scheme - # scheme = hcl.create_scheme( - # [input_image, weight_conv1, weight_conv2, - # weight_fc1, weight_fc2, lenet], build_lenet) + scheme = hcl.create_scheme( + [input_image, weight_conv1, weight_conv2, + weight_fc1, weight_fc2, lenet], build_lenet) # quantize the three activation layers - #scheme.quantize( - # [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3], qtype2) - #s = hcl.create_schedule_from_scheme(scheme) - s = hcl.create_schedule([input_image, weight_conv1, weight_conv2, - weight_fc1, weight_fc2, lenet], build_lenet) + scheme.quantize( + [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3], qtype2) + s = hcl.create_schedule_from_scheme(scheme) return hcl.build(s, target=target) -# f = build_lenet_inf() -code = build_lenet_inf(batch_size, 'aocl') -with open('lenet_aocl.cl', 'w') as f: - f.write(code) -assert 1==2 - +f = build_lenet_inf() ############################################################################### # Prepare the numpy arrays for testing. 
Remember that we need to set the input @@ -135,11 +119,11 @@ def build_lenet_inf(batch_size=batch_size, target=None): # remove downloaded files import os -#os.remove("t10k-images-idx3-ubyte.gz") -#os.remove("t10k-labels-idx1-ubyte.gz") -#os.remove("train-images-idx3-ubyte.gz") -#os.remove("train-labels-idx1-ubyte.gz") -#os.remove("lenet-0010.params") -#os.remove("lenet-symbol.json") +os.remove("t10k-images-idx3-ubyte.gz") +os.remove("t10k-labels-idx1-ubyte.gz") +os.remove("train-images-idx3-ubyte.gz") +os.remove("train-labels-idx1-ubyte.gz") +os.remove("lenet-0010.params") +os.remove("lenet-symbol.json") assert correct_sum == 9882 diff --git a/samples/smith_waterman/smith_waterman_main.py b/samples/smith_waterman/smith_waterman_main.py index 9a50d2a89..f1300ee54 100644 --- a/samples/smith_waterman/smith_waterman_main.py +++ b/samples/smith_waterman/smith_waterman_main.py @@ -10,12 +10,9 @@ import numpy as np import time -#lenA = 128 -lenA = 28 -#lenB = 128 -lenB = 28 -#num = 1024 -num = 64 +lenA = 128 +lenB = 128 +num = 1024 penalty = -4 hcl.init() @@ -121,18 +118,12 @@ def batch_sw(seqAs, seqBs, outAs, outBs): outAs = hcl.placeholder((num, lenA+lenB), "outAs", dtype) outBs = hcl.placeholder((num, lenA+lenB), "outBs", dtype) - # seqAs = hcl.placeholder((num, lenA), "seqAs") - # seqBs = hcl.placeholder((num, lenB,), "seqBs") - # outAs = hcl.placeholder((num, lenA+lenB), "outAs") - # outBs = hcl.placeholder((num, lenA+lenB), "outBs") - scheme = hcl.create_scheme([seqAs, seqBs, outAs, outBs], batch_sw) scheme.downsize([batch_sw.B.matrix, batch_sw.B.action], mtype) s = hcl.create_schedule_from_scheme(scheme) o, p = s[batch_sw.B].split(batch_sw.B.axis[0], factor=32) s[batch_sw.B].pipeline(o) - # s[batch_sw.B].parallel(p) - s[batch_sw.B].unroll(p) + s[batch_sw.B].parallel(p) return hcl.build(s, target=target) ############################################################################### @@ -142,41 +133,7 @@ def batch_sw(seqAs, seqBs, outAs, outBs): _consA = 
hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) _consB = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) -# _seqA = hcl.asarray(np.random.randint(1, 5, size=(num, lenA))) -# _seqB = hcl.asarray(np.random.randint(1, 5, size=(num, lenB))) -# _consA = hcl.asarray(np.zeros((num, (lenA + lenB)))) -# _consB = hcl.asarray(np.zeros((num, (lenA + lenB)))) - - - - -# f = top() -code = top('sdaccel'); -with open('sdaccel_code.cl', 'w') as f: - f.write(code) - -code2 = top('aocl') -with open('smith_aocl.cl', 'w') as fin: - fin.write(code2) - -code3 = top('vhls') -with open('smith_vhls.cl', 'w') as fin: - fin.write(code3) - -assert 1==2 - - -# code3 = top('vhls'); -# with open('vhls_code.cl', 'w') as f: -# f.write(code3) - - -# code2 = top('merlinc') -# with open('merlinc_code.cl', 'w') as f: -# f.write(code2) - - - +f = top() start = time.time() f(_seqA, _seqB, _consA, _consB) total_time = time.time() - start @@ -192,17 +149,9 @@ def batch_sw(seqAs, seqBs, outAs, outBs): _seqB = hcl.asarray(_seqB_np, dtype) _consA = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) _consB = hcl.asarray(np.zeros((num, (lenA + lenB))), dtype) - -# _seqA = hcl.asarray(_seqA_np) -# _seqB = hcl.asarray(_seqB_np) -# _consA = hcl.asarray(np.zeros((num, (lenA + lenB)))) -# _consB = hcl.asarray(np.zeros((num, (lenA + lenB)))) - - f(_seqA, _seqB, _consA, _consB) _consA_np = _consA.asnumpy() _consB_np = _consB.asnumpy() - for i in range(0, 256): if i < 124: assert _consA_np[0][i] == 1 From eefae89a37fdc557dd5769063f3d0292c5e4f8a8 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 9 Dec 2019 22:02:53 -0500 Subject: [PATCH 101/103] [update] future --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index ba119097d..4fa87539c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,6 +11,7 @@ test: &test key: v1.03-libhcl- - run: make build-python - run: pip install --user pytest + - run: pip install --user future - run: 
python -m pytest tests - run: pip install --user mxnet - run: python -m pytest samples From e53cb1eaf37256dce849ed096299261ab0312389 Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 9 Dec 2019 22:06:22 -0500 Subject: [PATCH 102/103] [fix] metaclass --- python/heterocl/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/heterocl/devices.py b/python/heterocl/devices.py index 6b7148df0..a5d81df86 100644 --- a/python/heterocl/devices.py +++ b/python/heterocl/devices.py @@ -175,7 +175,7 @@ def __getattr__(cls, key): tool = tool_table[key] return cls(key, devs, host, xcel, tool) -class platform(metaclass=env): +class platform(with_metaclass(env, object)): def __init__(self, name, devs, host, xcel, tool): self.name = name self.devs = devs From 378069f55c0118f8aca349dc0eea1112bad18b6d Mon Sep 17 00:00:00 2001 From: Hecmay Date: Mon, 9 Dec 2019 22:19:46 -0500 Subject: [PATCH 103/103] [fix] test import issue --- samples/digitrec/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 samples/digitrec/__init__.py diff --git a/samples/digitrec/__init__.py b/samples/digitrec/__init__.py new file mode 100644 index 000000000..e69de29bb