Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Relay][External Codegen] Support data types for CSourceModuleCodegen args and output #4934

Merged
merged 7 commits into from
Feb 25, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 34 additions & 23 deletions src/relay/backend/contrib/codegen_c/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
explicit CodegenC(const std::string& id) { this->ext_func_id_ = id; }

void VisitExpr_(const VarNode* node) {
ext_func_args_.push_back(node->name_hint());
ext_func_args_.push_back(GetRef<Var>(node));
out_.clear();
out_.push_back({node->name_hint(), 0});
Output output;
output.name = node->name_hint();
out_.push_back(output);
}

void VisitExpr_(const CallNode* call) final {
Expand All @@ -70,6 +72,12 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
for (size_t i = 0; i < in_shape.size(); ++i) {
macro_stream << ", " << in_shape[i];
}

auto type_node = call->checked_type().as<TensorTypeNode>();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const auto* type_node

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

CHECK(type_node);
const auto& dtype = GetDtypeString(type_node);
macro_stream << ", " << dtype;

macro_stream << ");";
func_decl_.push_back(macro_stream.str());

Expand All @@ -83,28 +91,31 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
decl_stream << ", ";
}
first = false;
decl_stream << out.first;
decl_stream << out.name;
}
}

auto type_node = call->checked_type().as<TensorTypeNode>();
CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
<< "Only support single output tensor with float type";
std::string out = "buf_" + std::to_string(buf_idx_++);
auto out_shape = GetShape(call->checked_type());
int out_size = 1;
for (size_t i = 0; i < out_shape.size(); ++i) {
out_size *= out_shape[i];
}
buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");";
buf_stream << dtype << "* " << out <<
" = (" << dtype << "*)std::malloc(4 * " << out_size << ");";
buf_decl_.push_back(buf_stream.str());

decl_stream << ", " << out << ");";
ext_func_body.push_back(decl_stream.str());

// Update output buffer
out_.clear();
out_.push_back({out, out_size});
Output output;
output.name = out;
output.dtype = dtype;
output.need_copy = true;
output.size = out_size;
out_.push_back(output);
}

/*!
Expand All @@ -128,15 +139,15 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
/*! \brief The index of allocated buffers. */
int buf_idx_ = 0;
/*! \brief The arguments of a C compiler compatible function. */
std::vector<std::string> ext_func_args_;
Array<Var> ext_func_args_;
/*! \brief The statements of a C compiler compatible function. */
std::vector<std::string> ext_func_body;
/*! \brief The declaration statements of a C compiler compatible function. */
std::vector<std::string> func_decl_;
/*! \brief The declaration statements of buffers. */
std::vector<std::string> buf_decl_;
/*! \brief The name and index pairs for output. */
std::vector<std::pair<std::string, int>> out_;
std::vector<Output> out_;
};

class CSourceCodegen : public CSourceModuleCodegenBase {
Expand All @@ -161,21 +172,21 @@ class CSourceCodegen : public CSourceModuleCodegenBase {

// Append some common macro for operator definition.
const char* operator_macro = R"op_macro(
#define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_) \
extern "C" void p_ID_(float* a, float* b, float* out) { \
for (int64_t i = 0; i < p_DIM1_; ++i) { \
out[i] = a[i] p_OP_ b[i]; \
} \
#define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_, p_DTYPE) \
extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
for (int64_t i = 0; i < p_DIM1_; ++i) { \
out[i] = a[i] p_OP_ b[i]; \
} \
}

#define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_) \
extern "C" void p_ID_(float* a, float* b, float* out) { \
for (int64_t i = 0; i < p_DIM1_; ++i) { \
for (int64_t j = 0; j < p_DIM2_; ++j) { \
int64_t k = i * p_DIM2_ + j; \
out[k] = a[k] p_OP_ b[k]; \
} \
} \
#define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_, p_DTYPE) \
extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
for (int64_t i = 0; i < p_DIM1_; ++i) { \
for (int64_t j = 0; j < p_DIM2_; ++j) { \
int64_t k = i * p_DIM2_ + j; \
out[k] = a[k] p_OP_ b[k]; \
} \
} \
}
)op_macro";

Expand Down
89 changes: 68 additions & 21 deletions src/relay/backend/contrib/codegen_c/codegen_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ namespace tvm {
namespace relay {
namespace contrib {

/*!
 * \brief Describes one output value produced by the generated C function.
 */
struct Output {
  /*! \brief Symbol name of the output (a var's name_hint or a "buf_N" temp). */
  std::string name;
  /*! \brief C type string for the element type, e.g. "float", "int", "int64_t". */
  std::string dtype;
  /*!
   * \brief Number of elements in the buffer; only meaningful when need_copy
   * is true. Default-initialized to avoid carrying an indeterminate value.
   */
  int size{0};
  /*!
   * \brief Whether the emitted code must memcpy this buffer to the caller's
   * output and free it. Var visitors fill only `name`, and JitImpl reads this
   * flag, so it must default to false rather than stay uninitialized.
   */
  bool need_copy{false};
};

class CSourceModuleCodegenBase {
public:
CSourceModuleCodegenBase() = default;
Expand Down Expand Up @@ -98,7 +105,7 @@ class CodegenCBase {
* \brief Generate C code for the external function.
*
* \param func_name The name of the external function.
* \param arg_cnt The expected number of arguments.
* \param args arguments to the external function.
*
* \code
*
Expand All @@ -116,29 +123,30 @@ class CodegenCBase {
*
* \endcode
*/
void GenerateBackendCFunc(const std::string& func_name, int arg_cnt) {
void GenerateBackendCFunc(const std::string& func_name, Array<Var> args, const Output& out) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const Array& args

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

// Print signature
code_stream_ << "\n";
code_stream_ << "extern \"C\" int " << func_name << "_wrapper_(";
for (int i = 0; i < arg_cnt - 1; i++) {
for (size_t i = 0; i < args.size(); i++) {
code_stream_ << "DLTensor* arg" << i << ",\n";
code_stream_ << "\t";
}
if (arg_cnt > 0) {
code_stream_ << "DLTensor* arg" << arg_cnt - 1 << ") {\n";
if (args.size() > 0) {
code_stream_ << "DLTensor* arg" << args.size() << ") {\n";
}

EnterScope();

// Generate the internal call.
PrintIndents();
code_stream_ << func_name << "_(";
for (int i = 0; i < arg_cnt - 1; i++) {
code_stream_ << "static_cast<float*>(arg" << i << "->data),\n";
for (size_t i = 0; i < args.size(); i++) {
const auto& dtype_str = GetDtypeString(args[i]);
code_stream_ << "static_cast<" << dtype_str << "*>(arg" << i << "->data),\n";
PrintIndents();
}
if (arg_cnt > 0) {
code_stream_ << "static_cast<float*>(arg" << arg_cnt - 1 << "->data)";
if (args.size() > 0) {
code_stream_ << "static_cast<" << out.dtype << "*>(arg" << args.size() << "->data)";
}
code_stream_ << ");\n";
PrintIndents();
Expand Down Expand Up @@ -207,17 +215,20 @@ class CodegenCBase {
*
* \return The emitted code string.
*/
std::string JitImpl(std::string ext_func_id, std::vector<std::string> args,
std::string JitImpl(std::string ext_func_id, Array<Var> args,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please also use const reference here for all parameters.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

std::vector<std::string> buf_decl, std::vector<std::string> body,
std::vector<std::pair<std::string, int>> out) {
std::vector<Output> out) {
// Create the signature. For example, it could be:
// extern "C" void dnnl_0_(float* input0, float* input1, float* out, int M, int N) {}
code_stream_ << "extern \"C\" void " << ext_func_id << "_(";

CHECK_EQ(out.size(), 1U) << "Internal error: only single output is support.";

for (const auto& arg : args) {
code_stream_ << "float* " << arg << ", ";
const auto& dtype_str = GetDtypeString(arg);
code_stream_ << dtype_str << "* " << arg->name_hint() << ", ";
}
code_stream_ << "float* out) {\n";
code_stream_ << out[0].dtype << "* out) {\n";
this->EnterScope();

// Function body
Expand All @@ -232,24 +243,60 @@ class CodegenCBase {
}

// Copy output
CHECK_EQ(out.size(), 1U) << "Internal error: only single output is support.";
this->PrintIndents();
code_stream_ << "std::memcpy(out, " << out[0].first << ", 4 * " << out[0].second << ");\n";

// Free buffers
for (size_t i = 0; i < buf_decl.size(); i++) {
if (out[0].need_copy) {
this->PrintIndents();
code_stream_ << "std::free(buf_" << i << ");\n";
code_stream_ << "std::memcpy(out, " << out[0].name << ", 4 * " << out[0].size << ");\n";

// Free buffers
for (size_t i = 0; i < buf_decl.size(); i++) {
this->PrintIndents();
code_stream_ << "std::free(buf_" << i << ");\n";
}
}

this->ExitScope();
code_stream_ << "}\n";

// Create the wrapper to call the ext_func
this->GenerateBackendCFunc(ext_func_id, args.size() + 1 /* output */);
this->GenerateBackendCFunc(ext_func_id, args, out[0]);
return code_stream_.str();
}

/*!
* \brief Returns dtype string
*
* \param var Var to get the dtype of
*
* \return The dtype string.
*/
std::string GetDtypeString(Var var) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const Var&

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

auto ttype = var->checked_type().as<TensorTypeNode>();
CHECK(ttype) << "Expect TensorTypeNode";
return GetDtypeString(ttype);
}

/*!
 * \brief Maps a tensor type's dtype to the corresponding C type name.
 *
 * \param ttype The TensorTypeNode whose dtype is inspected.
 *
 * \return "float", "int", or "int64_t"; any other dtype is fatal.
 */
std::string GetDtypeString(const TensorTypeNode* ttype) {
  if (runtime::TypeMatch(ttype->dtype, kDLFloat, 32)) {
    return "float";
  } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 32)) {
    return "int";
  } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 64)) {
    return "int64_t";
  }
  LOG(FATAL) << "Unsupported dtype " << ttype->dtype;
  return "";
}

/*! \brief The external function source code stream. */
std::ostringstream code_stream_;

Expand Down
23 changes: 15 additions & 8 deletions src/relay/backend/contrib/dnnl/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase {
explicit CodegenDNNL(const std::string& id) { this->ext_func_id_ = id; }

void VisitExpr_(const VarNode* node) final {
ext_func_args_.push_back(node->name_hint());
ext_func_args_.push_back(GetRef<Var>(node));
out_.clear();
out_.push_back({node->name_hint(), 0});
Output output;
output.name = node->name_hint();
out_.push_back(output);
}

void VisitExpr_(const TupleGetItemNode* op) final {
Expand Down Expand Up @@ -90,14 +92,14 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase {
decl_stream << ", ";
}
first = false;
decl_stream << out.first;
decl_stream << out.name;
}
}

// Analyze the output buffer
auto type_node = call->checked_type().as<TensorTypeNode>();
CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
<< "Only support single output tensor with float type";
CHECK(type_node);
const auto& dtype = GetDtypeString(type_node);
std::string out = "buf_" + std::to_string(buf_idx_++);
auto out_shape = GetShape(call->checked_type());
int out_size = 1;
Expand All @@ -118,7 +120,12 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase {

// Update output buffer
out_.clear();
out_.push_back({out, out_size});
Output output;
output.name = out;
output.dtype = dtype;
output.need_copy = true;
output.size = out_size;
out_.push_back(output);
}

std::string JIT(void) {
Expand Down Expand Up @@ -213,13 +220,13 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase {
*/
int buf_idx_{0};
/*! \brief The arguments used by a wrapped function that calls DNNL kernels. */
std::vector<std::string> ext_func_args_;
Array<Var> ext_func_args_;
/*! \brief statement of the function that will be compiled using DNNL kernels. */
std::vector<std::string> ext_func_body;
/*! \brief The declaration of intermediate buffers. */
std::vector<std::string> buf_decl_;
/*! \brief The names of the outputs. */
std::vector<std::pair<std::string, int>> out_;
std::vector<Output> out_;
};

/*!
Expand Down
18 changes: 18 additions & 0 deletions tests/python/relay/test_external_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,23 @@ def test_extern_gcc_single_op():
check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data)


def test_extern_gcc_single_op_int():
    """Offload a single int32 add to the external C codegen and check that
    the compiled module matches the numpy reference result."""
    x = relay.var('x', shape=(8, 8), dtype="int32")
    y = relay.var('y', shape=(8, 8), dtype="int32")

    x0 = relay.var('x0', shape=(8, 8), dtype="int32")
    y0 = relay.var('y0', shape=(8, 8), dtype="int32")
    z = x0 + y0
    f = relay.Function([x0, y0], z)
    f = set_external_func_attr(f, "ccompiler", "ccompiler_0")
    call = relay.Call(f, [x, y])
    mod = tvm.IRModule.from_expr(call)
    # np.random.rand draws floats in [0, 1), so casting to int32 would turn
    # every element into 0 and make the check vacuous (0 + 0 == 0).
    # Draw genuine integer data instead.
    x_data = np.random.randint(0, 100, size=(8, 8)).astype('int32')
    y_data = np.random.randint(0, 100, size=(8, 8)).astype('int32')

    check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data)


def test_extern_gcc():
x = relay.var('x', shape=(2, 2))
y = relay.var('y', shape=(2, 2))
Expand Down Expand Up @@ -242,5 +259,6 @@ def test_extern_dnnl():
if __name__ == "__main__":
test_multi_node_subgraph()
test_extern_gcc_single_op()
test_extern_gcc_single_op_int()
test_extern_gcc()
test_extern_dnnl()