[Relay][External Codegen] Support data types for CSourceModuleCodegen args and output (apache#4934)

* Support int args and no extra buffers

* Fixes

* remove testing code

* fix style

* more style

* use const args

* style

Co-authored-by: Jon Soifer <[email protected]>
2 people authored and zhiics committed Mar 2, 2020
1 parent dc2dcf7 commit d420abf
Showing 4 changed files with 139 additions and 53 deletions.
57 changes: 34 additions & 23 deletions src/relay/backend/contrib/codegen_c/codegen.cc
@@ -41,9 +41,11 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
   explicit CodegenC(const std::string& id) { this->ext_func_id_ = id; }
 
   void VisitExpr_(const VarNode* node) {
-    ext_func_args_.push_back(node->name_hint());
+    ext_func_args_.push_back(GetRef<Var>(node));
     out_.clear();
-    out_.push_back({node->name_hint(), 0});
+    Output output;
+    output.name = node->name_hint();
+    out_.push_back(output);
   }
 
   void VisitExpr_(const CallNode* call) final {
@@ -70,6 +72,12 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
     for (size_t i = 0; i < in_shape.size(); ++i) {
       macro_stream << ", " << in_shape[i];
     }
+
+    const auto* type_node = call->checked_type().as<TensorTypeNode>();
+    CHECK(type_node);
+    const auto& dtype = GetDtypeString(type_node);
+    macro_stream << ", " << dtype;
+
     macro_stream << ");";
     func_decl_.push_back(macro_stream.str());
 
@@ -83,28 +91,31 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
           decl_stream << ", ";
         }
         first = false;
-        decl_stream << out.first;
+        decl_stream << out.name;
       }
     }
 
-    auto type_node = call->checked_type().as<TensorTypeNode>();
-    CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
-        << "Only support single output tensor with float type";
     std::string out = "buf_" + std::to_string(buf_idx_++);
     auto out_shape = GetShape(call->checked_type());
     int out_size = 1;
     for (size_t i = 0; i < out_shape.size(); ++i) {
       out_size *= out_shape[i];
     }
-    buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");";
+    buf_stream << dtype << "* " << out <<
+        " = (" << dtype << "*)std::malloc(4 * " << out_size << ");";
     buf_decl_.push_back(buf_stream.str());
 
     decl_stream << ", " << out << ");";
     ext_func_body.push_back(decl_stream.str());
 
     // Update output buffer
     out_.clear();
-    out_.push_back({out, out_size});
+    Output output;
+    output.name = out;
+    output.dtype = dtype;
+    output.need_copy = true;
+    output.size = out_size;
+    out_.push_back(output);
   }
 
   /*!
@@ -128,15 +139,15 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
   /*! \brief The index of allocated buffers. */
   int buf_idx_ = 0;
   /*! \brief The arguments of a C compiler compatible function. */
-  std::vector<std::string> ext_func_args_;
+  Array<Var> ext_func_args_;
   /*! \brief The statements of a C compiler compatible function. */
   std::vector<std::string> ext_func_body;
   /*! \brief The declaration statements of a C compiler compatible function. */
   std::vector<std::string> func_decl_;
   /*! \brief The declaration statements of buffers. */
   std::vector<std::string> buf_decl_;
   /*! \brief The name and index pairs for output. */
-  std::vector<std::pair<std::string, int>> out_;
+  std::vector<Output> out_;
 };
 
 class CSourceCodegen : public CSourceModuleCodegenBase {
@@ -161,21 +172,21 @@ class CSourceCodegen : public CSourceModuleCodegenBase {
 
   // Append some common macro for operator definition.
   const char* operator_macro = R"op_macro(
-    #define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_)         \
-      extern "C" void p_ID_(float* a, float* b, float* out) {   \
-        for (int64_t i = 0; i < p_DIM1_; ++i) {                 \
-          out[i] = a[i] p_OP_ b[i];                             \
-        }                                                       \
+    #define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_, p_DTYPE)    \
+      extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
+        for (int64_t i = 0; i < p_DIM1_; ++i) {                     \
+          out[i] = a[i] p_OP_ b[i];                                 \
+        }                                                           \
       }
-    #define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_)  \
-      extern "C" void p_ID_(float* a, float* b, float* out) {     \
-        for (int64_t i = 0; i < p_DIM1_; ++i) {                   \
-          for (int64_t j = 0; j < p_DIM2_; ++j) {                 \
-            int64_t k = i * p_DIM2_ + j;                          \
-            out[k] = a[k] p_OP_ b[k];                             \
-          }                                                       \
-        }                                                         \
+    #define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_, p_DTYPE) \
+      extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) {       \
+        for (int64_t i = 0; i < p_DIM1_; ++i) {                           \
+          for (int64_t j = 0; j < p_DIM2_; ++j) {                         \
+            int64_t k = i * p_DIM2_ + j;                                  \
+            out[k] = a[k] p_OP_ b[k];                                     \
+          }                                                               \
+        }                                                                 \
       }
 )op_macro";
 
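For illustration, here is what one instantiation of the updated 2D macro expands to. The instantiation below is hypothetical (the generated function name is not taken from this diff); the expansion itself follows mechanically from the macro:

    // A CodegenC-style instantiation for a hypothetical 8x8 int32 elementwise add:
    CSOURCE_BINARY_OP_2D(gcc_0_0, +, 8, 8, int);

    // After preprocessing, this becomes:
    extern "C" void gcc_0_0(int* a, int* b, int* out) {
      for (int64_t i = 0; i < 8; ++i) {
        for (int64_t j = 0; j < 8; ++j) {
          int64_t k = i * 8 + j;
          out[k] = a[k] + b[k];
        }
      }
    }
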
94 changes: 72 additions & 22 deletions src/relay/backend/contrib/codegen_c/codegen_c.h
@@ -35,6 +35,13 @@ namespace tvm {
 namespace relay {
 namespace contrib {
 
+struct Output {
+  std::string name;
+  std::string dtype;
+  int size = 0;
+  bool need_copy = false;
+};
+
 class CSourceModuleCodegenBase {
  public:
   CSourceModuleCodegenBase() = default;
Expand Down Expand Up @@ -98,7 +105,7 @@ class CodegenCBase {
* \brief Gerenate C code for the external function.
*
* \param func_name The name of the external function.
* \param arg_cnt The expected number of arguments.
* \param args arguments to the external function.
*
* \code
*
@@ -116,29 +123,32 @@
    *
    * \endcode
    */
-  void GenerateBackendCFunc(const std::string& func_name, int arg_cnt) {
+  void GenerateBackendCFunc(const std::string& func_name,
+                            const Array<Var>& args,
+                            const Output& out) {
     // Print signature
     code_stream_ << "\n";
     code_stream_ << "extern \"C\" int " << func_name << "_wrapper_(";
-    for (int i = 0; i < arg_cnt - 1; i++) {
+    for (size_t i = 0; i < args.size(); i++) {
       code_stream_ << "DLTensor* arg" << i << ",\n";
       code_stream_ << "\t";
     }
-    if (arg_cnt > 0) {
-      code_stream_ << "DLTensor* arg" << arg_cnt - 1 << ") {\n";
+    if (args.size() > 0) {
+      code_stream_ << "DLTensor* arg" << args.size() << ") {\n";
     }
 
     EnterScope();
 
     // Generate the internal call.
     PrintIndents();
     code_stream_ << func_name << "_(";
-    for (int i = 0; i < arg_cnt - 1; i++) {
-      code_stream_ << "static_cast<float*>(arg" << i << "->data),\n";
+    for (size_t i = 0; i < args.size(); i++) {
+      const auto& dtype_str = GetDtypeString(args[i]);
+      code_stream_ << "static_cast<" << dtype_str << "*>(arg" << i << "->data),\n";
       PrintIndents();
     }
-    if (arg_cnt > 0) {
-      code_stream_ << "static_cast<float*>(arg" << arg_cnt - 1 << "->data)";
+    if (args.size() > 0) {
+      code_stream_ << "static_cast<" << out.dtype << "*>(arg" << args.size() << "->data)";
     }
     code_stream_ << ");\n";
     PrintIndents();
Expand Down Expand Up @@ -207,17 +217,21 @@ class CodegenCBase {
*
* \return The emitted code string.
*/
std::string JitImpl(std::string ext_func_id, std::vector<std::string> args,
std::vector<std::string> buf_decl, std::vector<std::string> body,
std::vector<std::pair<std::string, int>> out) {
std::string JitImpl(std::string ext_func_id, const Array<Var>& args,
const std::vector<std::string>& buf_decl,
const std::vector<std::string>& body,
const std::vector<Output>& out) {
// Create the signature. For example, it could be:
// extern "C" void dnnl_0_(float* input0, float* input1, float* out, int M, int N) {}
code_stream_ << "extern \"C\" void " << ext_func_id << "_(";

CHECK_EQ(out.size(), 1U) << "Internal error: only single output is support.";

for (const auto& arg : args) {
code_stream_ << "float* " << arg << ", ";
const auto& dtype_str = GetDtypeString(arg);
code_stream_ << dtype_str << "* " << arg->name_hint() << ", ";
}
code_stream_ << "float* out) {\n";
code_stream_ << out[0].dtype << "* out) {\n";
this->EnterScope();

// Function body
@@ -232,24 +246,60 @@
     }
 
     // Copy output
-    CHECK_EQ(out.size(), 1U) << "Internal error: only a single output is supported.";
-    this->PrintIndents();
-    code_stream_ << "std::memcpy(out, " << out[0].first << ", 4 * " << out[0].second << ");\n";
-
-    // Free buffers
-    for (size_t i = 0; i < buf_decl.size(); i++) {
-      this->PrintIndents();
-      code_stream_ << "std::free(buf_" << i << ");\n";
-    }
+    if (out[0].need_copy) {
+      this->PrintIndents();
+      code_stream_ << "std::memcpy(out, " << out[0].name << ", 4 * " << out[0].size << ");\n";
+
+      // Free buffers
+      for (size_t i = 0; i < buf_decl.size(); i++) {
+        this->PrintIndents();
+        code_stream_ << "std::free(buf_" << i << ");\n";
+      }
+    }
 
     this->ExitScope();
     code_stream_ << "}\n";
 
     // Create the wrapper to call the ext_func
-    this->GenerateBackendCFunc(ext_func_id, args.size() + 1 /* output */);
+    this->GenerateBackendCFunc(ext_func_id, args, out[0]);
     return code_stream_.str();
   }

+  /*!
+   * \brief Returns the dtype string.
+   *
+   * \param var The Var to get the dtype of.
+   *
+   * \return The dtype string.
+   */
+  std::string GetDtypeString(const Var& var) {
+    auto ttype = var->checked_type().as<TensorTypeNode>();
+    CHECK(ttype) << "Expect TensorTypeNode";
+    return GetDtypeString(ttype);
+  }
+
+  /*!
+   * \brief Returns the dtype string.
+   *
+   * \param ttype The TensorTypeNode* to get the dtype of.
+   *
+   * \return The dtype string.
+   */
+  std::string GetDtypeString(const TensorTypeNode* ttype) {
+    std::string dtype;
+    if (runtime::TypeMatch(ttype->dtype, kDLFloat, 32)) {
+      dtype = "float";
+    } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 32)) {
+      dtype = "int";
+    } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 64)) {
+      dtype = "int64_t";
+    } else {
+      LOG(FATAL) << "Unsupported dtype " << ttype->dtype;
+    }
+
+    return dtype;
+  }
+
   /*! \brief The external function source code stream. */
   std::ostringstream code_stream_;
 
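To make the new data-type handling concrete, the following sketch traces what JitImpl and GenerateBackendCFunc would emit for a single 8x8 int32 add subgraph. All identifiers (gcc_0, gcc_0_0, gcc_input0, ...) are illustrative, and the trailing return 0; in the wrapper is assumed from generator code outside this diff:

    // Sketch of JitImpl("gcc_0", ...) output: arguments are now typed from their Vars.
    extern "C" void gcc_0_(int* gcc_input0, int* gcc_input1, int* out) {
      int* buf_0 = (int*)std::malloc(4 * 64);  // from buf_decl
      gcc_0_0(gcc_input0, gcc_input1, buf_0);  // from body
      std::memcpy(out, buf_0, 4 * 64);         // emitted only when need_copy is true
      std::free(buf_0);
    }

    // Sketch of GenerateBackendCFunc("gcc_0", args, out[0]) output:
    extern "C" int gcc_0_wrapper_(DLTensor* arg0,
        DLTensor* arg1,
        DLTensor* arg2) {
      gcc_0_(static_cast<int*>(arg0->data),
          static_cast<int*>(arg1->data),
          static_cast<int*>(arg2->data));
      return 0;  // assumed; the tail of the generator is not shown in this hunk
    }
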
23 changes: 15 additions & 8 deletions src/relay/backend/contrib/dnnl/codegen.cc
@@ -45,9 +45,11 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase {
   explicit CodegenDNNL(const std::string& id) { this->ext_func_id_ = id; }
 
   void VisitExpr_(const VarNode* node) final {
-    ext_func_args_.push_back(node->name_hint());
+    ext_func_args_.push_back(GetRef<Var>(node));
     out_.clear();
-    out_.push_back({node->name_hint(), 0});
+    Output output;
+    output.name = node->name_hint();
+    out_.push_back(output);
   }
 
   void VisitExpr_(const TupleGetItemNode* op) final {
@@ -90,14 +92,14 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase {
           decl_stream << ", ";
         }
         first = false;
-        decl_stream << out.first;
+        decl_stream << out.name;
       }
     }
 
     // Analyze the output buffer
     auto type_node = call->checked_type().as<TensorTypeNode>();
-    CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
-        << "Only support single output tensor with float type";
+    CHECK(type_node);
+    const auto& dtype = GetDtypeString(type_node);
     std::string out = "buf_" + std::to_string(buf_idx_++);
     auto out_shape = GetShape(call->checked_type());
     int out_size = 1;
@@ -118,7 +120,12 @@
 
     // Update output buffer
     out_.clear();
-    out_.push_back({out, out_size});
+    Output output;
+    output.name = out;
+    output.dtype = dtype;
+    output.need_copy = true;
+    output.size = out_size;
+    out_.push_back(output);
   }
 
   std::string JIT(void) {
@@ -213,13 +220,13 @@
    */
   int buf_idx_{0};
   /*! \brief The arguments used by a wrapped function that calls DNNL kernels. */
-  std::vector<std::string> ext_func_args_;
+  Array<Var> ext_func_args_;
   /*! \brief Statements of the function that will be compiled using DNNL kernels. */
   std::vector<std::string> ext_func_body;
   /*! \brief The declaration of intermediate buffers. */
   std::vector<std::string> buf_decl_;
   /*! \brief The names of the outputs. */
-  std::vector<std::pair<std::string, int>> out_;
+  std::vector<Output> out_;
 };
 
 /*!
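As a minimal sketch of the new Output flow (not part of the commit, and assuming the default member initializers in the struct above): a VarNode input passes through by name only, while a CallNode result owns a malloc'd scratch buffer that JitImpl later copies into the caller's output and frees:

    Output var_out;                 // built by VisitExpr_(const VarNode*)
    var_out.name = "dnnl_input0";   // hypothetical name_hint; need_copy stays false,
                                    // so JitImpl emits no memcpy/free for it

    Output call_out;                // built by VisitExpr_(const CallNode*)
    call_out.name = "buf_0";
    call_out.dtype = "float";       // now derived via GetDtypeString(type_node)
    call_out.need_copy = true;      // JitImpl copies buf_0 into out, then frees it
    call_out.size = 64;             // product of the output shape, e.g. 8 * 8
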
18 changes: 18 additions & 0 deletions tests/python/relay/test_external_codegen.py
@@ -161,6 +161,23 @@ def test_extern_gcc_single_op():
     check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data)
 
 
+def test_extern_gcc_single_op_int():
+    x = relay.var('x', shape=(8, 8), dtype="int32")
+    y = relay.var('y', shape=(8, 8), dtype="int32")
+
+    x0 = relay.var('x0', shape=(8, 8), dtype="int32")
+    y0 = relay.var('y0', shape=(8, 8), dtype="int32")
+    z = x0 + y0
+    f = relay.Function([x0, y0], z)
+    f = set_external_func_attr(f, "ccompiler", "ccompiler_0")
+    call = relay.Call(f, [x, y])
+    mod = tvm.IRModule.from_expr(call)
+    x_data = np.random.randint(0, 100, size=(8, 8), dtype='int32')
+    y_data = np.random.randint(0, 100, size=(8, 8), dtype='int32')
+
+    check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data)
+
+
 def test_extern_gcc():
     x = relay.var('x', shape=(2, 2))
     y = relay.var('y', shape=(2, 2))
Expand Down Expand Up @@ -242,5 +259,6 @@ def test_extern_dnnl():
if __name__ == "__main__":
test_multi_node_subgraph()
test_extern_gcc_single_op()
test_extern_gcc_single_op_int()
test_extern_gcc()
test_extern_dnnl()
