Skip to content

Commit

Permalink
Enable set_input_zero_copy in GraphRuntime
Browse files Browse the repository at this point in the history
  • Loading branch information
Yinghai Lu committed Jun 25, 2019
1 parent cbec5b9 commit cc6d164
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 34 deletions.
2 changes: 2 additions & 0 deletions include/tvm/runtime/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
namespace tvm {
namespace runtime {

size_t GetDataAlignment(const DLTensor& arr);

/*!
* \brief Managed NDArray.
* The array is backed by reference counted blocks.
Expand Down
144 changes: 118 additions & 26 deletions src/runtime/graph/graph_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
Expand All @@ -23,6 +23,7 @@
*/
#include "graph_runtime.h"

#include <tvm/runtime/device_api.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
Expand All @@ -39,6 +40,12 @@
namespace tvm {
namespace runtime {

namespace {
void DefaultDeleter(DLManagedTensor* ptr) {
delete ptr;
}
} // namespace

/*!
* \brief Run all the operations one by one.
*/
Expand Down Expand Up @@ -96,6 +103,58 @@ void GraphRuntime::SetInput(int index, DLTensor* data_in) {
uint32_t eid = this->entry_id(input_nodes_[index], 0);
data_entry_[eid].CopyFrom(data_in);
}
/*!
* \brief set index-th input to the graph without copying the data.
* \param index The input index.
* \param data_ref The input data that is referred.
*/
void GraphRuntime::SetInputZeroCopy(int index, DLTensor* data_ref) {
CHECK_LT(static_cast<size_t>(index), input_nodes_.size());
uint32_t eid = this->entry_id(input_nodes_[index], 0);
auto& shared = dltensor_entry_[eid];
auto& shape = dltensor_entry_shapes_[eid];

// check the consistency of input shape
CHECK_EQ(GetDataAlignment(*shared), GetDataAlignment(*data_ref));
CHECK(reinterpret_cast<size_t>(data_ref->data) % kAllocAlignment == 0);
if (shape.size() == static_cast<size_t>(data_ref->ndim)) {
for (auto i = 0; i < data_ref->ndim; ++i) {
CHECK_EQ(shape[i], data_ref->shape[i]);
}
} else {
int64_t acc_prev =
std::accumulate(shape.data(), shape.data() + shape.size(), 1, std::multiplies<int64_t>());
int64_t acc = std::accumulate(data_ref->shape, data_ref->shape + data_ref->ndim, 1,
std::multiplies<int64_t>());
CHECK_EQ(acc_prev, acc);
}

// Update the data_entry_
DLManagedTensor* dl_managed = new DLManagedTensor();
dl_managed->dl_tensor = *data_ref;
dl_managed->manager_ctx = nullptr;
dl_managed->deleter = DefaultDeleter;
data_entry_[eid] = NDArray::FromDLPack(dl_managed);
shared = std::make_shared<DLTensor>(*data_ref);
for (auto& op_arg : op_args_) {
if (op_arg) {
const auto it = op_arg->input_entry_ids.find(eid);
if (it != op_arg->input_entry_ids.end()) {
for (const auto i : it->second) {
op_arg->args[i] = shared;
TVMValue v;
DLTensor* t = op_arg->args[i].get();
v.v_handle = t;
op_arg->arg_values[i] = v;
if (op_arg->flatten_data) {
t->ndim = 1;
t->shape = &(op_arg->shape_data[i]);
}
}
}
}
}
}
/*!
* \brief Get the number of outputs
*
Expand Down Expand Up @@ -268,53 +327,70 @@ void GraphRuntime::SetupStorage() {
// memory assignment for each node entry. The allocated memory on each device
// is mapped to this pool.
data_entry_.resize(num_node_entries());
dltensor_entry_.resize(num_node_entries());
dltensor_entry_shapes_.resize(num_node_entries());
for (size_t i = 0; i < data_entry_.size(); ++i) {
int storage_id = attrs_.storage_id[i];
CHECK_LT(static_cast<size_t>(storage_id), storage_pool_.size());
data_entry_[i] =
storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]);
dltensor_entry_[i] = std::make_shared<DLTensor>(*(data_entry_[i].operator->()));
dltensor_entry_shapes_[i].resize(dltensor_entry_[i]->ndim);
for (size_t j = 0; j < dltensor_entry_shapes_[i].size(); ++j) {
dltensor_entry_shapes_[i][j] = dltensor_entry_[i]->shape[j];
}
std::make_shared<DLTensor>(*(data_entry_[i].operator->()));
}
}

void GraphRuntime::SetupOpExecs() {
op_execs_.resize(this->GetNumOfNodes());
op_args_.resize(this->GetNumOfNodes());
// setup the array and requirements.
for (uint32_t nid = 0; nid < this->GetNumOfNodes(); ++nid) {
const auto& inode = nodes_[nid];
if (inode.op_type == "null") continue;
std::vector<DLTensor> args;
std::vector<std::shared_ptr<DLTensor> > args;
std::vector<uint32_t> input_entry_ids;
for (const auto& e : inode.inputs) {
args.push_back(*(data_entry_[this->entry_id(e)].operator->()));
uint32_t eid = this->entry_id(e);
args.push_back(dltensor_entry_[eid]);
input_entry_ids.push_back(eid);
}
for (uint32_t index = 0; index < inode.param.num_outputs; ++index) {
uint32_t eid = this->entry_id(nid, index);
args.push_back(*(data_entry_[eid].operator->()));
args.push_back(dltensor_entry_[eid]);
}
CHECK(inode.op_type == "tvm_op") << "Can only take tvm_op as op";

op_execs_[nid] = CreateTVMOp(inode.param, args, inode.inputs.size());
std::tie(op_execs_[nid], op_args_[nid]) = CreateTVMOp(inode.param, args, inode.inputs.size());
auto& entry_to_input_pos = op_args_[nid]->input_entry_ids;
for (uint32_t i = 0; i < input_entry_ids.size(); ++i) {
const auto eid = input_entry_ids[i];
auto it = entry_to_input_pos.find(eid);
if (it == entry_to_input_pos.end()) {
entry_to_input_pos.emplace(eid, std::vector<uint32_t>{i});
} else {
it->second.push_back(i);
}
}
}
}

std::function<void()> GraphRuntime::CreateTVMOp(
std::pair<std::function<void()>, std::shared_ptr<GraphRuntime::OpArgs> > GraphRuntime::CreateTVMOp(
const TVMOpParam& param,
const std::vector<DLTensor>& args,
const std::vector<std::shared_ptr<DLTensor> >& args,
size_t num_inputs) {
struct OpArgs {
std::vector<DLTensor> args;
std::vector<TVMValue> arg_values;
std::vector<int> arg_tcodes;
std::vector<int64_t> shape_data;
};
std::shared_ptr<OpArgs> arg_ptr = std::make_shared<OpArgs>();
std::shared_ptr<GraphRuntime::OpArgs> arg_ptr = std::make_shared<GraphRuntime::OpArgs>();
// setup address.
arg_ptr->args = std::move(args);
arg_ptr->args = args;
if (param.flatten_data) {
arg_ptr->flatten_data = true;
arg_ptr->shape_data.resize(arg_ptr->args.size());
}
for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
TVMValue v;
DLTensor* t = &(arg_ptr->args[i]);
DLTensor* t = arg_ptr->args[i].get();
v.v_handle = t;
arg_ptr->arg_values.push_back(v);
arg_ptr->arg_tcodes.push_back(kArrayHandle);
Expand All @@ -327,7 +403,7 @@ std::function<void()> GraphRuntime::CreateTVMOp(
}

if (param.func_name == "__nop") {
return [](){};
return {[](){}, arg_ptr};
} else if (param.func_name == "__copy") {
// Perform cross device data copy.
// Directly copy data from the input to the output.
Expand All @@ -336,7 +412,7 @@ std::function<void()> GraphRuntime::CreateTVMOp(
DLTensor* to = static_cast<DLTensor*>(arg_ptr->arg_values[1].v_handle);
TVM_CCALL(TVMArrayCopyFromTo(from, to, nullptr));
};
return fexec;
return {fexec, arg_ptr};
}

// Get compiled function from the module that contains both host and device
Expand All @@ -351,7 +427,7 @@ std::function<void()> GraphRuntime::CreateTVMOp(
static_cast<int>(arg_ptr->arg_values.size()));
pf.CallPacked(targs, &rv);
};
return fexec;
return {fexec, arg_ptr};
}

PackedFunc GraphRuntime::GetFunction(
Expand All @@ -367,14 +443,23 @@ PackedFunc GraphRuntime::GetFunction(
this->SetInput(args[0], args[1]);
}
});
} else if (name == "set_input_zero_copy") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
if (args[0].type_code() == kStr) {
int in_idx = this->GetInputIndex(args[0]);
if (in_idx >= 0) this->SetInputZeroCopy(in_idx, args[1]);
} else {
this->SetInputZeroCopy(args[0], args[1]);
}
});
} else if (name == "get_output") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
if (args.num_args == 2) {
this->CopyOutputTo(args[0], args[1]);
} else {
*rv = this->GetOutput(args[0]);
}
});
if (args.num_args == 2) {
this->CopyOutputTo(args[0], args[1]);
} else {
*rv = this->GetOutput(args[0]);
}
});
} else if (name == "get_input") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
int in_idx = 0;
Expand All @@ -390,6 +475,13 @@ PackedFunc GraphRuntime::GetFunction(
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
*rv = this->NumOutputs();
});
} else if (name == "get_output_name") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
int index = args[0];
CHECK_LT(static_cast<size_t>(index), outputs_.size());
uint32_t eid = this->entry_id(outputs_[index]);
*rv = this->GetNodeName(eid);
});
} else if (name == "run") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
this->Run();
Expand Down
32 changes: 27 additions & 5 deletions src/runtime/graph/graph_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
Expand All @@ -34,6 +34,7 @@
#include <tvm/runtime/packed_func.h>

#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include <string>
Expand Down Expand Up @@ -67,6 +68,15 @@ struct TVMOpParam {
* TVM runtime PackedFunc API.
*/
class GraphRuntime : public ModuleNode {
struct OpArgs {
std::vector<std::shared_ptr<DLTensor> > args;
std::vector<TVMValue> arg_values;
std::vector<int> arg_tcodes;
std::vector<int64_t> shape_data;
std::unordered_map<uint32_t, std::vector<uint32_t> > input_entry_ids;
bool flatten_data{false};
};

public:
/*!
* \brief Get member function to front-end
Expand Down Expand Up @@ -111,6 +121,12 @@ class GraphRuntime : public ModuleNode {
* \param data_in The input data.
*/
void SetInput(int index, DLTensor* data_in);
/*!
* \brief set index-th input to the graph without copying the data
* \param index The input index.
* \param data_ref The input data that is referred.
*/
void SetInputZeroCopy(int index, DLTensor* data_ref);
/*!
* \brief Get the number of outputs
*
Expand Down Expand Up @@ -365,9 +381,9 @@ class GraphRuntime : public ModuleNode {
* \param num_inputs Number of inputs.
* \return The created executor.
*/
std::function<void()> CreateTVMOp(const TVMOpParam& attrs,
const std::vector<DLTensor>& args,
size_t num_inputs);
std::pair<std::function<void()>, std::shared_ptr<OpArgs> > CreateTVMOp(
const TVMOpParam& attrs, const std::vector<std::shared_ptr<DLTensor> >& args,
size_t num_inputs);
// Get node entry index.
uint32_t entry_id(uint32_t nid, uint32_t index) const {
return node_row_ptr_[nid] + index;
Expand Down Expand Up @@ -398,8 +414,14 @@ class GraphRuntime : public ModuleNode {
std::vector<NDArray> storage_pool_;
/*! \brief Data entry of each node. */
std::vector<NDArray> data_entry_;
/*! \brief DLTensor entry of each node. */
std::vector<std::shared_ptr<DLTensor> > dltensor_entry_;
/*! \brief Shape of each DLTensor of each node. */
std::vector<std::vector<int64_t> > dltensor_entry_shapes_;
/*! \brief Operator on each node. */
std::vector<std::function<void()> > op_execs_;
/*! \brief Arg info of TVM ops */
std::vector<std::shared_ptr<OpArgs> > op_args_;
};

std::vector<TVMContext> GetAllContext(const TVMArgs& args);
Expand Down
6 changes: 3 additions & 3 deletions src/runtime/ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
Expand Down Expand Up @@ -46,7 +46,7 @@ inline void VerifyDataType(DLDataType dtype) {
CHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
}

inline size_t GetDataAlignment(const DLTensor& arr) {
size_t GetDataAlignment(const DLTensor& arr) {
size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
if (align < kAllocAlignment) return kAllocAlignment;
return align;
Expand Down

0 comments on commit cc6d164

Please sign in to comment.