diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile
new file mode 100644
index 0000000000000..0bf1613c8d66e
--- /dev/null
+++ b/apps/bundle_deploy/Makefile
@@ -0,0 +1,39 @@
+# Makefile Example to bundle TVM modules.
+TVM_ROOT=$(shell cd ../..; pwd)
+NNVM_PATH=nnvm
+DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
+PKG_CFLAGS = -std=c++14 -Oz -fPIC\
+	-I${TVM_ROOT}/include\
+	-I${DMLC_CORE}/include\
+	-I${TVM_ROOT}/3rdparty/dlpack/include\
+
+PKG_LDFLAGS = -L${TVM_ROOT}/build
+
+build_dir := build
+
+test: $(build_dir)/demo $(build_dir)/bundle.so
+	$(build_dir)/demo $(build_dir)/bundle.so
+
+$(build_dir)/demo: demo.cc
+	@mkdir -p $(@D)
+	$(CXX) $(PKG_CFLAGS) -o $@ $^ -ldl
+
+# Serialize our graph.json file.
+$(build_dir)/graph.json.cc: $(build_dir)/graph.json
+	xxd -i $^ > $@
+
+# Serialize our params.bin file.
+$(build_dir)/params.bin.cc: $(build_dir)/params.bin
+	xxd -i $^ > $@
+
+$(build_dir)/model.o $(build_dir)/graph.json $(build_dir)/params.bin: build_model.py
+	python $< -o $(build_dir)
+
+# Build our bundle against the serialized bundle.cc API, the runtime.cc API, and
+# the serialized graph.json and params.bin
+$(build_dir)/bundle.so: bundle.cc runtime.cc $(build_dir)/model.o $(build_dir)/graph.json.cc $(build_dir)/params.bin.cc
+	@mkdir -p $(@D)
+	$(CXX) $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) -shared
+
+clean:
+	rm -r $(build_dir)
diff --git a/apps/bundle_deploy/README.md b/apps/bundle_deploy/README.md
new file mode 100644
index 0000000000000..94bf2c208104e
--- /dev/null
+++ b/apps/bundle_deploy/README.md
@@ -0,0 +1,31 @@
+How to Bundle TVM Modules
+=========================
+
+This folder contains an example on how to bundle a TVM module (with the required
+interpreter runtime modules such as `runtime::GraphRuntime`, the graph JSON, and
+the params) into a single, self-contained shared object (`bundle.so`) which
+exposes a C API wrapping the appropriate `runtime::GraphRuntime` instance.
+
+This is useful for cases where we'd like to avoid deploying the TVM runtime
+components to the target host in advance - instead, we simply deploy the bundled
+shared-object to the host, which embeds both the model and the runtime
+components. The bundle should only depend on libc/libc++.
+
+It also contains an example code to load this shared object and invoke the
+packaged TVM model instance.
+
+Type the following command to run the sample code under the current folder,
+after building TVM first.
+
+```bash
+make test
+```
+
+This will:
+
+- Download the mobilenet0.25 model from the MXNet Gluon Model Zoo
+- Compile the model with NNVM
+- Build a `bundle.so` shared object containing the model specification and parameters
+- Build a `demo` executable that `dlopen`'s `bundle.so`, instantiates the
+  contained graph runtime, and invokes the `GraphRuntime::Run` function on a
+  random input, then prints the output tensor to `stderr`.
diff --git a/apps/bundle_deploy/build_model.py b/apps/bundle_deploy/build_model.py
new file mode 100644
index 0000000000000..901996b8774e9
--- /dev/null
+++ b/apps/bundle_deploy/build_model.py
@@ -0,0 +1,40 @@
+"""Creates a simple TVM modules."""
+
+import argparse
+import os
+import nnvm.compiler
+import nnvm.testing
+import tvm
+import logging
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-o', '--out-dir', default='.')
+    opts = parser.parse_args()
+
+    dshape = (1, 3, 224, 224)
+    from mxnet.gluon.model_zoo.vision import get_model
+    block = get_model('mobilenet0.25', pretrained=True)
+    net, params = nnvm.frontend.from_mxnet(block)
+    net = nnvm.sym.softmax(net)
+
+    with nnvm.compiler.build_config(opt_level=3):
+        graph, lib, params = nnvm.compiler.build(
+            net, 'llvm --system-lib', shape={'data': dshape}, params=params)
+    print(graph.symbol().debug_str())
+    build_dir = os.path.abspath(opts.out_dir)
+    if not os.path.isdir(build_dir):
+        os.makedirs(build_dir)
+
+    lib.save(os.path.join(build_dir, 'model.o'))
+    with open(os.path.join(build_dir, 'graph.json'), 'w') as f_graph_json:
+        f_graph_json.write(graph.json())
+    with open(os.path.join(build_dir, 'params.bin'), 'wb') as f_params:
+        f_params.write(nnvm.compiler.save_param_dict(params))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/apps/bundle_deploy/bundle.cc b/apps/bundle_deploy/bundle.cc
new file mode 100644
index 0000000000000..af1ef7225bcbb
--- /dev/null
+++ b/apps/bundle_deploy/bundle.cc
@@ -0,0 +1,47 @@
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/packed_func.h>
+#include <tvm/runtime/registry.h>
+
+extern unsigned char build_graph_json[];
+extern unsigned int build_graph_json_len;
+extern unsigned char build_params_bin[];
+extern unsigned int build_params_bin_len;
+
+#define TVM_BUNDLE_FUNCTION __attribute__((visibility("default"))) extern "C"
+
+TVM_BUNDLE_FUNCTION void *tvm_runtime_create() {
+  const std::string json_data(&build_graph_json[0],
+                              &build_graph_json[0] + build_graph_json_len);
+  tvm::runtime::Module mod_syslib =
+      (*tvm::runtime::Registry::Get("module._GetSystemLib"))();
+  int device_type = kDLCPU;
+  int device_id = 0;
+  tvm::runtime::Module mod =
+      (*tvm::runtime::Registry::Get("tvm.graph_runtime.create"))(
+          json_data, mod_syslib, device_type, device_id);
+  TVMByteArray params;
+  params.data = reinterpret_cast<const char *>(&build_params_bin[0]);
+  params.size = build_params_bin_len;
+  mod.GetFunction("load_params")(params);
+  return new tvm::runtime::Module(mod);
+}
+
+TVM_BUNDLE_FUNCTION void tvm_runtime_destroy(void *handle) {
+  delete reinterpret_cast<tvm::runtime::Module *>(handle);
+}
+
+TVM_BUNDLE_FUNCTION void tvm_runtime_set_input(void *handle, const char *name,
+                                               void *tensor) {
+  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("set_input")(
+      name, reinterpret_cast<DLTensor *>(tensor));
+}
+
+TVM_BUNDLE_FUNCTION void tvm_runtime_run(void *handle) {
+  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("run")();
+}
+
+TVM_BUNDLE_FUNCTION void tvm_runtime_get_output(void *handle, int index,
+                                                void *tensor) {
+  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("get_output")(
+      index, reinterpret_cast<DLTensor *>(tensor));
+}
diff --git a/apps/bundle_deploy/demo.cc b/apps/bundle_deploy/demo.cc
new file mode 100644
index 0000000000000..f6c50c9444115
--- /dev/null
+++ b/apps/bundle_deploy/demo.cc
@@ -0,0 +1,64 @@
+#include "tvm/runtime/c_runtime_api.h"
+#include <assert.h>
+#include <dlfcn.h> //dlopen
+#include <iostream>
+#include <random>
+#include <type_traits>
+#include <vector>
+
+template <typename F> auto getFunc(void *bundle, const char *name) {
+  dlerror();
+  auto *f =
+      reinterpret_cast<typename std::add_pointer<F>::type>(dlsym(bundle, name));
+  assert(!dlerror());
+  return f;
+}
+
+int main(int argc, char **argv) {
+  auto *bundle = dlopen(argv[1], RTLD_LAZY | RTLD_LOCAL);
+  assert(bundle);
+
+  auto *handle = getFunc<void *()>(bundle, "tvm_runtime_create")();
+
+  std::vector<float> input_storage(1 * 3 * 224 * 224);
+  std::mt19937 gen(0);
+  for (auto &e : input_storage) {
+    e = std::uniform_real_distribution<float>(0.0, 1.0)(gen);
+  }
+
+  std::vector<int64_t> input_shape = {1, 3, 224, 224};
+  DLTensor input;
+  input.data = input_storage.data();
+  input.ctx = DLContext{kDLCPU, 0};
+  input.ndim = 4;
+  input.dtype = DLDataType{kDLFloat, 32, 1};
+  input.shape = input_shape.data();
+  input.strides = nullptr;
+  input.byte_offset = 0;
+  getFunc<void(void *, const char *, void *)>(bundle, "tvm_runtime_set_input")(
+      handle, "data", &input);
+
+  auto *ftvm_runtime_run =
+      (auto (*)(void *)->void)dlsym(bundle, "tvm_runtime_run");
+  assert(!dlerror());
+  ftvm_runtime_run(handle);
+
+  std::vector<float> output_storage(1000);
+  std::vector<int64_t> output_shape = {1, 1000};
+  DLTensor output;
+  output.data = output_storage.data();
+  output.ctx = DLContext{kDLCPU, 0};
+  output.ndim = 2;
+  output.dtype = DLDataType{kDLFloat, 32, 1};
+  output.shape = output_shape.data();
+  output.strides = nullptr;
+  output.byte_offset = 0;
+
+  getFunc<void(void *, int, void *)>(bundle, "tvm_runtime_get_output")(
+      handle, 0, &output);
+  for (size_t i = 0; i < output_storage.size(); ++i) {
+    std::cerr << "output[" << i << "]: " << output_storage[i] << std::endl;
+  }
+  getFunc<void(void *)>(bundle, "tvm_runtime_destroy")(handle);
+  return 0;
+}
diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc
new file mode 100644
index 0000000000000..2284953b8c16c
--- /dev/null
+++ b/apps/bundle_deploy/runtime.cc
@@ -0,0 +1,17 @@
+#include <sys/stat.h>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "../../src/runtime/c_runtime_api.cc"
+#include "../../src/runtime/cpu_device_api.cc"
+#include "../../src/runtime/workspace_pool.cc"
+#include "../../src/runtime/module_util.cc"
+#include "../../src/runtime/module.cc"
+#include "../../src/runtime/registry.cc"
+#include "../../src/runtime/file_util.cc"
+#include "../../src/runtime/threading_backend.cc"
+#include "../../src/runtime/thread_pool.cc"
+#include "../../src/runtime/ndarray.cc"
+#include "../../src/runtime/system_lib_module.cc"
+#include "../../src/runtime/graph/graph_runtime.cc"