""" | ||
Get Started with NNVM | ||
===================== | ||
**Author**: `Tianqi Chen <https://tqchen.github.io/>`_ | ||
This article is an introductory tutorial to workflow in NNVM. | ||
""" | ||
import nnvm.compiler | ||
import nnvm.symbol as sym | ||
|
||
######################################################################
# Declare Computation
# -------------------
# We start by describing our computation with a computational graph.
# Most deep learning frameworks use computational graphs to describe
# their computation. In this example, we directly use
# NNVM's API to construct the computational graph.
#
# .. note::
#
#   In a typical deep learning compilation workflow,
#   we can get the models from :any:`nnvm.frontend`.
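#
#   For illustration, a hedged sketch of that path (not executed in this
#   tutorial) might look like the following, assuming an MXNet symbol
#   ``mx_sym`` with parameter dicts ``args`` and ``auxs`` are available:
#
#   .. code-block:: python
#
#      import nnvm.frontend
#      net, params = nnvm.frontend.from_mxnet(mx_sym, args, auxs)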
#
# The following code snippet describes :math:`z = x + \sqrt{y}`
# and creates an NNVM graph from the description.
# We can print out the graph IR to check the graph content.

x = sym.Variable("x")
y = sym.Variable("y")
z = sym.elemwise_add(x, sym.sqrt(y))
compute_graph = nnvm.graph.create(z)
print("-------compute graph-------")
print(compute_graph.ir())

######################################################################
# Compile
# -------
# We can call :any:`nnvm.compiler.build` to compile the graph.
# The build function takes a shape parameter which specifies the
# input shape requirement. Here we only need to pass in the shape of ``x``;
# the shape of ``y`` will be inferred automatically by NNVM.
#
# The function returns three values. ``deploy_graph`` contains
# the final compiled graph structure. ``lib`` is a :any:`tvm.module.Module`
# that contains the compiled CUDA functions. We do not need the ``params``
# in this case.
shape = (4,)
deploy_graph, lib, params = nnvm.compiler.build(
    compute_graph, target="cuda", shape={"x": shape}, dtype="float32")

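######################################################################
# .. note::
#
#   If a CUDA device is not available, the same flow should also work on
#   CPU (an alternative configuration, not part of the original tutorial):
#   build with ``target="llvm"`` and create the runtime on ``tvm.cpu(0)``
#   in the later steps.
#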
######################################################################
# We can print out the IR of ``deploy_graph`` to understand what just
# happened under the hood. We can find that ``deploy_graph`` only
# contains a single operator ``tvm_op``. This is because NNVM
# automatically fused the operators together into one operator.
#
print("-------deploy graph-------")
print(deploy_graph.ir())

######################################################################
# Let us also peek into the content of ``lib``.
# Typically a compiled TVM CUDA module contains a host module (``lib``)
# and a device module (``lib.imported_modules[0]``) that contains the CUDA code.
# We print out the generated device code here.
# This is the fused CUDA version of the kernel that the graph points to.
#
print("-------deploy library-------")
print(lib.imported_modules[0].get_source())

######################################################################
# Deploy and Run
# --------------
# Now that we have a compiled module, let us run it.
# We can use :any:`graph_runtime <tvm.contrib.graph_runtime.create>`
# in TVM to create a deployable :any:`GraphModule <tvm.contrib.graph_runtime.GraphModule>`.
# We can use the :any:`set_input <tvm.contrib.graph_runtime.GraphModule.set_input>`,
# :any:`run <tvm.contrib.graph_runtime.GraphModule.run>` and
# :any:`get_output <tvm.contrib.graph_runtime.GraphModule.get_output>` functions
# to set the input, execute the graph, and get the output we need.
#
import tvm
import numpy as np
from tvm.contrib import graph_runtime, util

module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0))
x_np = np.array([1, 2, 3, 4]).astype("float32")
y_np = np.array([4, 4, 4, 4]).astype("float32")
# set input to the graph module
module.set_input(x=x_np, y=y_np)
# run forward computation
module.run()
# get the first output
out = module.get_output(0, out=tvm.nd.empty(shape))
print(out.asnumpy())

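# As a quick sanity check (added here for illustration, not part of the
# original tutorial), the result should match a plain numpy reference of
# z = x + sqrt(y):
np.testing.assert_allclose(out.asnumpy(), x_np + np.sqrt(y_np), rtol=1e-5)
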
######################################################################
# Provide Model Parameters
# ------------------------
# Most deep learning models contain two types of inputs: parameters
# that remain fixed during inference, and data inputs that change
# with each inference task. It is helpful to provide this
# information to NNVM. Let us assume that ``y`` is the parameter
# in our example. We can provide the model parameter information
# via the ``params`` argument to :any:`nnvm.compiler.build`.
#
deploy_graph, lib, params = nnvm.compiler.build(
    compute_graph, target="cuda", shape={"x": shape}, params={"y": y_np})

######################################################################
# This time we will need the ``params`` value returned by :any:`nnvm.compiler.build`.
# NNVM applies optimizations to pre-compute the intermediate values in
# the graph that can be determined by parameters. In this case,
# :math:`\sqrt{y}` can be pre-computed. The pre-computed values
# are returned as the new ``params``. We can print out the new compiled library
# to confirm that the fused kernel now only contains the add.
#
print("-----optimized params-----")
print(params)
print("-------deploy library-------")
print(lib.imported_modules[0].get_source())

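# As an extra check (added for illustration, not part of the original
# tutorial), the module built with pre-computed params should produce the
# same result; we only need to feed ``x``, since the contribution of ``y``
# has been folded into ``params``:
module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0))
module.set_input(**params)
module.run(x=x_np)
out = module.get_output(0, out=tvm.nd.empty(shape))
np.testing.assert_allclose(out.asnumpy(), x_np + np.sqrt(y_np), rtol=1e-5)
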
######################################################################
# Save the Deployed Module
# ------------------------
# We can save the ``deploy_graph``, ``lib`` and ``params`` separately
# and load them back later. We can use :any:`tvm.module.Module` to export
# the compiled library. ``deploy_graph`` is saved in JSON format and ``params``
# is serialized into a bytearray.
#
temp = util.tempdir()
path_lib = temp.relpath("deploy.so")
lib.export_library(path_lib)
with open(temp.relpath("deploy.json"), "w") as fo:
    fo.write(deploy_graph.json())
with open(temp.relpath("deploy.params"), "wb") as fo:
    fo.write(nnvm.compiler.save_param_dict(params))
print(temp.listdir())

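######################################################################
# .. note::
#
#   ``util.tempdir()`` creates a scratch directory that is removed when
#   the ``temp`` object is garbage collected, so a real deployment should
#   export these files to a persistent path instead.
#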
######################################################################
# We can load the module back.
loaded_lib = tvm.module.load(path_lib)
loaded_json = open(temp.relpath("deploy.json")).read()
loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read())
module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0))
# load_param_dict is shown here for inspection; the runtime below can
# consume the serialized bytes directly via load_params
params = nnvm.compiler.load_param_dict(loaded_params)
# directly load parameters from the byte array
module.load_params(loaded_params)
module.run(x=x_np)
# get the first output
out = module.get_output(0, out=tvm.nd.empty(shape))
print(out.asnumpy())

######################################################################
# Deploy using Another Language
# -----------------------------
# We use Python in this example for demonstration.
# We can also deploy the compiled modules with other languages
# supported by TVM, such as C++, Java, and JavaScript.
# The graph module itself is fully embedded in the TVM runtime.
#
# The following block demonstrates how we can directly use TVM's
# runtime API to execute the compiled module.
# You can find a similar runtime API in the TVM runtime bindings for
# other languages.
#
fcreate = tvm.get_global_func("tvm.graph_runtime.create")
ctx = tvm.gpu(0)
gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id)
set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"]
set_input("x", tvm.nd.array(x_np))
gmodule["load_params"](loaded_params)
run()
out = tvm.nd.empty(shape)
get_output(0, out)
print(out.asnumpy())