Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TensorRT integration and add TRT tests to CI #41

Merged
merged 1 commit into from
Dec 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions cmake/modules/contrib/TensorRT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,20 @@

# TensorRT Module

if(IS_DIRECTORY ${USE_TENSORRT})
set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT_DIR}/include)
set(TENSORRT_LIB_DIR ${TENSORRT_ROOT_DIR}/lib)
if(USE_TENSORRT)
if(IS_DIRECTORY ${USE_TENSORRT})
set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
endif()
find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
find_library(TENSORRT_LIB_DIR nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib)
find_package_handle_standard_args(TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIB_DIR)
# Abort configuration when the TensorRT headers or the nvinfer library were
# not located by the find_path/find_library calls above.
# FATAL_ERROR is required: "ERROR" is not a message() mode keyword, so
# message(ERROR ...) would merely print the text as a notice and let
# configuration continue with *-NOTFOUND values that fail later at link time.
if(NOT TENSORRT_FOUND)
  message(FATAL_ERROR "Could not find TensorRT.")
endif()
file(GLOB TENSORRT_SRCS src/contrib/subgraph/*.cc)
include_directories(${TENSORRT_INCLUDE_DIR})
list(APPEND RUNTIME_SRCS ${TENSORRT_SRCS})
find_library(TENSORRT_NVINFER_LIBRARY nvinfer ${TENSORRT_LIB_DIR})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_NVINFER_LIBRARY})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})
set_source_files_properties(${RUNTIME_GRAPH_SRCS}
PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
endif()
2 changes: 1 addition & 1 deletion nnvm/python/nnvm/compiler/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def build(graph, target=None, shape=None, dtype="float32",
graph = _annotate_graph(graph, device_target,
AnnotationType.DEVICE_TARGET)
# Apply optimization
graph = optimize(graph, shape, dtype, layout)
graph = optimize(graph, shape, dtype, layout, target)

# Clear extra params without nodes.
_remove_noref_params(params, graph)
Expand Down
14 changes: 9 additions & 5 deletions nnvm/src/compiler/graph_compile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -321,11 +321,15 @@ nnvm::Graph GraphCompile(const nnvm::Graph& g) {
ret.attrs["device_index"] = std::make_shared<any>(std::move(device_vec));
}
// Setup module.
static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
tvm::runtime::Module module =
fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
tar_func_map.begin(), tar_func_map.end()),
"", target_host);
tvm::runtime::Module module;
// When using external accelerators such as TensorRT, there might not be any
// functions to compile in the graph. In that case, an empty module is used.
if (!tar_func_map.empty()) {
static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
module = fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
tar_func_map.begin(), tar_func_map.end()),
"", target_host);
}

ret.attrs["module"] = std::make_shared<any>(std::move(module));
ret = nnvm::ApplyPass(ret, "PlanMemory");
Expand Down
15 changes: 13 additions & 2 deletions tests/python/tensorrt/test_avg_pool2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@
import nnvm
import tvm
from tvm.contrib import graph_runtime
import json


def test_avg_pool2d():

# Generate the data
np.random.seed(0)
input_shape = [1, 1, 28, 28]
output_shape = [1, 10]
output_shape = [1, 1, 28, 28]
data = np.random.random(input_shape).astype('float32')

# Baseline model in MXNet
net = gluon.nn.HybridSequential()
with net.name_scope():
net.add(gluon.nn.AvgPool2D(pool_size=3, strides=1, padding=1))
net.add(gluon.nn.Dense(10))
net.collect_params().initialize(mx.init.Xavier(), ctx=mx.cpu())
net.hybridize()
baseline_input = mx.nd.array(data, ctx=mx.cpu())
Expand All @@ -48,6 +48,17 @@ def test_avg_pool2d():
graph, lib, params = nnvm.compiler.build(sym, target,
shape={'data': input_shape},
params=params)

# Verify that TRT subgraphs are partitioned
def check_trt_used(graph):
    """Assert that the compiled graph contains exactly one TensorRT subgraph node.

    Parameters
    ----------
    graph : object exposing ``json()``
        Compiled graph; ``graph.json()`` must return a JSON string with a
        top-level ``nodes`` list whose entries each carry an ``op`` field.

    Raises
    ------
    AssertionError
        If the number of ``_tensorrt_subgraph_op`` nodes is not exactly 1.
        The message reports the count that was actually found.
    """
    nodes = json.loads(graph.json())['nodes']
    # Count lazily; no intermediate list is needed just to sum ones.
    num_trt_subgraphs = sum(1 for n in nodes if n['op'] == '_tensorrt_subgraph_op')
    assert num_trt_subgraphs == 1, \
        "expected exactly 1 TensorRT subgraph, found %d" % num_trt_subgraphs
check_trt_used(graph)

# Execute
if not tvm.module.enabled("gpu"):
return
compiled_model = graph_runtime.create(graph, lib, tvm.gpu())
compiled_input = tvm.nd.array(data, ctx=tvm.gpu())
compiled_model.set_input('data', compiled_input)
Expand Down
9 changes: 9 additions & 0 deletions tests/python/tensorrt/test_cross_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import tvm
from tvm.contrib import graph_runtime
from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
import json

batch_size = 1

Expand Down Expand Up @@ -96,6 +97,14 @@ def get_data_shape(model_name):
with nnvm.compiler.build_config(opt_level=opt_level, ext_accel=ext_accel):
graph, lib, params = nnvm.compiler.build(
net, target, shape={"data": data_shape}, params=params, target_host=target_host)

# Verify that TRT subgraphs are partitioned
def check_trt_used(graph):
    """Assert that the compiled graph contains at least one TensorRT subgraph node.

    Parameters
    ----------
    graph : object exposing ``json()``
        Compiled graph; ``graph.json()`` must return a JSON string with a
        top-level ``nodes`` list whose entries each carry an ``op`` field.

    Raises
    ------
    AssertionError
        If no node has op ``_tensorrt_subgraph_op``. The message reports the
        count that was actually found.
    """
    nodes = json.loads(graph.json())['nodes']
    # Count lazily; no intermediate list is needed just to sum ones.
    num_trt_subgraphs = sum(1 for n in nodes if n['op'] == '_tensorrt_subgraph_op')
    assert num_trt_subgraphs >= 1, \
        "expected at least 1 TensorRT subgraph, found %d" % num_trt_subgraphs
check_trt_used(graph)

print("===========Compiling model %s took %.3fs" % (network, time.time() - start))

print("===========Saving lowered graph for model %s" % network)
Expand Down
23 changes: 15 additions & 8 deletions tests/python/tensorrt/test_tensorrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import logging
logging.basicConfig(level=logging.INFO)
import numpy as np
import json

import nnvm.compiler
import nnvm.testing
Expand All @@ -30,15 +31,11 @@


def test_tensorrt_image_classification_models():
def compile_model(graph, params, data_shapes, subgraph_backend=None, op_names=None, **kwargs):
def compile_model(graph, params, data_shapes, **kwargs):
_, output_shapes = nnvm.compiler.graph_util.infer_shape(graph, **data_shapes)
assert len(output_shapes) == 1
flags = kwargs
if subgraph_backend is not None and op_names is not None:
graph = nnvm.subgraph._partition(graph, subgraph_backend, op_names)
flags = {}
target = tvm.target.cuda()
with nnvm.compiler.build_config(opt_level=3, **flags):
with nnvm.compiler.build_config(opt_level=3, **kwargs):
graph, lib, params = nnvm.compiler.build(
graph, target, shape=data_shapes, params=params)
return graph, lib, params, output_shapes[0]
Expand All @@ -60,7 +57,16 @@ def copy_params(params):
def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
subgraph_backend=None, op_names=None, **kwargs):
trt_graph, trt_lib, trt_params, output_shape = compile_model(graph, params, {'data': data_shape},
subgraph_backend, op_names, **kwargs)
**kwargs)
# Verify that TRT subgraphs are partitioned
def check_trt_used(graph):
    """Assert that the compiled graph contains at least one TensorRT subgraph node.

    Parameters
    ----------
    graph : object exposing ``json()``
        Compiled graph; ``graph.json()`` must return a JSON string with a
        top-level ``nodes`` list whose entries each carry an ``op`` field.

    Raises
    ------
    AssertionError
        If no node has op ``_tensorrt_subgraph_op``. The message reports the
        count that was actually found.
    """
    # Parse into a separate local instead of rebinding the ``graph`` argument.
    nodes = json.loads(graph.json())['nodes']
    num_trt_subgraphs = sum(1 for n in nodes if n['op'] == '_tensorrt_subgraph_op')
    assert num_trt_subgraphs >= 1, \
        "expected at least 1 TensorRT subgraph, found %d" % num_trt_subgraphs
check_trt_used(trt_graph)

if not tvm.module.enabled("gpu"):
return
data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
baseline_out = get_output(baseline_module, data, baseline_params, output_shape)
trt_module = graph_runtime.create(trt_graph, trt_lib, tvm.gpu())
Expand Down Expand Up @@ -94,7 +100,8 @@ def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
shape={'data': data_shape}, params=copy_params(params))
baseline_module = graph_runtime.create(baseline_graph, baseline_lib, tvm.gpu())

# test whole graph run using tensorrt, nnvm.compiler.build_config has graph partitioning turned on
# Test whole graph run using tensorrt. nnvm.compiler.build_config has
# graph partitioning turned on when ext_accel='tensorrt'.
check_trt_model(baseline_module, baseline_params, nnvm.graph.load_json(graph_json_str),
copy_params(params), data_shape, ext_accel='tensorrt')

Expand Down
27 changes: 27 additions & 0 deletions tests/scripts/task_python_tensorrt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Stop at the first failing command and treat unset variables as errors.
set -eu

# Put the nnvm, tvm and topi Python packages on the import path.
PYTHONPATH=nnvm/python:python:topi/python
export PYTHONPATH

# Prepend the build directory to the shared-library search path; the ":-"
# default keeps "set -u" from aborting when LD_LIBRARY_PATH is not yet set.
LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}"
export LD_LIBRARY_PATH

# Delete compiled .pyc files up to three directory levels below python/tvm.
rm -rf python/tvm/*.pyc python/tvm/*/*.pyc python/tvm/*/*/*.pyc

# Run the TensorRT Python tests under nose with the ctypes FFI selected.
TVM_FFI=ctypes python3 -m nose -v tests/python/tensorrt