diff --git a/cmake/modules/contrib/TensorRT.cmake b/cmake/modules/contrib/TensorRT.cmake
index 7c52cd77abdf..8cb3d5b553b6 100644
--- a/cmake/modules/contrib/TensorRT.cmake
+++ b/cmake/modules/contrib/TensorRT.cmake
@@ -17,16 +17,21 @@
 
 # TensorRT Module
 
-if(IS_DIRECTORY ${USE_TENSORRT})
-  set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
-  message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
-  set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT_DIR}/include)
-  set(TENSORRT_LIB_DIR ${TENSORRT_ROOT_DIR}/lib)
+if(USE_TENSORRT)
+  if(IS_DIRECTORY ${USE_TENSORRT})
+    set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
+  endif()
+  find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
+  find_library(TENSORRT_LIB_DIR nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib)
+  include(FindPackageHandleStandardArgs)
+  find_package_handle_standard_args(TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIB_DIR)
+  if(NOT TENSORRT_FOUND)
+    message(FATAL_ERROR "Could not find TensorRT.")
+  endif()
   file(GLOB TENSORRT_SRCS src/contrib/subgraph/*.cc)
   include_directories(${TENSORRT_INCLUDE_DIR})
   list(APPEND RUNTIME_SRCS ${TENSORRT_SRCS})
-  find_library(TENSORRT_NVINFER_LIBRARY nvinfer ${TENSORRT_LIB_DIR})
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_NVINFER_LIBRARY})
+  list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})
   set_source_files_properties(${RUNTIME_GRAPH_SRCS}
     PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
 endif()
diff --git a/nnvm/python/nnvm/compiler/build_module.py b/nnvm/python/nnvm/compiler/build_module.py
index a49e9741a901..d9ef6018cded 100644
--- a/nnvm/python/nnvm/compiler/build_module.py
+++ b/nnvm/python/nnvm/compiler/build_module.py
@@ -335,7 +335,7 @@ def build(graph, target=None, shape=None, dtype="float32",
         graph = _annotate_graph(graph, device_target,
                                 AnnotationType.DEVICE_TARGET)
     # Apply optimization
-    graph = optimize(graph, shape, dtype, layout)
+    graph = optimize(graph, shape, dtype, layout, target)
 
     # Clear extra params without nodes.
     _remove_noref_params(params, graph)
diff --git a/nnvm/src/compiler/graph_compile.cc b/nnvm/src/compiler/graph_compile.cc
index f3d04ff6c7b5..1774a411832c 100644
--- a/nnvm/src/compiler/graph_compile.cc
+++ b/nnvm/src/compiler/graph_compile.cc
@@ -321,11 +321,15 @@ nnvm::Graph GraphCompile(const nnvm::Graph& g) {
     ret.attrs["device_index"] = std::make_shared<any>(std::move(device_vec));
   }
   // Setup module.
-  static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
-  tvm::runtime::Module module =
-      fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
-          tar_func_map.begin(), tar_func_map.end()),
-          "", target_host);
+  tvm::runtime::Module module;
+  // When using external accelerators such as TensorRT, there might not be any
+  // functions to compile in the graph. In that case, an empty module is used.
+  if (!tar_func_map.empty()) {
+    static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
+    module = fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
+                        tar_func_map.begin(), tar_func_map.end()),
+                    "", target_host);
+  }
   ret.attrs["module"] = std::make_shared<any>(std::move(module));
 
   ret = nnvm::ApplyPass(ret, "PlanMemory");
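
Note: the empty-module path above is exercised whenever partitioning absorbs the
entire graph. A minimal Python sketch of that situation, assuming a
TensorRT-enabled build with a CUDA target; the ext_accel flag and the
_tensorrt_subgraph_op node name are the ones this patch introduces:

    import json

    import nnvm.compiler
    import nnvm.symbol as sym
    import tvm

    data = sym.Variable('data')
    net = sym.avg_pool2d(data, pool_size=(3, 3), strides=(1, 1), padding=(1, 1))

    # ext_accel='tensorrt' turns on graph partitioning during build.
    with nnvm.compiler.build_config(opt_level=3, ext_accel='tensorrt'):
        graph, lib, params = nnvm.compiler.build(
            net, tvm.target.cuda(), shape={'data': (1, 1, 28, 28)})

    # The single pooling op is absorbed into one TensorRT subgraph, so
    # tar_func_map is empty and `lib` is the empty module built above.
    nodes = json.loads(graph.json())['nodes']
    assert sum(n['op'] == '_tensorrt_subgraph_op' for n in nodes) == 1
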
diff --git a/tests/python/tensorrt/test_avg_pool2d.py b/tests/python/tensorrt/test_avg_pool2d.py
index 7c20ebf4989f..553f0e0cd33d 100644
--- a/tests/python/tensorrt/test_avg_pool2d.py
+++ b/tests/python/tensorrt/test_avg_pool2d.py
@@ -21,6 +21,7 @@
 import nnvm
 import tvm
 from tvm.contrib import graph_runtime
+import json
 
 
 def test_avg_pool2d():
@@ -28,14 +29,13 @@ def test_avg_pool2d():
     # Generate the data
     np.random.seed(0)
     input_shape = [1, 1, 28, 28]
-    output_shape = [1, 10]
+    output_shape = [1, 1, 28, 28]
     data = np.random.random(input_shape).astype('float32')
 
     # Baseline model in MXNet
     net = gluon.nn.HybridSequential()
     with net.name_scope():
         net.add(gluon.nn.AvgPool2D(pool_size=3, strides=1, padding=1))
-        net.add(gluon.nn.Dense(10))
     net.collect_params().initialize(mx.init.Xavier(), ctx=mx.cpu())
     net.hybridize()
     baseline_input = mx.nd.array(data, ctx=mx.cpu())
@@ -48,6 +48,17 @@ def test_avg_pool2d():
     graph, lib, params = nnvm.compiler.build(sym, target,
                                              shape={'data': input_shape},
                                              params=params)
+
+    # Verify that TRT subgraphs are partitioned
+    def check_trt_used(graph):
+        graph = json.loads(graph.json())
+        num_trt_subgraphs = sum([1 for n in graph['nodes'] if n['op'] == '_tensorrt_subgraph_op'])
+        assert num_trt_subgraphs == 1
+    check_trt_used(graph)
+
+    # Execute
+    if not tvm.module.enabled("gpu"):
+        return
     compiled_model = graph_runtime.create(graph, lib, tvm.gpu())
     compiled_input = tvm.nd.array(data, ctx=tvm.gpu())
     compiled_model.set_input('data', compiled_input)
diff --git a/tests/python/tensorrt/test_cross_compile.py b/tests/python/tensorrt/test_cross_compile.py
index 07d39aab81ec..4ea2e33c5e08 100644
--- a/tests/python/tensorrt/test_cross_compile.py
+++ b/tests/python/tensorrt/test_cross_compile.py
@@ -22,6 +22,7 @@
 import tvm
 from tvm.contrib import graph_runtime
 from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
+import json
 
 batch_size = 1
 
@@ -96,6 +97,14 @@ def get_data_shape(model_name):
     with nnvm.compiler.build_config(opt_level=opt_level, ext_accel=ext_accel):
         graph, lib, params = nnvm.compiler.build(
             net, target, shape={"data": data_shape}, params=params, target_host=target_host)
+
+    # Verify that TRT subgraphs are partitioned
+    def check_trt_used(graph):
+        graph = json.loads(graph.json())
+        num_trt_subgraphs = sum([1 for n in graph['nodes'] if n['op'] == '_tensorrt_subgraph_op'])
+        assert num_trt_subgraphs >= 1
+    check_trt_used(graph)
+
     print("===========Compiling model %s took %.3fs" % (network, time.time() - start))
 
     print("===========Saving lowered graph for model %s" % network)
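
Note: the check_trt_used closure is now duplicated verbatim in both tests above,
and appears a third time in test_tensorrt.py below. A sketch of a shared helper
the test files could import instead; the module path is hypothetical:

    # Hypothetical shared module, e.g. tests/python/tensorrt/trt_utils.py.
    import json

    def check_trt_used(graph, min_subgraphs=1, exact=False):
        """Assert that graph partitioning produced TensorRT subgraph ops.

        With exact=True the count must match exactly (single-op tests);
        otherwise at least `min_subgraphs` subgraph ops must be present.
        """
        nodes = json.loads(graph.json())['nodes']
        num_trt_subgraphs = sum(1 for n in nodes
                                if n['op'] == '_tensorrt_subgraph_op')
        if exact:
            assert num_trt_subgraphs == min_subgraphs
        else:
            assert num_trt_subgraphs >= min_subgraphs
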
diff --git a/tests/python/tensorrt/test_tensorrt.py b/tests/python/tensorrt/test_tensorrt.py
index 12f7972d60ee..be41bdc90a11 100644
--- a/tests/python/tensorrt/test_tensorrt.py
+++ b/tests/python/tensorrt/test_tensorrt.py
@@ -19,6 +19,7 @@
 import logging
 logging.basicConfig(level=logging.INFO)
 import numpy as np
+import json
 
 import nnvm.compiler
 import nnvm.testing
@@ -30,15 +31,11 @@ def test_tensorrt_image_classification_models():
-    def compile_model(graph, params, data_shapes, subgraph_backend=None, op_names=None, **kwargs):
+    def compile_model(graph, params, data_shapes, **kwargs):
         _, output_shapes = nnvm.compiler.graph_util.infer_shape(graph, **data_shapes)
         assert len(output_shapes) == 1
-        flags = kwargs
-        if subgraph_backend is not None and op_names is not None:
-            graph = nnvm.subgraph._partition(graph, subgraph_backend, op_names)
-            flags = {}
         target = tvm.target.cuda()
-        with nnvm.compiler.build_config(opt_level=3, **flags):
+        with nnvm.compiler.build_config(opt_level=3, **kwargs):
             graph, lib, params = nnvm.compiler.build(
                 graph, target, shape=data_shapes, params=params)
         return graph, lib, params, output_shapes[0]
@@ -60,7 +57,16 @@ def copy_params(params):
     def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
                         subgraph_backend=None, op_names=None, **kwargs):
         trt_graph, trt_lib, trt_params, output_shape = compile_model(graph, params, {'data': data_shape},
-                                                                     subgraph_backend, op_names, **kwargs)
+                                                                     **kwargs)
+        # Verify that TRT subgraphs are partitioned
+        def check_trt_used(graph):
+            graph = json.loads(graph.json())
+            num_trt_subgraphs = sum([1 for n in graph['nodes'] if n['op'] == '_tensorrt_subgraph_op'])
+            assert num_trt_subgraphs >= 1
+        check_trt_used(trt_graph)
+
+        if not tvm.module.enabled("gpu"):
+            return
         data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
         baseline_out = get_output(baseline_module, data, baseline_params, output_shape)
         trt_module = graph_runtime.create(trt_graph, trt_lib, tvm.gpu())
@@ -94,7 +100,8 @@ def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
                                                                 shape={'data': data_shape},
                                                                 params=copy_params(params))
         baseline_module = graph_runtime.create(baseline_graph, baseline_lib, tvm.gpu())
 
-        # test whole graph run using tensorrt, nnvm.compiler.build_config has graph partitioning turned on
+        # Test whole graph run using tensorrt. nnvm.compiler.build_config has
+        # graph partitioning turned on when ext_accel='tensorrt'.
         check_trt_model(baseline_module, baseline_params, nnvm.graph.load_json(graph_json_str),
                         copy_params(params), data_shape, ext_accel='tensorrt')
diff --git a/tests/scripts/task_python_tensorrt.sh b/tests/scripts/task_python_tensorrt.sh
new file mode 100755
index 000000000000..57ec1dc7e3c0
--- /dev/null
+++ b/tests/scripts/task_python_tensorrt.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+
+export PYTHONPATH=nnvm/python:python:topi/python
+export LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}"
+
+rm -rf python/tvm/*.pyc python/tvm/*/*.pyc python/tvm/*/*/*.pyc
+
+TVM_FFI=ctypes python3 -m nose -v tests/python/tensorrt
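
Note: all three tests follow the same split: partition checks run
unconditionally, while execution is guarded so CPU-only CI still validates the
compiled graph. A sketch of that guarded execution half; graph, lib, params,
and data are assumed to come from a TensorRT-enabled build like the ones above:

    import tvm
    from tvm.contrib import graph_runtime

    def run_on_gpu(graph, lib, params, data):
        """Execute a compiled, possibly TensorRT-partitioned, graph on the GPU."""
        module = graph_runtime.create(graph, lib, tvm.gpu())
        module.set_input('data', tvm.nd.array(data, ctx=tvm.gpu()))
        module.set_input(**params)
        module.run()
        return module.get_output(0).asnumpy()

    # Partitioning was already verified above; running the TensorRT engine
    # needs a GPU, so CPU-only CI stops before this point.
    if tvm.module.enabled("gpu"):
        out = run_on_gpu(graph, lib, params, data)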