Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include bin and headers to packaging and provide option to ensure tests can use precompiled trtorch libs #670

Merged
merged 11 commits into from
Oct 20, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ pkg_tar(
name = "libtrtorch",
srcs = [
"//:LICENSE",
"//bzl_def:BUILD",
"//bzl_def:WORKSPACE"
],
extension = "tar.gz",
package_dir = "trtorch",
Expand Down
7 changes: 7 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ git_repository(
shallow_since = "1570114335 -0400",
)

# External dependency for trtorch if you already have precompiled binaries.
# This is currently used in pytorch NGC container CI testing.
local_repository(
name = "trtorch",
path = "/opt/conda/lib/python3.8/site-packages/trtorch"
)

# CUDA should be installed on the system locally
new_local_repository(
name = "cuda",
Expand Down
79 changes: 79 additions & 0 deletions bzl_def/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Template BUILD file shipped inside the libtrtorch distribution tarball
# (packaged by the //:libtrtorch pkg_tar rule). It allows a pre-built trtorch
# install to be mounted as an external Bazel repository via local_repository
# and consumed as @trtorch//:trtorch (used by CI testing with
# --define trtorch_src=pre_built).
package(default_visibility = ["//visibility:public"])

# Matches aarch64 Linux targets. NOTE(review): not referenced by any select()
# in this file — presumably kept for parity with the main repo; confirm.
config_setting(
name = "aarch64_linux",
constraint_values = [
"@platforms//cpu:aarch64",
"@platforms//os:linux",
],
)

# Matches Windows targets; selects the .dll artifacts in the rules below.
config_setting(
name = "windows",
constraint_values = [
"@platforms//os:windows",
],
)

# The full pre-built TRTorch shared library plus all public headers.
cc_library(
name = "libtrtorch",
srcs = select({
":windows": [
"lib/x64/trtorch.dll",
],
"//conditions:default": [
"lib/libtrtorch.so",
],
}),
hdrs = glob([
"include/**/*.h",
]),
# Headers are included as e.g. "trtorch/trtorch.h", not "include/...".
strip_include_prefix = "include",
includes = ["include/"]
)

# Pre-built trtorchrt shared library. No hdrs: exposed as a link-only target
# (presumably the runtime-only library — confirm against the main repo).
cc_library(
name = "libtrtorchrt",
srcs = select({
":windows": [
"lib/x64/trtorchrt.dll"
],
"//conditions:default": [
"lib/libtrtorchrt.so"
]
})
)

# Pre-built plugins shared library with only the plugin headers exposed.
cc_library(
name = "libtrtorch_plugins",
srcs = select({
":windows": [
"lib/x64/trtorch_plugins.dll"
],
"//conditions:default": [
"lib/libtrtorch_plugins.so"
]
}),
hdrs = glob([
"include/trtorch/core/plugins/**/*.h",
]),
strip_include_prefix = "include",
includes = ["include/"]
)

# Header-only target exposing the internal core headers (no library linked);
# include paths are rooted under "include/trtorch" so sources can use
# e.g. "core/conversion/conversion.h" as in the main repo.
cc_library(
name = "trtorch_core_hdrs",
hdrs = glob([
"include/trtorch/core/**/*.h"
]),
strip_include_prefix = "include/trtorch",
includes = ["include/trtorch/"]
)

# Alias for ease of use
cc_library(
name = "trtorch",
deps = [
":libtrtorch",
]
)
6 changes: 6 additions & 0 deletions bzl_def/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# BUILD file for the bzl_def package itself. Exports the template WORKSPACE
# and BUILD files so the //:libtrtorch pkg_tar rule can reference
# //bzl_def:BUILD and //bzl_def:WORKSPACE and bundle them into the
# distribution tarball. (Named BUILD.bazel so it takes precedence over the
# plain BUILD template that is meant for the packaged tree, not this repo.)
package(default_visibility = ["//visibility:public"])

exports_files([
"WORKSPACE",
"BUILD"
])
1 change: 1 addition & 0 deletions bzl_def/WORKSPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
workspace(name = "trtorch")
7 changes: 7 additions & 0 deletions docker/WORKSPACE.cu.docker
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ git_repository(
shallow_since = "1570114335 -0400"
)

# External dependency for trtorch if you already have precompiled binaries.
# This is currently used in pytorch NGC container CI testing.
local_repository(
name = "trtorch",
path = "/opt/conda/lib/python3.8/site-packages/trtorch"
)

# CUDA should be installed on the system locally
new_local_repository(
name = "cuda",
Expand Down
29 changes: 24 additions & 5 deletions py/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def is_exe(fpath):

def build_libtrtorch_pre_cxx11_abi(develop=True, use_dist_dir=True, cxx11_abi=False):
cmd = [BAZEL_EXE, "build"]
cmd.append("//cpp/lib:libtrtorch.so")
cmd.append("//:libtrtorch")
if develop:
cmd.append("--compilation_mode=dbg")
else:
Expand Down Expand Up @@ -106,7 +106,6 @@ def gen_version_file():
print("creating version file")
f.write("__version__ = \"" + __version__ + '\"')


def copy_libtrtorch(multilinux=False):
if not os.path.exists(dir_path + '/trtorch/lib'):
os.makedirs(dir_path + '/trtorch/lib')
Expand All @@ -115,7 +114,7 @@ def copy_libtrtorch(multilinux=False):
if multilinux:
copyfile(dir_path + "/build/libtrtorch_build/libtrtorch.so", dir_path + '/trtorch/lib/libtrtorch.so')
else:
copyfile(dir_path + "/../bazel-bin/cpp/lib/libtrtorch.so", dir_path + '/trtorch/lib/libtrtorch.so')
os.system("tar -xzf ../bazel-bin/libtrtorch.tar.gz --strip-components=2 -C " + dir_path + "/trtorch")


class DevelopCommand(develop):
Expand Down Expand Up @@ -258,9 +257,29 @@ def run(self):
python_requires='>=3.6',
include_package_data=True,
package_data={
'trtorch': ['lib/*.so'],
'trtorch': ['lib/*',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to list every directory, can we not glob?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably can use python glob library (glob.glob(pattern) - requires 3/4 patterns).
Naive globbing (eg: providing directly include/**/*.h, include/**/**/*.h etc in trtorch key doesn't work. Referred to pytorch setup.py which included all directories https://github.com/pytorch/pytorch/blob/master/setup.py#L918

'include/trtorch/*.h',
'include/trtorch/core/*.h',
'include/trtorch/core/conversion/*.h',
'include/trtorch/core/conversion/conversionctx/*.h',
'include/trtorch/core/conversion/converters/*.h',
'include/trtorch/core/conversion/evaluators/*.h',
'include/trtorch/core/conversion/tensorcontainer/*.h',
'include/trtorch/core/conversion/var/*.h',
'include/trtorch/core/ir/*.h',
'include/trtorch/core/lowering/*.h',
'include/trtorch/core/lowering/passes/*.h',
'include/trtorch/core/partitioning/*.h',
'include/trtorch/core/plugins/*.h',
'include/trtorch/core/plugins/impl/*.h',
'include/trtorch/core/runtime/*.h',
'include/trtorch/core/util/*.h',
'include/trtorch/core/util/logging/*.h',
'bin/*',
'BUILD',
'WORKSPACE'],
},
exclude_package_data={
'': ['*.cpp', '*.h'],
'': ['*.cpp'],
'trtorch': ['csrc/*.cpp'],
})
7 changes: 7 additions & 0 deletions tests/BUILD
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Matches builds invoked with `--define trtorch_src=pre_built`. When set, the
# test utilities link against the pre-built trtorch external repository
# (@trtorch//:trtorch) instead of compiling //cpp:trtorch from source,
# which avoids rebuilding the full library for CI test runs.
config_setting(
name = "ci_build_testing",
values = {
"define": "trtorch_src=pre_built"
}
)

test_suite(
name = "tests",
tests = [
Expand Down
29 changes: 28 additions & 1 deletion tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,37 @@ The goal of converter tests is to test individual converters against specific

Module tests are designed to test the compiler against common network architectures and verify the integration of converters together into a single engine.

In addition to the above, we have lowering tests (`//core/lowering`) which test the functionality of lowering passes, and partitioning tests (`//core/partitioning`) which test different cases of torch fallback on test networks.

You can run the whole test suite with bazel, but be aware that you may exhaust GPU memory if you run them naively (this may appear as a cuDNN initialization error), so you may need to limit the number of concurrent tests. Also, because the inputs to tests are random, it may make sense to run the tests a few times.

Here are some settings that work well the current test suite on a TITAN V.
Here are some settings that we usually test with:

```
bazel test //tests --compilation_mode=dbg --test_output=errors --jobs=4 --runs_per_test=5
```

`--runs_per_test` is optional and can be performed to check if numerical issues in outputs persist across multiple runs.

`--jobs=4` is useful and is sometimes required to prevent too many concurrent processes from using GPU memory and causing CUDA out-of-memory issues.

### Testing using pre-built TRTorch library

Currently, the default strategy when we run all the tests (`bazel test //tests`) is to build the testing scripts along with the full TRTorch library (`libtrtorch.so`) from scratch. This can lead to increased testing time and might not be needed in case you already have a pre-built TRTorch library that you want to link against.

In order to **not** build the entire TRTorch library and only build the test scripts, please use the following command.

```
bazel test //tests --compilation_mode=dbg --test_output=summary --define trtorch_src=pre_built --jobs 2
```

The flag `--define trtorch_src=pre_built` signals bazel to use pre-compiled library as an external dependency for tests. The pre-compiled library path is defined as a `local_repository` rule in root `WORKSPACE` file (`https://github.com/NVIDIA/TRTorch/blob/master/WORKSPACE`).

```
# External dependency for trtorch if you already have precompiled binaries.
# This is currently used in pytorch NGC container CI testing.
local_repository(
name = "trtorch",
path = "/opt/pytorch/trtorch"
)
```
1 change: 0 additions & 1 deletion tests/core/conversion/converters/converter_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ def converter_test(name, visibility = None):
visibility = visibility,
deps = [
"//tests/util",
"//core",
"@googletest//:gtest_main",
] + select({
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
Expand Down
1 change: 0 additions & 1 deletion tests/core/conversion/evaluators/evaluator_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ def evaluator_test(name, visibility = None):
visibility = visibility,
deps = [
"//tests/util",
"//core",
"@googletest//:gtest_main",
] + select({
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
Expand Down
1 change: 0 additions & 1 deletion tests/core/lowering/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ cc_test(
srcs = ["test_module_fallback_passes.cpp"],
deps = [
"//tests/util",
"//core",
"@googletest//:gtest_main",
] + select({
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
Expand Down
1 change: 0 additions & 1 deletion tests/core/lowering/lowering_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ def lowering_test(name, visibility = None):
visibility = visibility,
deps = [
"//tests/util",
"//core",
"@googletest//:gtest_main",
] + select({
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
Expand Down
4 changes: 1 addition & 3 deletions tests/core/partitioning/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ cc_test(
srcs = ["test_fallback_graph_output.cpp"],
deps = [
"//tests/util",
"//core",
"@googletest//:gtest_main",
] + select({
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
Expand Down Expand Up @@ -69,7 +68,6 @@ cc_test(
srcs = ["test_conditionals.cpp"],
deps = [
"//tests/util",
"//core",
"@googletest//:gtest_main",
] + select({
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
Expand All @@ -91,4 +89,4 @@ test_suite(
":test_loop_fallback",
":test_conditionals"
]
)
)
1 change: 0 additions & 1 deletion tests/core/partitioning/partitioning_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ def partitioning_test(name, visibility=None):
visibility = visibility,
deps = [
"//tests/util",
"//core",
"@googletest//:gtest_main",
] + select({
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
Expand Down
3 changes: 0 additions & 3 deletions tests/cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ cc_test(
"//tests/modules:jit_models",
],
deps = [
"//cpp:trtorch",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did we not have to replace any of these dependencies?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't get your question. We replaced //cpp:trtorch with //tests/util which adds //cpp:trtorch based on the trtorch_src flag

"//tests/util",
"@googletest//:gtest_main",
] + select({
Expand Down Expand Up @@ -115,7 +114,6 @@ cc_test(
"//tests/modules:jit_models",
],
deps = [
"//cpp:trtorch",
"//tests/util",
"@googletest//:gtest_main",
] + select({
Expand Down Expand Up @@ -150,7 +148,6 @@ cc_library(
name = "cpp_api_test",
hdrs = ["cpp_api_test.h"],
deps = [
"//cpp:trtorch",
"//tests/util",
"@googletest//:gtest_main",
] + select({
Expand Down
13 changes: 10 additions & 3 deletions tests/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ cc_library(
"util.h",
],
deps = [
"//core/conversion",
"//core/util:prelude",
"//cpp:trtorch",
"@tensorrt//:nvinfer",
] + select({
":use_pre_cxx11_abi": [
Expand All @@ -33,5 +30,15 @@ cc_library(
"@libtorch//:libtorch",
"@libtorch//:caffe2",
],
}) + select({
"//tests:ci_build_testing": [
"@trtorch//:trtorch",
"@trtorch//:trtorch_core_hdrs"
],
"//conditions:default": [
"//cpp:trtorch",
"//core/conversion",
"//core/util:prelude"
]
}),
)