merge: resolve the conflict in AddEngineToGraph argument

Signed-off-by: Bo Wang <[email protected]>
pytorch · Apr 23, 2021 · de3ba23 · de3ba23
2 parents c1934c1 + 72cb449
commit de3ba23
Show file tree

Hide file tree

Showing 45 changed files with 997 additions and 145 deletions.
diff --git a/.github/scripts/run_cpp_linter.py b/.github/scripts/run_cpp_linter.py
@@ -26,4 +26,7 @@
 
 pr.create_review(commit, comment, approval)
 
-
+if output.returncode != 0:
+    exit(1)
+else:
+    exit(0)
diff --git a/.github/scripts/run_py_linter.py b/.github/scripts/run_py_linter.py
@@ -25,3 +25,8 @@
     approval = 'REQUEST_CHANGES'
 
 pr.create_review(commit, comment, approval)
+
+if output.returncode != 0:
+    exit(1)
+else:
+    exit(0)
diff --git a/README.md b/README.md
@@ -74,7 +74,7 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
 These are the following dependencies used to verify the testcases. TRTorch can work with other versions, but the tests are not guaranteed to pass.
 
 - Bazel 4.0.0
-- Libtorch 1.8.0 (built with CUDA 11.1)
+- Libtorch 1.8.1 (built with CUDA 11.1)
 - CUDA 11.1 (10.2 on Jetson)
 - cuDNN 8.1
 - TensorRT 7.2.3

diff --git a/WORKSPACE b/WORKSPACE
@@ -3,23 +3,21 @@ workspace(name = "TRTorch")
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
 
-git_repository(
-    name = "rules_python",
-    remote = "https://github.com/bazelbuild/rules_python.git",
-    commit = "4fcc24fd8a850bdab2ef2e078b1de337eea751a6",
-    shallow_since = "1589292086 -0400"
-)
-
-load("@rules_python//python:repositories.bzl", "py_repositories")
-py_repositories()
+http_archive(
+        name = "rules_python",
+        url = "https://github.com/bazelbuild/rules_python/releases/download/0.2.0/rules_python-0.2.0.tar.gz",
+        sha256 = "778197e26c5fbeb07ac2a2c5ae405b30f6cb7ad1f5510ea6fdac03bded96cc6f",
+    )
 
-load("@rules_python//python:pip.bzl", "pip_repositories", "pip3_import")
-pip_repositories()
+load("@rules_python//python:pip.bzl", "pip_install")
 
 http_archive(
     name = "rules_pkg",
-    url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.2.4/rules_pkg-0.2.4.tar.gz",
-    sha256 = "4ba8f4ab0ff85f2484287ab06c0d871dcb31cc54d439457d28fd4ae14b18450a",
+    urls = [
+    	"https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
+	"https://github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
+    ],
+    sha256 = "038f1caa773a7e35b3663865ffb003169c6a71dc995e39bf4815792f385d837d",
 )
 
 load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
@@ -39,12 +37,6 @@ new_local_repository(
     build_file = "@//third_party/cuda:BUILD",
 )
 
-new_local_repository(
-    name = "cublas",
-    path = "/usr",
-    build_file = "@//third_party/cublas:BUILD",
-)
-
 #############################################################################################################
 # Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs)
 #############################################################################################################
@@ -53,16 +45,16 @@ http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    sha256 = "62a2c06761c32576b30f5884240cf675b937945d929e4b13cc776de8d9c2236c",
-    urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.8.0%2Bcu111.zip"],
+    sha256 = "1f8aec376f9343538bd7c2fd3abb81ed3af11f575efe3aa72777c4d62044b832",
+    urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.8.1%2Bcu111.zip"],
 )
 
 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    sha256 = "1c8b0c0883dd17f5ce952d42ec5f7f0cc7ceb370307535cee26a66c10419f1f6",
-    urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.8.0%2Bcu111.zip"],
+    sha256 = "3a6e0dc11859111e75caa640c8ce9bf904fbb6e9992b4345e444ed5410e4d77e",
+    urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.8.1%2Bcu111.zip"],
 )
 
 # Download these tarballs manually from the NVIDIA website
@@ -71,15 +63,19 @@ http_archive(
 
 http_archive(
     name = "cudnn",
-    urls = ["https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.1.1.33/11.2_20210301/cudnn-11.2-linux-x64-v8.1.1.33.tgz",],
+    urls = [
+        "https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.1.1.33/11.2_20210301/cudnn-11.2-linux-x64-v8.1.1.33.tgz",
+    ],
     build_file = "@//third_party/cudnn/archive:BUILD",
     sha256 = "98a8784e92862f20018d20c281b30d4a0cd951f93694f6433ccf4ae9c502ba6a",
     strip_prefix = "cuda"
 )
 
 http_archive(
     name = "tensorrt",
-    urls = ["https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/7.2.3/tars/TensorRT-7.2.3.4.Ubuntu-18.04.x86_64-gnu.cuda-11.1.cudnn8.1.tar.gz",],
+    urls = [
+        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/7.2.3/tars/TensorRT-7.2.3.4.Ubuntu-18.04.x86_64-gnu.cuda-11.1.cudnn8.1.tar.gz",
+    ],
     build_file = "@//third_party/tensorrt/archive:BUILD",
     strip_prefix = "TensorRT-7.2.3.4",
     sha256 = "d3a1f478e304b48878604fac70ce7920fece71f9cac62f925c9c59c197f5d087"
@@ -123,26 +119,17 @@ http_archive(
 #########################################################################
 # Testing Dependencies (optional - comment out on aarch64)
 #########################################################################
-pip3_import(
+pip_install(
     name = "trtorch_py_deps",
-    requirements = "//py:requirements.txt"
+    requirements = "//py:requirements.txt",
 )
 
-load("@trtorch_py_deps//:requirements.bzl", "pip_install")
-pip_install()
-
-pip3_import(
+pip_install(
     name = "py_test_deps",
-    requirements = "//tests/py:requirements.txt"
+    requirements = "//tests/py:requirements.txt",
 )
 
-load("@py_test_deps//:requirements.bzl", "pip_install")
-pip_install()
-
-pip3_import(
-   name = "pylinter_deps",
-   requirements = "//tools/linter:requirements.txt",
+pip_install(
+    name = "pylinter_deps",
+    requirements = "//tools/linter:requirements.txt",
 )
-
-load("@pylinter_deps//:requirements.bzl", "pip_install")
-pip_install()
diff --git a/core/compiler.cpp b/core/compiler.cpp
@@ -30,7 +30,7 @@ namespace core {
 void AddEngineToGraph(
     torch::jit::script::Module mod,
     std::shared_ptr<torch::jit::Graph>& g,
-    std::string& serialized_engine,
+    const std::string& serialized_engine,
     int engine_id = 0) {
   auto engine_ptr =
       c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name() + std::to_string(engine_id), serialized_engine);
@@ -267,6 +267,20 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
   return new_mod;
 }
 
+torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine) {
+  std::ostringstream engine_id;
+  engine_id << reinterpret_cast<const int*>(&engine);
+  torch::jit::script::Module new_mod("tensorrt_engine_mod_" + engine_id.str());
+  auto new_g = std::make_shared<torch::jit::Graph>();
+  AddEngineToGraph(new_mod, new_g, engine);
+  auto new_method = new_mod._ivalue()->compilation_unit()->create_function("forward", new_g);
+  auto schema = GenerateGraphSchema(new_mod, new_method->name(), new_g);
+  new_mod.type()->addMethod(new_method);
+  new_method->setSchema(schema);
+
+  return new_mod;
+}
+
 void set_device(const int gpu_id) {
   TRTORCH_ASSERT(cudaSetDevice(gpu_id) == cudaSuccess, "Unable to set CUDA device: " << gpu_id);
 }

diff --git a/core/compiler.h b/core/compiler.h
@@ -22,6 +22,8 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::
 
 torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec cfg);
 
+torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine);
+
 void set_device(const int gpu_id);
 
 } // namespace core

diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp
@@ -13,6 +13,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
     os << "Settings requested for TensorRT engine:"                                        \
        << "\n    Operating Precision: " << s.op_precision                                  \
        << "\n    TF32 Floating Point Computation Enabled: " << !s.disable_tf32             \
+       << "\n    Truncate Long and Double: " << s.truncate_long_and_double                 \
        << "\n    Make Refittable Engine: " << s.refit                                      \
        << "\n    Debuggable Engine: " << s.debug                                           \
        << "\n    Strict Types: " << s.strict_types                                         \

diff --git a/core/conversion/evaluators/aten.cpp b/core/conversion/evaluators/aten.cpp
@@ -468,11 +468,21 @@ auto aten_registrations TRTORCH_UNUSED =
                     })})
         .evaluator({c10::Symbol::fromQualString("aten::floor"),
                     [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
-                      auto el = args.at(n->input(0)).unwrapToDouble();
-
-                      return static_cast<int64_t>(std::floor(el));
+                      if (args.at(n->input(0)).IValue()->isInt()) {
+                        auto el = args.at(n->input(0)).unwrapToInt();
+                        return static_cast<int64_t>(std::floor(el));
+                      } else if (args.at(n->input(0)).IValue()->isDouble()) {
+                        auto el = args.at(n->input(0)).unwrapToDouble();
+                        return static_cast<int64_t>(std::floor(el));
+                      } else {
+                        TRTORCH_THROW_ERROR(
+                            "Unimplemented data type for aten::floor evaluator: "
+                            << args.at(n->input(0)).IValue()->type()->str());
+                        return {};
+                      }
                     },
                     EvalOptions().validSchemas({
+                        "aten::floor.int(int a) -> (int)",
                         "aten::floor.float(float a) -> (int)",
                     })})
         .evaluator({c10::Symbol::fromQualString("aten::warn"),

diff --git a/cpp/api/include/trtorch/trtorch.h b/cpp/api/include/trtorch/trtorch.h
@@ -511,6 +511,21 @@ TRTORCH_API std::string ConvertGraphToTRTEngine(
     const torch::jit::Module& module,
     std::string method_name,
     CompileSpec info);
+
+/**
+ * @brief Take a previously created TensorRT engine and embed it
+ * in a TorchScript module
+ *
+ * @param engine: std::string - Pre-built serialized TensorRT engine
+ *
+ * Takes a pre-built serialized TensorRT engine and embeds it in a TorchScript
+ * module. Registers execution of the engine as the forward method of the module.
+ * Forward is defined as: forward(Tensor[]) -> Tensor[]
+ *
+ * @return: A new module targeting a TensorRT engine
+ */
+TRTORCH_API torch::jit::Module EmbedEngineInNewModule(const std::string& engine);
+
 /**
  * @brief Set gpu device id
  *

diff --git a/cpp/api/src/trtorch.cpp b/cpp/api/src/trtorch.cpp
@@ -31,6 +31,10 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module
   return core::CompileGraph(module, to_internal_compile_spec(info));
 }
 
+torch::jit::Module EmbedEngineInNewModule(const std::string& engine) {
+  return core::EmbedEngineInNewModule(engine);
+}
+
 std::string get_build_info() {
   auto info = core::util::get_build_info();
   return std::string("TRTorch Version: ") + TRTORCH_VERSION + '\n' + info;

diff --git a/cpp/ptq/training/vgg16/export_ckpt.py b/cpp/ptq/training/vgg16/export_ckpt.py
@@ -22,7 +22,7 @@ def test(model, dataloader, crit):
 
     with torch.no_grad():
         for data, labels in dataloader:
-            data, labels = data.cuda(), labels.cuda(async=True)
+            data, labels = data.cuda(), labels.cuda(non_blocking=True)
             out = model(data)
             loss += crit(out, labels)
             preds = torch.max(out, 1)[1]

diff --git a/cpp/ptq/training/vgg16/main.py b/cpp/ptq/training/vgg16/main.py
@@ -141,7 +141,7 @@ def train(model, dataloader, crit, opt, epoch):
     model.train()
     running_loss = 0.0
     for batch, (data, labels) in enumerate(dataloader):
-        data, labels = data.cuda(), labels.cuda(async=True)
+        data, labels = data.cuda(), labels.cuda(non_blocking=True)
         opt.zero_grad()
         out = model(data)
         loss = crit(out, labels)
@@ -167,7 +167,7 @@ def test(model, dataloader, crit, epoch):
     model.eval()
     with torch.no_grad():
         for data, labels in dataloader:
-            data, labels = data.cuda(), labels.cuda(async=True)
+            data, labels = data.cuda(), labels.cuda(non_blocking=True)
             out = model(data)
             loss += crit(out, labels)
             preds = torch.max(out, 1)[1]

diff --git a/docker/Dockerfile.21.02 b/docker/Dockerfile.21.02
@@ -0,0 +1,40 @@
+FROM nvcr.io/nvidia/pytorch:21.02-py3
+
+RUN apt-get update && apt-get install -y curl gnupg  && rm -rf /var/lib/apt/lists/*
+
+RUN curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \
+    echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list
+
+RUN apt-get update && apt-get install -y bazel-4.0.0 && rm -rf /var/lib/apt/lists/*
+RUN ln -s /usr/bin/bazel-4.0.0 /usr/bin/bazel
+
+RUN pip install notebook
+
+COPY . /opt/trtorch
+RUN rm /opt/trtorch/WORKSPACE
+COPY ./docker/WORKSPACE.cu.docker  /opt/trtorch/WORKSPACE
+
+# Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container
+RUN cp /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer_static.a
+
+WORKDIR /opt/trtorch
+RUN bazel build //:libtrtorch --compilation_mode opt
+
+WORKDIR /opt/trtorch/py
+
+RUN pip install ipywidgets
+RUN jupyter nbextension enable --py widgetsnbextension
+
+# Locale is not set by default
+RUN apt-get update && apt-get install -y locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8
+ENV LANG en_US.UTF-8
+ENV LANGUAGE en_US:en
+ENV LC_ALL en_US.UTF-8
+RUN python3 setup.py install --use-cxx11-abi
+
+RUN conda init bash
+
+ENV LD_LIBRARY_PATH /opt/conda/lib/python3.8/site-packages/torch/lib:$LD_LIBRARY_PATH
+
+WORKDIR /opt/trtorch/
+CMD /bin/bash
-Original file line number
+Diff line change
@@ Expand Up / @@ -26,4 +26,7 @@ @@
     pr.create_review(commit, comment, approval)
+    if output.returncode != 0:
+        exit(1)
+    else:
+        exit(0)