Merge pull request #778 from NVIDIA/torchtrtc_cli_cleanup
Refactor the CLI
narendasan authored Feb 24, 2022
2 parents 7223fc8 + a182c0e commit b798c7f
Showing 11 changed files with 599 additions and 428 deletions.
7 changes: 7 additions & 0 deletions cpp/bin/torchtrtc/BUILD
@@ -10,7 +10,14 @@ config_setting(
cc_binary(
name = "torchtrtc",
srcs = [
"accuracy.h",
"accuracy.cpp",
"fileio.h",
"fileio.cpp",
"luts.h",
"main.cpp",
"parser_util.h",
"parser_util.cpp"
],
deps = [
"//third_party/args",
200 changes: 101 additions & 99 deletions cpp/bin/torchtrtc/README.md
@@ -14,108 +14,110 @@ to standard TorchScript. Load with `torch.jit.load()` and run like you would run

```
torchtrtc [input_file_path] [output_file_path]
                            [input_specs...] {OPTIONS}

    torchtrtc is a compiler for TorchScript, it will compile and optimize
    TorchScript programs to run on NVIDIA GPUs using TensorRT

  OPTIONS:

      -h, --help                        Display this help menu
      Verbosity of the compiler
        -v, --verbose                   Dumps debugging information about the
                                        compilation process onto the console
        -w, --warnings                  Disables warnings generated during
                                        compilation onto the console (warnings
                                        are on by default)
        --i, --info                     Dumps info messages generated during
                                        compilation onto the console
      --build-debuggable-engine         Creates a debuggable engine
      --allow-gpu-fallback              (Only used when targeting DLA
                                        (device-type)) Lets engine run layers on
                                        GPU if they are not supported on DLA
      --require-full-compilation        Require that the model should be fully
                                        compiled to TensorRT or throw an error
      --check-method-support=[method_name]
                                        Check the support for end to end
                                        compilation of a specified method in the
                                        TorchScript module
      --disable-tf32                    Prevent Float32 layers from using the
                                        TF32 data format
      --sparse-weights                  Enable sparsity for weights of conv and
                                        FC layers
      -p[precision...],
      --enabled-precision=[precision...]
                                        (Repeatable) Enabling an operating
                                        precision for kernels to use when
                                        building the engine (Int8 requires a
                                        calibration-cache argument) [ float |
                                        float32 | f32 | fp32 | half | float16 |
                                        f16 | fp16 | int8 | i8 | char ]
                                        (default: float)
      -d[type], --device-type=[type]    The type of device the engine should be
                                        built for [ gpu | dla ] (default: gpu)
      --gpu-id=[gpu_id]                 GPU id if running on multi-GPU platform
                                        (defaults to 0)
      --dla-core=[dla_core]             DLACore id if running on available DLA
                                        (defaults to 0)
      --engine-capability=[capability]  The type of device the engine should be
                                        built for [ standard | safety |
                                        dla_standalone ]
      --calibration-cache-file=[file_path]
                                        Path to calibration cache file to use
                                        for post training quantization
      --teo=[torch-executed-ops...],
      --torch-executed-ops=[torch-executed-ops...]
                                        (Repeatable) Operator in the graph that
                                        should always be run in PyTorch for
                                        execution (partial compilation must be
                                        enabled)
      --tem=[torch-executed-mods...],
      --torch-executed-mods=[torch-executed-mods...]
                                        (Repeatable) Module that should always
                                        be run in PyTorch for execution (partial
                                        compilation must be enabled)
      --mbs=[num_ops],
      --min-block-size=[num_ops]        Minimum number of contiguous TensorRT
                                        supported ops to compile a subgraph to
                                        TensorRT
      --embed-engine                    Whether to treat input file as a
                                        serialized TensorRT engine and embed it
                                        into a TorchScript module (device spec
                                        must be provided)
      --num-min-timing-iter=[num_iters] Number of minimization timing iterations
                                        used to select kernels
      --num-avg-timing-iters=[num_iters]
                                        Number of averaging timing iterations
                                        used to select kernels
      --workspace-size=[workspace_size] Maximum size of workspace given to
                                        TensorRT
      -t[threshold], --threshold=[threshold]
                                        Maximum acceptable numerical deviation
                                        from standard TorchScript output
                                        (default 2e-5)
      --no-threshold-check              Skip checking threshold compliance
      --truncate-long-double,
      --truncate, --truncate-64bit      Truncate weights that are provided in
                                        64bit to 32bit (Long, Double to Int,
                                        Float)
      --save-engine                     Instead of compiling a full TorchScript
                                        program, save the created engine to the
                                        path specified as the output path
      input_file_path                   Path to input TorchScript file
      output_file_path                  Path for compiled TorchScript (or
                                        TensorRT engine) file
      input_specs...                    Specs for inputs to engine, can either
                                        be a single size or a range defined by
                                        Min, Optimal, Max sizes, e.g.
                                        "(N,..,C,H,W)"
                                        "[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]".
                                        Data Type and format can be specified by
                                        adding an "@" followed by dtype and "%"
                                        followed by format to the end of the
                                        shape spec. e.g. "(3, 3, 32,
                                        32)@f16%NHWC"
      "--" can be used to terminate flag options and force all following
      arguments to be treated as positional options
```

e.g.
27 changes: 27 additions & 0 deletions cpp/bin/torchtrtc/accuracy.cpp
@@ -0,0 +1,27 @@
#include "accuracy.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/torch_tensorrt.h"

namespace torchtrtc {
namespace accuracy {

bool check_rtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs, float threshold) {
double maxValue = 0.0;
for (auto& tensor : inputs) {
maxValue = fmax(tensor.abs().max().item<float>(), maxValue);
}
torchtrt::logging::log(
torchtrt::logging::Level::kDEBUG,
std::string("Max Difference: ") + std::to_string(diff.abs().max().item<float>()));
torchtrt::logging::log(
torchtrt::logging::Level::kDEBUG, std::string("Acceptable Threshold: ") + std::to_string(threshold));
return diff.abs().max().item<float>() <= threshold * maxValue;
}

bool almost_equal(const at::Tensor& a, const at::Tensor& b, float threshold) {
return check_rtol(a - b, {a, b}, threshold);
}

} // namespace accuracy
} // namespace torchtrtc
18 changes: 18 additions & 0 deletions cpp/bin/torchtrtc/accuracy.h
@@ -0,0 +1,18 @@
#pragma once

#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <vector>

#include "torch/script.h"
#include "torch/torch.h"

namespace torchtrtc {
namespace accuracy {

bool check_rtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs, float threshold);
bool almost_equal(const at::Tensor& a, const at::Tensor& b, float threshold);

} // namespace accuracy
} // namespace torchtrtc
50 changes: 50 additions & 0 deletions cpp/bin/torchtrtc/fileio.cpp
@@ -0,0 +1,50 @@
#include "fileio.h"

namespace torchtrtc {
namespace fileio {

std::string read_buf(std::string const& path) {
std::string buf;
std::ifstream stream(path.c_str(), std::ios::binary);

if (stream) {
stream >> std::noskipws;
std::copy(std::istream_iterator<char>(stream), std::istream_iterator<char>(), std::back_inserter(buf));
}

return buf;
}

std::string get_cwd() {
char buff[FILENAME_MAX]; // create string buffer to hold path
if (getcwd(buff, FILENAME_MAX)) {
std::string current_working_dir(buff);
return current_working_dir;
} else {
torchtrt::logging::log(torchtrt::logging::Level::kERROR, "Unable to get current directory");
exit(1);
}
}

std::string real_path(std::string path) {
auto abs_path = path;
char real_path_c[PATH_MAX];
char* res = realpath(abs_path.c_str(), real_path_c);
if (res) {
return std::string(real_path_c);
} else {
torchtrt::logging::log(torchtrt::logging::Level::kERROR, std::string("Unable to find file ") + abs_path);
exit(1);
}
}

std::string resolve_path(std::string path) {
auto rpath = path;
if (!(rpath.rfind("/", 0) == 0)) {
rpath = get_cwd() + '/' + rpath;
}
return rpath;
}

} // namespace fileio
} // namespace torchtrtc
38 changes: 38 additions & 0 deletions cpp/bin/torchtrtc/fileio.h
@@ -0,0 +1,38 @@
#pragma once
#include <stdlib.h>
#include <iostream>
#include <sstream>

#ifdef __linux__
#include <linux/limits.h>
#else
#define PATH_MAX 260
#endif

#if defined(_WIN32)
#include <direct.h>
#define getcwd _getcwd
#define realpath(N, R) _fullpath((R), (N), PATH_MAX)
#else
#include <unistd.h>
#endif

#include "NvInfer.h"
#include "third_party/args/args.hpp"
#include "torch/script.h"
#include "torch/torch.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/ptq.h"
#include "torch_tensorrt/torch_tensorrt.h"

namespace torchtrtc {
namespace fileio {

std::string read_buf(std::string const& path);
std::string get_cwd();
std::string real_path(std::string path);
std::string resolve_path(std::string path);

} // namespace fileio
} // namespace torchtrtc