diff --git a/cpp/trtorchc/README.md b/cpp/trtorchc/README.md
index 3d065dd021..f1c162820a 100644
--- a/cpp/trtorchc/README.md
+++ b/cpp/trtorchc/README.md
@@ -57,6 +57,10 @@ trtorchc [input_file_path] [output_file_path]
     --calibration-cache-file=[file_path]
                                         Path to calibration cache file to use
                                         for post training quantization
+    --embed-engine                      Whether to treat input file as a
+                                        serialized TensorRT engine and embed it
+                                        into a TorchScript module (device spec
+                                        must be provided)
     --num-min-timing-iter=[num_iters]   Number of minimization timing iterations
                                         used to select kernels
     --num-avg-timing-iters=[num_iters]
diff --git a/cpp/trtorchc/main.cpp b/cpp/trtorchc/main.cpp
index df179caf09..f5412eb969 100644
--- a/cpp/trtorchc/main.cpp
+++ b/cpp/trtorchc/main.cpp
@@ -135,6 +135,18 @@ std::vector<std::vector<int64_t>> parseDynamicDim(std::string shape_str) {
   return shape;
 }
 
+std::string read_buf(std::string const& path) {
+  std::string buf;
+  std::ifstream stream(path.c_str(), std::ios::binary);
+
+  if (stream) {
+    stream >> std::noskipws;
+    std::copy(std::istream_iterator<char>(stream), std::istream_iterator<char>(), std::back_inserter(buf));
+  }
+
+  return buf;
+}
+
 std::string get_cwd() {
   char buff[FILENAME_MAX]; // create string buffer to hold path
   if (getcwd(buff, FILENAME_MAX)) {
@@ -224,6 +236,13 @@ int main(int argc, char** argv) {
       "file_path",
       "Path to calibration cache file to use for post training quantization",
       {"calibration-cache-file"});
+
+  args::Flag embed_engine(
+      parser,
+      "embed-engine",
+      "Whether to treat input file as a serialized TensorRT engine and embed it into a TorchScript module (device spec must be provided)",
+      {"embed-engine"});
+
   args::ValueFlag<uint64_t> num_min_timing_iters(
       parser, "num_iters", "Number of minimization timing iterations used to select kernels", {"num-min-timing-iter"});
   args::ValueFlag<uint64_t> num_avg_timing_iters(
@@ -484,6 +503,14 @@ int main(int argc, char** argv) {
   auto real_input_path = resolve_path(args::get(input_path));
   auto real_output_path = resolve_path(args::get(output_path));
 
+  // Instead of compiling, just embed engine in a PyTorch module
+  if (embed_engine) {
+    std::string serialized_engine = read_buf(real_input_path);
+    auto trt_mod = trtorch::EmbedEngineInNewModule(serialized_engine, compile_settings.device);
+    trt_mod.save(real_output_path);
+    return 0;
+  }
+
   torch::jit::Module mod;
   try {
     // Deserialize the ScriptModule from a file using torch::jit::load().
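Taken together, `read_buf` and the new `embed_engine` branch let `trtorchc` skip the compiler entirely and simply wrap an engine TensorRT has already built; since a serialized engine is tied to the device it was built for, the device spec must be supplied explicitly. Below is a minimal standalone sketch of the same flow through the public API. The file paths are hypothetical, and the `CompileSpec::Device` field names are assumptions based on the TRTorch C++ API rather than anything this patch defines:

```cpp
// Minimal sketch of what trtorchc --embed-engine does internally.
// "model.engine" / "model_trt.ts" are hypothetical paths, and the Device
// member names are assumed from the TRTorch C++ API (trtorch/trtorch.h).
#include <algorithm>
#include <fstream>
#include <iterator>
#include <string>

#include "trtorch/trtorch.h"

int main() {
  // Read the prebuilt serialized TensorRT engine byte-for-byte,
  // mirroring read_buf above
  std::ifstream stream("model.engine", std::ios::binary);
  std::string engine;
  stream >> std::noskipws;
  std::copy(
      std::istream_iterator<char>(stream),
      std::istream_iterator<char>(),
      std::back_inserter(engine));

  // The engine was built for a specific target, so a device spec is required
  trtorch::CompileSpec::Device device;
  device.device_type = trtorch::CompileSpec::Device::DeviceType::kGPU;
  device.gpu_id = 0;

  // Wrap the raw engine in a fresh TorchScript module and serialize it;
  // the result loads with torch.jit.load() like any compiled module
  auto trt_mod = trtorch::EmbedEngineInNewModule(engine, device);
  trt_mod.save("model_trt.ts");
  return 0;
}
```

One design note on the read idiom: `std::noskipws` matters because `std::istream_iterator<char>` skips whitespace by default, which would silently drop bytes from the binary engine blob.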
diff --git a/docsrc/tutorials/trtorchc.rst b/docsrc/tutorials/trtorchc.rst
index a83571f64b..c78bf1fbe3 100644
--- a/docsrc/tutorials/trtorchc.rst
+++ b/docsrc/tutorials/trtorchc.rst
@@ -19,79 +19,83 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
     trtorchc [input_file_path] [output_file_path] [input_specs...]
     {OPTIONS}
 
-    TRTorch is a compiler for TorchScript, it will compile and optimize
-    TorchScript programs to run on NVIDIA GPUs using TensorRT
+    TRTorch is a compiler for TorchScript, it will compile and optimize
+    TorchScript programs to run on NVIDIA GPUs using TensorRT
 
-    OPTIONS:
+    OPTIONS:
 
-      -h, --help                        Display this help menu
-                                        Verbiosity of the compiler
-      -v, --verbose                     Dumps debugging information about the
-                                        compilation process onto the console
-      -w, --warnings                    Disables warnings generated during
-                                        compilation onto the console (warnings
-                                        are on by default)
-      --i, --info                       Dumps info messages generated during
-                                        compilation onto the console
-      --build-debuggable-engine         Creates a debuggable engine
-      --use-strict-types                Restrict operating type to only use set
-                                        operation precision
-      --allow-gpu-fallback              (Only used when targeting DLA
-                                        (device-type)) Lets engine run layers on
-                                        GPU if they are not supported on DLA
-      --disable-tf32                    Prevent Float32 layers from using the
-                                        TF32 data format
-      -p[precision...],
-      --enabled-precison=[precision...] (Repeatable) Enabling an operating
-                                        precision for kernels to use when
-                                        building the engine (Int8 requires a
-                                        calibration-cache argument) [ float |
-                                        float32 | f32 | half | float16 | f16 |
-                                        int8 | i8 ] (default: float)
-      -d[type], --device-type=[type]    The type of device the engine should be
-                                        built for [ gpu | dla ] (default: gpu)
-      --gpu-id=[gpu_id]                 GPU id if running on multi-GPU platform
-                                        (defaults to 0)
-      --dla-core=[dla_core]             DLACore id if running on available DLA
-                                        (defaults to 0)
-      --engine-capability=[capability]  The type of device the engine should be
-                                        built for [ default | safe_gpu |
-                                        safe_dla ]
-      --calibration-cache-file=[file_path]
-                                        Path to calibration cache file to use
-                                        for post training quantization
-      --num-min-timing-iter=[num_iters] Number of minimization timing iterations
-                                        used to select kernels
-      --num-avg-timing-iters=[num_iters]
-                                        Number of averaging timing iterations
-                                        used to select kernels
-      --workspace-size=[workspace_size] Maximum size of workspace given to
-                                        TensorRT
-      --max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
-                                        set, 0 means not set)
-      -t[threshold],
-      --threshold=[threshold]           Maximum acceptable numerical deviation
-                                        from standard torchscript output
-                                        (default 2e-5)
-      --save-engine                     Instead of compiling a full a
-                                        TorchScript program, save the created
-                                        engine to the path specified as the
-                                        output path
-      input_file_path                   Path to input TorchScript file
-      output_file_path                  Path for compiled TorchScript (or
-                                        TensorRT engine) file
-      input_specs...                    Specs for inputs to engine, can either
-                                        be a single size or a range defined by
-                                        Min, Optimal, Max sizes, e.g.
-                                        "(N,..,C,H,W)"
-                                        "[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]".
-                                        Data Type and format can be specified by
-                                        adding an "@" followed by dtype and "%"
-                                        followed by format to the end of the
-                                        shape spec. e.g. "(3, 3, 32,
-                                        32)@f16%NHWC"
-      "--" can be used to terminate flag options and force all following
-      arguments to be treated as positional options
+      -h, --help                        Display this help menu
+                                        Verbiosity of the compiler
+      -v, --verbose                     Dumps debugging information about the
+                                        compilation process onto the console
+      -w, --warnings                    Disables warnings generated during
+                                        compilation onto the console (warnings
+                                        are on by default)
+      --i, --info                       Dumps info messages generated during
+                                        compilation onto the console
+      --build-debuggable-engine         Creates a debuggable engine
+      --use-strict-types                Restrict operating type to only use set
+                                        operation precision
+      --allow-gpu-fallback              (Only used when targeting DLA
+                                        (device-type)) Lets engine run layers on
+                                        GPU if they are not supported on DLA
+      --disable-tf32                    Prevent Float32 layers from using the
+                                        TF32 data format
+      -p[precision...],
+      --enabled-precison=[precision...] (Repeatable) Enabling an operating
+                                        precision for kernels to use when
+                                        building the engine (Int8 requires a
+                                        calibration-cache argument) [ float |
+                                        float32 | f32 | half | float16 | f16 |
+                                        int8 | i8 ] (default: float)
+      -d[type], --device-type=[type]    The type of device the engine should be
+                                        built for [ gpu | dla ] (default: gpu)
+      --gpu-id=[gpu_id]                 GPU id if running on multi-GPU platform
+                                        (defaults to 0)
+      --dla-core=[dla_core]             DLACore id if running on available DLA
+                                        (defaults to 0)
+      --engine-capability=[capability]  The type of device the engine should be
+                                        built for [ default | safe_gpu |
+                                        safe_dla ]
+      --calibration-cache-file=[file_path]
+                                        Path to calibration cache file to use
+                                        for post training quantization
+      --embed-engine                    Whether to treat input file as a
+                                        serialized TensorRT engine and embed it
+                                        into a TorchScript module (device spec
+                                        must be provided)
+      --num-min-timing-iter=[num_iters] Number of minimization timing iterations
+                                        used to select kernels
+      --num-avg-timing-iters=[num_iters]
+                                        Number of averaging timing iterations
+                                        used to select kernels
+      --workspace-size=[workspace_size] Maximum size of workspace given to
+                                        TensorRT
+      --max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
+                                        set, 0 means not set)
+      -t[threshold],
+      --threshold=[threshold]           Maximum acceptable numerical deviation
+                                        from standard torchscript output
+                                        (default 2e-5)
+      --save-engine                     Instead of compiling a full a
+                                        TorchScript program, save the created
+                                        engine to the path specified as the
+                                        output path
+      input_file_path                   Path to input TorchScript file
+      output_file_path                  Path for compiled TorchScript (or
+                                        TensorRT engine) file
+      input_specs...                    Specs for inputs to engine, can either
+                                        be a single size or a range defined by
+                                        Min, Optimal, Max sizes, e.g.
+                                        "(N,..,C,H,W)"
+                                        "[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]".
+                                        Data Type and format can be specified by
+                                        adding an "@" followed by dtype and "%"
+                                        followed by format to the end of the
+                                        shape spec. e.g. "(3, 3, 32,
+                                        32)@f16%NHWC"
+      "--" can be used to terminate flag options and force all following
+      arguments to be treated as positional options
 e.g.