Skip to content

Commit

Permalink
feat(//cpp/api): Functional Dataloader based PTQ
Browse files Browse the repository at this point in the history
- A couple of assorted fixes in the conversion implementation
- Set up the space to have phase specific settings inside the compiler
- PTQ Calibrator implementation moved to the public API, which means Python
  will need its own, though it probably did anyway
- PTQ now works with a dataloader, and all the overrides for the calibration
  algorithm work
- CIFAR10 Dataloader implementation
- The application still has bugs in reporting accuracy and in reading from
  the calibration cache

Signed-off-by: Naren Dasan <[email protected]>
Signed-off-by: Naren Dasan <[email protected]>
  • Loading branch information
narendasan committed Apr 22, 2020
1 parent 676bf56 commit f022dfe
Show file tree
Hide file tree
Showing 28 changed files with 758 additions and 261 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@ py/.eggs
._DS_Store
*.pth
*.pyc
cpp/ptq/training/vgg16/data/
cpp/ptq/training/vgg16/data/*
*.bin
cpp/ptq/datasets/data/
._.DS_Store
*.tar.gz
2 changes: 1 addition & 1 deletion core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ cc_library(
"@libtorch//:libtorch",
"@tensorrt//:nvinfer"
],
alwayslink=True,
alwayslink=True,
)


Expand Down
40 changes: 21 additions & 19 deletions core/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,24 @@
namespace trtorch {
namespace core {

c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::string method_name, std::shared_ptr<torch::jit::Graph>& g) {
c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::string method_name, std::shared_ptr<torch::jit::Graph>& g) {

std::vector<c10::Argument> args;
for (auto in : g->inputs()) {
args.push_back(c10::Argument(in->debugName(), in->type()));
}

std::vector<c10::Argument> returns;
for (auto out : g->outputs()) {
returns.push_back(c10::Argument(out->debugName(), out->type()));
}

return c10::FunctionSchema(method_name, method_name, args, returns);
}


void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr<torch::jit::Graph>& g, std::string& serialized_engine) {
execution::EngineID uid = execution::RegisterEngineFromSerializedEngine(serialized_engine);
execution::EngineID uid = execution::RegisterEngineFromSerializedEngine(serialized_engine);
auto schema = execution::GetEngineFunctionSchema(uid);
auto num_io = execution::GetEngineIO(uid);

Expand All @@ -53,14 +53,14 @@ void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr<torch::jit
in_val->setType(c10::TensorType::get());
graph_inputs.push_back(in_val);
}

auto engine_node = g->create(c10::Symbol::fromQualString(schema.name()), torch::jit::ArrayRef<torch::jit::Value*>(graph_inputs), num_io.second);
g->block()->appendNode(engine_node);

for (auto o : engine_node->outputs()) {
g->registerOutput(o);
}

return;
}

Expand All @@ -69,48 +69,50 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod,
auto g = mod.get_method(method_name).graph();
// Go through PyTorch Lowering to simplify graph and extract weight parameters
auto graph_and_parameters = torch::jit::LowerGraph(*g, mod._ivalue());

g = graph_and_parameters.first;

// Go through TRTorch Lowering to reformat graph to be conversion friendly
// and also segment for accelerators and executors (TRT-DLA, TRT-GPU, PYT)
lowering::LowerGraph(g);

auto params = graph_and_parameters.second;
auto named_params = conversion::get_named_params(g->inputs(), params);
LOG_DEBUG(*g << "(CheckMethodOperatorSupport)\n");

// Is this necessary?
lowering::LowerBlock(g->block());

return conversion::VerifyConverterSupportForBlock(g->block());
}

std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod,
std::string method_name,
conversion::ExtraInfo cfg) {
ExtraInfo cfg) {
auto convert_cfg = std::move(cfg.convert_info);

auto g = mod.get_method(method_name).graph();
// Go through PyTorch Lowering to simplify graph and extract weight parameters
auto graph_and_parameters = torch::jit::LowerGraph(*g, mod._ivalue());

g = graph_and_parameters.first;

// Go through TRTorch Lowering to reformat graph to be conversion friendly
// and also segment for accelerators and executors (TRT-DLA, TRT-GPU, PYT)
lowering::LowerGraph(g);

auto params = graph_and_parameters.second;
auto named_params = conversion::get_named_params(g->inputs(), params);
LOG_INFO(*g << "(CompileGraph)\n");

// Is this necessary?
lowering::LowerBlock(g->block());
auto engine = ConvertBlockToEngine(g->block(), cfg, named_params);
auto engine = ConvertBlockToEngine(g->block(), convert_cfg, named_params);
return std::move(engine);
}

torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod,
conversion::ExtraInfo cfg) {
ExtraInfo cfg) {
// TODO: Should be doing a functional transform but need PR #31978
// [jit] More robust mangling
// torch::jit::script::Module new_mod = mod.clone();
Expand All @@ -128,7 +130,7 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod,

return new_mod;
}

} // namespace core
} // namespace trtorch

13 changes: 10 additions & 3 deletions core/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@

namespace trtorch {
namespace core {

struct ExtraInfo {
ExtraInfo(std::vector<conversion::InputRange> input_ranges)
: convert_info(std::move(input_ranges)) {}
conversion::ConversionInfo convert_info;
};

bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string method_name);

std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod,
std::string method_name, conversion::ExtraInfo cfg);
std::string method_name, ExtraInfo cfg);

torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, conversion::ExtraInfo cfg);
torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo cfg);

} // namespace core
} // namespace trtorch
} // namespace trtorch
4 changes: 2 additions & 2 deletions core/conversion/conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ void AddParamsToCtxValueMap(ConversionCtx* ctx, GraphParams& params) {
}
}

void ConvertBlockToNetDef(ConversionCtx* ctx, const torch::jit::Block* b, ExtraInfo build_info, GraphParams& static_params) {
void ConvertBlockToNetDef(ConversionCtx* ctx, const torch::jit::Block* b, ConversionInfo build_info, GraphParams& static_params) {
LOG_INFO(ctx->logger, "Converting Block");

auto inputs = b->inputs();
Expand Down Expand Up @@ -221,7 +221,7 @@ void ConvertBlockToNetDef(ConversionCtx* ctx, const torch::jit::Block* b, ExtraI
// a serialized TensorRT engine that can be deserialized and run

// Probably should consolidate these two functions
std::string ConvertBlockToEngine(const torch::jit::Block* b, ExtraInfo build_info, GraphParams& static_params) {
std::string ConvertBlockToEngine(const torch::jit::Block* b, ConversionInfo build_info, GraphParams& static_params) {
ConversionCtx ctx(build_info.engine_settings);
ConvertBlockToNetDef(&ctx, b, build_info, static_params);
std::string engine = ctx.SerializeEngine();
Expand Down
6 changes: 3 additions & 3 deletions core/conversion/conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ struct InputRange {
std::vector<int64_t> max_shape);
};

struct ExtraInfo {
struct ConversionInfo {
std::vector<InputRange> input_ranges;
BuilderSettings engine_settings;
ExtraInfo(std::vector<InputRange> input_ranges)
ConversionInfo(std::vector<InputRange> input_ranges)
: input_ranges(std::move(input_ranges)), engine_settings(BuilderSettings()) {}
};

Expand All @@ -43,7 +43,7 @@ GraphParams get_named_params(c10::ArrayRef<torch::jit::Value*> inputs, std::vect

// Converts a already lowered block (blocks with no sub blocks) to
// a serialized TensorRT engine that can be deserialized and run
std::string ConvertBlockToEngine(const torch::jit::Block* b, ExtraInfo build_info, GraphParams& static_params);
std::string ConvertBlockToEngine(const torch::jit::Block* b, ConversionInfo build_info, GraphParams& static_params);

bool OpSupported(const torch::jit::Node* n);

Expand Down
2 changes: 1 addition & 1 deletion core/conversion/conversionctx/ConversionCtx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
<< "\n Max Workspace Size: " << s.workspace_size \
<< "\n Device Type: " << s.device \
<< "\n Engine Capability: " << s.capability \
<< "\n Calibrator Created: " << s.calibrator ? true : false;
<< "\n Calibrator Created: " << (s.calibrator != nullptr);
return os;
}

Expand Down
3 changes: 2 additions & 1 deletion core/conversion/converters/impl/batch_norm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ volatile auto batch_norm_registrations = RegisterNodeConversionPatterns()
auto gamma = args[1].unwrapToTensor();

if (/*training*/ args[5].unwrapToBool()) {
LOG_WARNING("TensorRT only converts forward pass of graphs, but saw training = True, may see undefined behavior, consider placing module in eval mode");
LOG_WARNING(R"WARN(TRTorch only converts forward pass of graphs, but saw training = True, may see
unexpected behavior, consider placing module in eval mode before exporting the TorchScript module)WARN");
}

// If gamma is None this fails
Expand Down
9 changes: 3 additions & 6 deletions core/conversion/converters/impl/pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,20 +79,17 @@ auto pooling_registrations = RegisterNodeConversionPatterns()
for (size_t i = 0; i < out_shape.size(); i++) {
stride[(stride.size() - 1) - i] = in_shape[(in_shape.size() - 1) - i] / out_shape[(out_shape.size() - 1) - i];
}
LOG_DEBUG("Stride" << util::toDims(stride));
LOG_DEBUG("Stride: " << util::toDims(stride));

std::vector<int64_t> window(out_shape.size());
for (size_t i = 0; i < out_shape.size(); i++) {
window[window.size() - 1 - i] = in_shape[in_shape.size() - 1 - i] - (out_shape[out_shape.size() - 1 - i] - 1) * stride[stride.size() - 1 - i];
}

LOG_DEBUG("Window" << util::toDims(window));
LOG_DEBUG("Window: " << util::toDims(window));

auto new_layer = ctx->net->addPoolingNd(*in, nvinfer1::PoolingType::kAVERAGE, util::toDims(window));
if (!new_layer) {
LOG_ERROR("Unable to create average pooling layer from node: " << *n);
return false;
}
TRTORCH_CHECK(new_layer, "Unable to create average pooling layer from node: " << *n);

new_layer->setStrideNd(util::toDims(stride));

Expand Down
Empty file removed core/quantization/BUILD
Empty file.
64 changes: 0 additions & 64 deletions core/quantization/TRTEntropyCalibrator.cpp

This file was deleted.

69 changes: 0 additions & 69 deletions core/quantization/quantization.h

This file was deleted.

Loading

0 comments on commit f022dfe

Please sign in to comment.