diff --git a/src/common/transformations/src/transformations/mlir/convert.cpp b/src/common/transformations/src/transformations/mlir/convert.cpp index 63bafdc0d5fa1a..b5a076749ee5fc 100644 --- a/src/common/transformations/src/transformations/mlir/convert.cpp +++ b/src/common/transformations/src/transformations/mlir/convert.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include // TODO: Prune unused headers -- it's hard to understand needed ones @@ -187,7 +188,7 @@ mlir::OwningOpRef ngraph_to_mlir(MLIRContext* context, // This pass converts a group of nodes into a single MLIROp -NodePtr ngraph_to_mlir_op(MLIRContext* context, SubgraphPtr subgraph) { +NodePtr ngraph_to_mlir_op(MLIRContext* context, SubgraphPtr subgraph, bool tpp_mlir_enabled) { mlir::OwningOpRef module = ngraph_to_mlir(context, subgraph->inputs, subgraph->nodes, subgraph->outputs); const auto& inputs = subgraph->inputs; @@ -203,8 +204,9 @@ NodePtr ngraph_to_mlir_op(MLIRContext* context, SubgraphPtr subgraph) { if(0 == input_map.count(symbol)) { input_map[symbol] = Index(i, j); } else { - std::cerr << "[ DEBUG ] Lost equality constraint for dimensions in output " << input << "\n" - << " If the constraint is violated in runtime it will result in the undefined behaviour.\n"; + OPENVINO_MLIR_DEBUG_PRINT( + "[ DEBUG ] Lost equality constraint for dimensions in output " << input << ".\n" << + " If the constraint is violated in runtime it will result in the undefined behaviour.\n"); } } } @@ -230,7 +232,7 @@ NodePtr ngraph_to_mlir_op(MLIRContext* context, SubgraphPtr subgraph) { } return std::make_shared( subgraph->inputs, - std::make_shared(std::move(module)), + std::make_shared(std::move(module), tpp_mlir_enabled), output_types, output_map ); @@ -251,16 +253,20 @@ void replace_subgraph(SubgraphPtr subgraph, NodePtr node) { class Partitioner : public ov::pass::ModelPass { MLIRContext* context; + bool tpp_mlir_enabled; public: OPENVINO_RTTI("Partitioner"); - Partitioner(MLIRContext* context) : 
context(context) {} + Partitioner(MLIRContext* context, bool tpp_mlir_enabled) : + context(context), + tpp_mlir_enabled(tpp_mlir_enabled) + {} bool run_on_model(const std::shared_ptr& model) override { SubgraphTracker tracker([this](SubgraphPtr subgraph) { - auto mlir_op = ngraph_to_mlir_op(context, subgraph); + auto mlir_op = ngraph_to_mlir_op(context, subgraph, tpp_mlir_enabled); replace_subgraph(subgraph, mlir_op); - std::cerr << "Created MLIR op: " << mlir_op << "\n"; + OPENVINO_MLIR_DEBUG_PRINT("Created MLIR op: " << mlir_op << "\n"); } ); for(auto node: model->get_ordered_ops()) { @@ -272,7 +278,7 @@ class Partitioner : public ov::pass::ModelPass { }; -void injectMLIR(std::shared_ptr model, MLIRContext* context) { +void injectMLIR(std::shared_ptr model, MLIRContext* context, bool tpp_mlir_enabled) { ov::pass::Manager manager; using namespace ov::op; manager.set_per_pass_validation(false); @@ -283,17 +289,26 @@ void injectMLIR(std::shared_ptr model, MLIRContext* context) { manager.register_pass>(ov::element::f32); manager.register_pass(); manager.register_pass(); - manager.register_pass(context); + manager.register_pass(context, tpp_mlir_enabled); manager.run_passes(model); model->validate_nodes_and_infer_types(); } -MLIRContext* get_shared_mlir_context() { +MLIRContext* get_shared_mlir_context(bool tpp_mlir_enabled_current) { // Gives MLIRContext instance shared for entire OV process and initialized once upon the initial request // FIXME: Bind with OpenVINO lifetime in the sutable class instead of dirty tricking with static lifetime static std::shared_ptr context; + static bool tpp_mlir_enabled = tpp_mlir_enabled_current; + + if(context) { + if(tpp_mlir_enabled_current != tpp_mlir_enabled) { + OPENVINO_MLIR_DEBUG_PRINT("[ DEBUG ] Switched TPP mode, reinitialize MLIR context\n"); + tpp_mlir_enabled = tpp_mlir_enabled_current; + context.reset(); + } + } if (!context) { @@ -301,24 +316,28 @@ MLIRContext* get_shared_mlir_context() { 
llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); - std::cerr << "[ DEBUG ] Using TPP_MLIR: "; - #if TPP_MLIR + OPENVINO_MLIR_DEBUG_PRINT("[ DEBUG ] Using TPP_MLIR: "); + if(tpp_mlir_enabled) { + OPENVINO_MLIR_DEBUG_PRINT("YES\n"); // Initialize GPU-related LLVM machinery - tpp::initializeGpuTargets(); - std::cerr << "YES\n"; - #else - std::cerr << "NO\n"; - #endif + #ifdef TPP_MLIR + tpp::initializeGpuTargets(); + #endif + } else { + OPENVINO_MLIR_DEBUG_PRINT("NO\n"); + } // Add the following to include *all* MLIR Core dialects, or selectively // include what you need like above. You only need to register dialects that // will be *parsed* by the tool, not the one generated DialectRegistry registry; - #if TPP_MLIR - registry.insert(); - registry.insert(); - registry.insert(); - #endif + if(tpp_mlir_enabled) { + #ifdef TPP_MLIR + registry.insert(); + registry.insert(); + registry.insert(); + #endif + } registerAllDialects(registry); registerAllExtensions(registry); @@ -339,5 +358,20 @@ MLIRContext* get_shared_mlir_context() { } // namespace void ov::pass::transformMLIR(std::shared_ptr model) { - injectMLIR(model, get_shared_mlir_context()); + if(util::getenv_bool("OV_MLIR", true)) { + bool tpp_mlir_default = + #ifdef TPP_MLIR + true; + #else + false; + #endif + bool tpp_mlir_enabled = util::getenv_bool("OV_MLIR_TPP", tpp_mlir_default); + #ifndef TPP_MLIR + OPENVINO_ASSERT(!tpp_mlir_enabled, + "[ ERROR ] OpenVINO wasn't compiled with TPP_MLIR support, " + "but OV_MLIR_TPP environment variable is set to enable it."); + #endif + + injectMLIR(model, get_shared_mlir_context(tpp_mlir_enabled), tpp_mlir_enabled); + } } diff --git a/src/common/transformations/src/transformations/mlir/convert_common.cpp b/src/common/transformations/src/transformations/mlir/convert_common.cpp index 1499acb0ba844f..de8782c77c12bc 100644 --- a/src/common/transformations/src/transformations/mlir/convert_common.cpp +++ 
b/src/common/transformations/src/transformations/mlir/convert_common.cpp @@ -4,6 +4,9 @@ #include "convert_common.hpp" +#include + + namespace { using namespace mlir; @@ -58,6 +61,9 @@ IntegerType getBool8Type(MLIRContext* ctx) { namespace ov { namespace mlir { +bool is_debug() { + return util::getenv_bool("OV_MLIR_DEBUG", false); +} Location createLayerLocation(MLIRContext* ctx, const std::string& layerName, const std::string& layerType) { const auto layerNameAttr = StringAttr::get(ctx, layerName); diff --git a/src/common/transformations/src/transformations/mlir/convert_common.hpp b/src/common/transformations/src/transformations/mlir/convert_common.hpp index 6622ea5ed70c0c..d6f794bb24bb12 100644 --- a/src/common/transformations/src/transformations/mlir/convert_common.hpp +++ b/src/common/transformations/src/transformations/mlir/convert_common.hpp @@ -4,6 +4,8 @@ #pragma once +#include + #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" @@ -17,6 +19,11 @@ namespace ov { namespace mlir { +bool is_debug(); + +#define OPENVINO_MLIR_DEBUG(X) do if(::ov::mlir::is_debug()) { X; } while(false) +#define OPENVINO_MLIR_DEBUG_PRINT(X) do if(::ov::mlir::is_debug()) { ::std::cerr << X; } while(false) + using namespace ::mlir; Location createLayerLocation(MLIRContext* ctx, const std::string& layerName, const std::string& layerType); diff --git a/src/common/transformations/src/transformations/mlir/mlir_op.cpp b/src/common/transformations/src/transformations/mlir/mlir_op.cpp index eea1019f60db91..74dee5e4db8393 100644 --- a/src/common/transformations/src/transformations/mlir/mlir_op.cpp +++ b/src/common/transformations/src/transformations/mlir/mlir_op.cpp @@ -65,77 +65,73 @@ using namespace mlir; using NodePtr = std::shared_ptr; using SymbolPtr = std::shared_ptr; -void prepareMLIRKernelWithoutWrapper(mlir::OwningOpRef& module) { - // A set of default passes that lower any input IR to LLVM +void 
prepareMLIRKernelWithoutWrapper(mlir::OwningOpRef& module, bool tpp_mlir_enabled) { PassManager pm(module->getContext()); - -#if TPP_MLIR - - tpp::DefaultPipelineOptions defPipelineOpts; - pm.addPass(tpp::createDefaultPipeline(defPipelineOpts)); - -#else // Simplified default lowering to LLVM from LLVM tests - - // Cleanup before bufferization. - // Simplifies IR to allow better bufferization. - pm.addNestedPass(createCanonicalizerPass()); - pm.addNestedPass(createCSEPass()); - - // Remove empty tensors to avoid converting them into temporary buffers. - pm.addPass(bufferization::createEmptyTensorEliminationPass()); - - pm.addPass(bufferization::createOneShotBufferizePass()); - pm.addNestedPass(bufferization::createFinalizingBufferizePass()); - - // Cleanup after bufferization - possibly remove redundant copies. - pm.addNestedPass(createCanonicalizerPass()); - pm.addNestedPass(createCSEPass()); - - // Deallocation pipeline to avoid memory leaks from created temporary buffers. - pm.addPass(memref::createExpandReallocPass(/*emitDeallocs=*/false)); - pm.addPass(createCanonicalizerPass()); - bufferization::DeallocationOptions deallocOpts; - deallocOpts.privateFuncDynamicOwnership = false; - pm.addPass(bufferization::createOwnershipBasedBufferDeallocationPass(deallocOpts)); - pm.addPass(createCanonicalizerPass()); - pm.addPass(bufferization::createBufferDeallocationSimplificationPass()); - pm.addPass(bufferization::createLowerDeallocationsPass()); - pm.addPass(createCSEPass()); - pm.addPass(createCanonicalizerPass()); - - // Blanket-convert any remaining high-level vector ops to loops if any remain. - pm.addNestedPass(createConvertVectorToSCFPass()); - // pm.addNestedPass(createLinalgGeneralizeNamedOpsPass()); - // Blanket-convert any remaining linalg ops to loops if any remain. - pm.addNestedPass(createConvertLinalgToLoopsPass()); - // Blanket-convert any remaining affine ops if any remain. - pm.addPass(createLowerAffinePass()); - // Convert SCF to CF (always needed). 
- pm.addPass(createConvertSCFToCFPass()); - // Sprinkle some cleanups. - pm.addPass(createCanonicalizerPass()); - pm.addPass(createCSEPass()); - // Blanket-convert any remaining linalg ops to LLVM if any remain. - // pm.addPass(createConvertLinalgToLLVMPass()); // no such pass - // Convert vector to LLVM (always needed). - pm.addPass(createConvertVectorToLLVMPass()); - // Convert Math to LLVM (always needed). - pm.addNestedPass(createConvertMathToLLVMPass()); - // Expand complicated MemRef operations before lowering them. - pm.addPass(memref::createExpandStridedMetadataPass()); - // The expansion may create affine expressions. Get rid of them. - pm.addPass(createLowerAffinePass()); - // Convert MemRef to LLVM (always needed). - // pm.addPass(memref::createExpandOpsPass()); - pm.addPass(createFinalizeMemRefToLLVMConversionPass()); - // Convert Func to LLVM (always needed). - pm.addPass(createConvertFuncToLLVMPass()); - // Convert Index to LLVM (always needed). - pm.addPass(createConvertIndexToLLVMPass()); - // Convert remaining unrealized_casts (always needed). - pm.addPass(createReconcileUnrealizedCastsPass()); - -#endif + if(tpp_mlir_enabled) { + #ifdef TPP_MLIR + tpp::DefaultPipelineOptions defPipelineOpts; + pm.addPass(tpp::createDefaultPipeline(defPipelineOpts)); + #endif + } else { + // Cleanup before bufferization. + // Simplifies IR to allow better bufferization. + pm.addNestedPass(createCanonicalizerPass()); + pm.addNestedPass(createCSEPass()); + + // Remove empty tensors to avoid converting them into temporary buffers. + pm.addPass(bufferization::createEmptyTensorEliminationPass()); + + pm.addPass(bufferization::createOneShotBufferizePass()); + pm.addNestedPass(bufferization::createFinalizingBufferizePass()); + + // Cleanup after bufferization - possibly remove redundant copies. + pm.addNestedPass(createCanonicalizerPass()); + pm.addNestedPass(createCSEPass()); + + // Deallocation pipeline to avoid memory leaks from created temporary buffers. 
+ pm.addPass(memref::createExpandReallocPass(/*emitDeallocs=*/false)); + pm.addPass(createCanonicalizerPass()); + bufferization::DeallocationOptions deallocOpts; + deallocOpts.privateFuncDynamicOwnership = false; + pm.addPass(bufferization::createOwnershipBasedBufferDeallocationPass(deallocOpts)); + pm.addPass(createCanonicalizerPass()); + pm.addPass(bufferization::createBufferDeallocationSimplificationPass()); + pm.addPass(bufferization::createLowerDeallocationsPass()); + pm.addPass(createCSEPass()); + pm.addPass(createCanonicalizerPass()); + + // Blanket-convert any remaining high-level vector ops to loops if any remain. + pm.addNestedPass(createConvertVectorToSCFPass()); + // pm.addNestedPass(createLinalgGeneralizeNamedOpsPass()); + // Blanket-convert any remaining linalg ops to loops if any remain. + pm.addNestedPass(createConvertLinalgToLoopsPass()); + // Blanket-convert any remaining affine ops if any remain. + pm.addPass(createLowerAffinePass()); + // Convert SCF to CF (always needed). + pm.addPass(createConvertSCFToCFPass()); + // Sprinkle some cleanups. + pm.addPass(createCanonicalizerPass()); + pm.addPass(createCSEPass()); + // Blanket-convert any remaining linalg ops to LLVM if any remain. + // pm.addPass(createConvertLinalgToLLVMPass()); // no such pass + // Convert vector to LLVM (always needed). + pm.addPass(createConvertVectorToLLVMPass()); + // Convert Math to LLVM (always needed). + pm.addNestedPass(createConvertMathToLLVMPass()); + // Expand complicated MemRef operations before lowering them. + pm.addPass(memref::createExpandStridedMetadataPass()); + // The expansion may create affine expressions. Get rid of them. + pm.addPass(createLowerAffinePass()); + // Convert MemRef to LLVM (always needed). + // pm.addPass(memref::createExpandOpsPass()); + pm.addPass(createFinalizeMemRefToLLVMConversionPass()); + // Convert Func to LLVM (always needed). + pm.addPass(createConvertFuncToLLVMPass()); + // Convert Index to LLVM (always needed). 
+ pm.addPass(createConvertIndexToLLVMPass()); + // Convert remaining unrealized_casts (always needed). + pm.addPass(createReconcileUnrealizedCastsPass()); + } auto result = pm.run(module.get()); if (failed(result)) { @@ -225,7 +221,7 @@ struct MemRef { assert(byte_strides[i] % element_size == 0); // TODO: handle case when stride is not aligned (restrict at OV API level) strides[i] = byte_strides[i] / element_size; - //std::cout << "stride [" << i << "] = " << strides[i] << "\n"; + //std::cerr << "stride [" << i << "] = " << strides[i] << "\n"; } } @@ -256,22 +252,24 @@ namespace mlir { using namespace ::mlir; -MLIREvaluate::MLIREvaluate(OwningOpRef _module) : module(std::move(_module)) { - if (true) { - std::cerr << "[ DEBUG ] Source MLIR:\n"; - std::cerr << "-----------------------------------------\n"; - module->dump(); - std::cerr << "-----------------------------------------\n"; - } +MLIREvaluate::MLIREvaluate(OwningOpRef _module, bool tpp_mlir_enabled) : + module(std::move(_module)) { - prepareMLIRKernelWithoutWrapper(module); + OPENVINO_MLIR_DEBUG_PRINT( + "[ DEBUG ] Source MLIR:\n" + "-----------------------------------------\n"); + OPENVINO_MLIR_DEBUG(module->dump()); + OPENVINO_MLIR_DEBUG_PRINT( + "-----------------------------------------\n"); - if (true) { - std::cerr << "[ DEBUG ] Target LLVM:\n"; - std::cerr << "-----------------------------------------\n"; - module->dump(); - std::cerr << "-----------------------------------------\n"; - } + prepareMLIRKernelWithoutWrapper(module, tpp_mlir_enabled); + + OPENVINO_MLIR_DEBUG_PRINT( + "[ DEBUG ] Target LLVM:\n" + "-----------------------------------------\n"); + OPENVINO_MLIR_DEBUG(module->dump()); + OPENVINO_MLIR_DEBUG_PRINT( + "-----------------------------------------\n"); auto optPipeline = mlir::makeOptimizingTransformer(2, /*sizeLevel=*/0, // FIXME: HARDCODED @@ -324,6 +322,7 @@ bool MLIROp::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) memref_args.push_back(MemRef(inputs[i])); 
} for (size_t i = 0; i < outputs.size(); ++i) { + // TODO: Optimize by adding all dimensions to dimensions_map, not only dynamic Shape target; PartialShape expected = get_output_partial_shape(i); for(size_t j = 0; j < expected.size(); ++j) { diff --git a/src/common/transformations/src/transformations/mlir/mlir_op.hpp b/src/common/transformations/src/transformations/mlir/mlir_op.hpp index 59248f1d14641d..72fbcc462cd805 100644 --- a/src/common/transformations/src/transformations/mlir/mlir_op.hpp +++ b/src/common/transformations/src/transformations/mlir/mlir_op.hpp @@ -29,7 +29,7 @@ class MLIREvaluate { public: - MLIREvaluate(OwningOpRef _module); + MLIREvaluate(OwningOpRef _module, bool tpp_mlir_enabled); bool invoke_packed(std::vector& args); };