From f9057b75cc0ef8b0c5d639ce7d7487b1a8272c8b Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 14:25:40 +0800 Subject: [PATCH 01/14] naive moving --- .gitignore | 91 +++++ .gitmodules | 3 + CMakeLists.txt | 110 ++++++ VERSION_NUMBER | 1 + onnx_opt/__init__.py | 59 +++ onnx_opt/cpp2py_export.cc | 37 ++ onnx_opt/optimize.cc | 45 +++ onnx_opt/optimize.h | 56 +++ onnx_opt/pass.cc | 103 ++++++ onnx_opt/pass.h | 212 +++++++++++ onnx_opt/pass_manager.cc | 47 +++ onnx_opt/pass_manager.h | 51 +++ onnx_opt/pass_registry.cc | 18 + onnx_opt/pass_registry.h | 88 +++++ onnx_opt/passes/eliminate_deadend.h | 39 ++ onnx_opt/passes/eliminate_identity.h | 38 ++ onnx_opt/passes/eliminate_nop_dropout.h | 45 +++ .../passes/eliminate_nop_monotone_argmax.h | 66 ++++ onnx_opt/passes/eliminate_nop_pad.h | 77 ++++ onnx_opt/passes/eliminate_nop_transpose.h | 46 +++ .../passes/eliminate_unused_initializer.h | 80 ++++ .../passes/extract_constant_to_initializer.h | 46 +++ onnx_opt/passes/fuse_add_bias_into_conv.h | 157 ++++++++ onnx_opt/passes/fuse_bn_into_conv.h | 190 ++++++++++ onnx_opt/passes/fuse_consecutive_concats.h | 76 ++++ .../passes/fuse_consecutive_log_softmax.h | 49 +++ .../fuse_consecutive_reduce_unsqueeze.h | 65 ++++ onnx_opt/passes/fuse_consecutive_squeezes.h | 80 ++++ onnx_opt/passes/fuse_consecutive_transposes.h | 74 ++++ .../passes/fuse_matmul_add_bias_into_gemm.h | 107 ++++++ onnx_opt/passes/fuse_pad_into_conv.h | 173 +++++++++ onnx_opt/passes/fuse_transpose_into_gemm.h | 46 +++ onnx_opt/passes/lift_lexical_references.h | 231 ++++++++++++ onnx_opt/passes/nop.h | 26 ++ onnx_opt/passes/split.h | 228 ++++++++++++ setup.py | 344 ++++++++++++++++++ third_party/onnx | 1 + 37 files changed, 3205 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 VERSION_NUMBER create mode 100644 onnx_opt/__init__.py create mode 100644 onnx_opt/cpp2py_export.cc create mode 100644 onnx_opt/optimize.cc create 
mode 100644 onnx_opt/optimize.h create mode 100644 onnx_opt/pass.cc create mode 100644 onnx_opt/pass.h create mode 100644 onnx_opt/pass_manager.cc create mode 100644 onnx_opt/pass_manager.h create mode 100644 onnx_opt/pass_registry.cc create mode 100644 onnx_opt/pass_registry.h create mode 100644 onnx_opt/passes/eliminate_deadend.h create mode 100644 onnx_opt/passes/eliminate_identity.h create mode 100644 onnx_opt/passes/eliminate_nop_dropout.h create mode 100644 onnx_opt/passes/eliminate_nop_monotone_argmax.h create mode 100644 onnx_opt/passes/eliminate_nop_pad.h create mode 100644 onnx_opt/passes/eliminate_nop_transpose.h create mode 100644 onnx_opt/passes/eliminate_unused_initializer.h create mode 100644 onnx_opt/passes/extract_constant_to_initializer.h create mode 100644 onnx_opt/passes/fuse_add_bias_into_conv.h create mode 100644 onnx_opt/passes/fuse_bn_into_conv.h create mode 100644 onnx_opt/passes/fuse_consecutive_concats.h create mode 100644 onnx_opt/passes/fuse_consecutive_log_softmax.h create mode 100644 onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h create mode 100644 onnx_opt/passes/fuse_consecutive_squeezes.h create mode 100644 onnx_opt/passes/fuse_consecutive_transposes.h create mode 100644 onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h create mode 100644 onnx_opt/passes/fuse_pad_into_conv.h create mode 100644 onnx_opt/passes/fuse_transpose_into_gemm.h create mode 100644 onnx_opt/passes/lift_lexical_references.h create mode 100644 onnx_opt/passes/nop.h create mode 100644 onnx_opt/passes/split.h create mode 100644 setup.py create mode 160000 third_party/onnx diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..512a0b353 --- /dev/null +++ b/.gitignore @@ -0,0 +1,91 @@ +## General + +# Compiled Object files +*.slo +*.lo +*.o +*.cuo + +# Compiled Dynamic libraries +*.so +*.dylib +*.pyd + +# Compiled Static libraries +*.lai +*.la +*.a + +# Compiled python +*.pyc + +# Compiled MATLAB +*.mex* + +# IPython notebook checkpoints 
+.ipynb_checkpoints + +# Editor temporaries +*.swn +*.swo +*.swp +*~ + +# Sublime Text settings +*.sublime-workspace +*.sublime-project + +# Eclipse Project settings +*.*project +.settings + +# QtCreator files +*.user + +# PyCharm files +.idea + +# Visual Studio Code files +.vscode + +# OSX dir files +.DS_Store + +## ONNX + +# build, distribute, and bins (+ python proto bindings) +build +build_* +.build_debug/* +.build_release/* +.setuptools-cmake-build/* + +# setup.py intermediates +.eggs +dist +onnx_opt.egg-info +*.ninja +.ninja_deps +.ninja_log +compile_commands.json + +# generated files +onnx/version.py +compile_commands.json + +# test generated files +.cache +.coverage +onnx/examples/.coverage.nbval +.pytest_cache +test_report + +# autocomplete +.ycm_extra_conf.py + +# test coverage data files +*.gcov + +.mypy_cache +virtualenv +venv diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..d2c597cd6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "third_party/onnx"] + path = third_party/onnx + url = https://github.com/onnx/onnx diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..2502e9224 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,110 @@ +cmake_minimum_required(VERSION 3.1) + +project(onnx_optimizer C CXX) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(ONNX_ROOT ${PROJECT_SOURCE_DIR}/third_party/onnx) +option(ONNX_BUILD_OPTIMIZER "" OFF) +add_subdirectory(${ONNX_ROOT}) + +file(GLOB_RECURSE onnx_opt_srcs "onnx_opt/*.cc" + "onnx_opt/*.h" + ) +list(REMOVE_ITEM onnx_opt_srcs "${PROJECT_SOURCE_DIR}/onnx_opt/cpp2py_export.cc") + +add_library(onnx_optimizer ${onnx_opt_srcs}) +target_link_libraries(onnx_optimizer PUBLIC onnx) + +if(BUILD_ONNX_PYTHON) + if("${PY_EXT_SUFFIX}" STREQUAL "") + if(MSVC) + set(PY_EXT_SUFFIX ".pyd") + else() + set(PY_EXT_SUFFIX ".so") + endif() + endif() + + add_library(onnx_opt_cpp2py_export MODULE "onnx_opt/cpp2py_export.cc") + set_target_properties(onnx_opt_cpp2py_export 
PROPERTIES PREFIX "") + set_target_properties(onnx_opt_cpp2py_export + PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + set_target_properties(onnx_opt_cpp2py_export PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) + set_target_properties(onnx_opt_cpp2py_export + PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + target_include_directories(onnx_opt_cpp2py_export PRIVATE + $ + $ + $ + ${PYTHON_INCLUDE_DIR}) + # pybind11 is a header only lib + find_package(pybind11 2.2) + if(pybind11_FOUND) + target_include_directories(onnx_opt_cpp2py_export PUBLIC + ${pybind11_INCLUDE_DIRS}) + else() + if(EXISTS ${ONNX_ROOT}/third_party/pybind11/include/pybind11/pybind11.h) + target_include_directories(onnx_opt_cpp2py_export PUBLIC + ${ONNX_ROOT}/third_party/pybind11/include) + else() + message(FATAL_ERROR "cannot find pybind") + endif() + endif() + + if(APPLE) + set_target_properties(onnx_opt_cpp2py_export + PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + target_link_libraries(onnx_opt_cpp2py_export + PRIVATE -Wl,-force_load,$) + elseif(MSVC) + # In MSVC, we will add whole archive in default + target_link_libraries(onnx_opt_cpp2py_export + PRIVATE -WHOLEARCHIVE:$) + elseif(CMAKE_SYSTEM_NAME STREQUAL "AIX") + # whole-archive linker option not available on AIX + target_sources(onnx_opt_cpp2py_export + PRIVATE $) + else() + # Assume everything else is like gcc + target_link_libraries(onnx_opt_cpp2py_export + PRIVATE "-Wl,--whole-archive" $ + "-Wl,--no-whole-archive") + set_target_properties(onnx_opt_cpp2py_export + PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL") + endif() + + target_link_libraries(onnx_opt_cpp2py_export PRIVATE onnx_optimizer) + + if(MSVC) + find_package(PythonInterp ${PY_VERSION} REQUIRED) + find_package(PythonLibs ${PY_VERSION} REQUIRED) + target_link_libraries(onnx_opt_cpp2py_export PRIVATE ${PYTHON_LIBRARIES}) + target_compile_options(onnx_opt_cpp2py_export + PRIVATE /MP + /WX + /wd4800 # disable warning type' : forcing + # value to bool 'true' or 'false' + # 
(performance warning) + /wd4503 # identifier' : decorated name length + # exceeded, name was truncated + /wd4146 # unary minus operator applied to + # unsigned type, result still + # unsigned from include\google\protob + # uf\wire_format_lite.h + /wd4244 # 'argument': conversion from 'google:: + # protobuf::uint64' to 'int', possible + # loss of data + /wd4267 # Conversion from 'size_t' to 'int', + # possible loss of data + /wd4996 # The second parameter is ignored. + ${EXTRA_FLAGS}) + if(ONNX_USE_PROTOBUF_SHARED_LIBS) + target_compile_options(onnx_opt_cpp2py_export + PRIVATE /wd4251 # 'identifier' : class 'type1' needs to + # have dll-interface to be used by + # clients of class 'type2' + ) + endif() + add_msvc_runtime_flag(onnx_opt_cpp2py_export) + endif() +endif() diff --git a/VERSION_NUMBER b/VERSION_NUMBER new file mode 100644 index 000000000..6e8bf73aa --- /dev/null +++ b/VERSION_NUMBER @@ -0,0 +1 @@ +0.1.0 diff --git a/onnx_opt/__init__.py b/onnx_opt/__init__.py new file mode 100644 index 000000000..c0c044827 --- /dev/null +++ b/onnx_opt/__init__.py @@ -0,0 +1,59 @@ +# ATTENTION: The code in this file is highly EXPERIMENTAL. +# Adventurous users should note that the APIs will probably change. + +"""onnx optimizer + +This enables users to optimize their models. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import onnx +import onnx_opt.onnx_opt_cpp2py_export as C +from onnx import ModelProto +from typing import Text, Sequence, Optional + +"""Apply the optimization on the serialized ModelProto. 
+ +Arguments: + input (ModelProto): model + names (list of string): list of optimization names + +Return: + return (ModelProto) optimized model + +Supported pass names: + -- nop + -- eliminate_identity + -- eliminate_nop_transpose + -- eliminate_nop_pad + -- eliminate_unused_initializer + -- fuse_consecutive_squeezes + -- fuse_consecutive_transposes + -- fuse_add_bias_into_conv + -- fuse_transpose_into_gemm +""" + +get_available_passes = C.get_available_passes + + +def optimize(model, passes=None, fixed_point=False): # type: (ModelProto, Optional[Sequence[Text]], bool) -> ModelProto + if passes is None: + passes = ['eliminate_nop_transpose', + 'eliminate_nop_pad', + 'fuse_consecutive_transposes', + 'fuse_transpose_into_gemm'] + if not isinstance(model, ModelProto): + raise ValueError('Optimizer only accepts ModelProto, incorrect type: {}'.format(type(model))) + + model_str = model.SerializeToString() + if fixed_point: + optimized_model_str = C.optimize_fixedpoint(model_str, passes) + else: + optimized_model_str = C.optimize(model_str, passes) + + return onnx.load_from_string(optimized_model_str) + +__all__ = ['optimize', 'get_available_passes'] diff --git a/onnx_opt/cpp2py_export.cc b/onnx_opt/cpp2py_export.cc new file mode 100644 index 000000000..0a4f60af0 --- /dev/null +++ b/onnx_opt/cpp2py_export.cc @@ -0,0 +1,37 @@ +#include +#include + +#include "onnx/optimizer/optimize.h" +#include "onnx/py_utils.h" + +namespace ONNX_NAMESPACE { +namespace py = pybind11; +using namespace pybind11::literals; +PYBIND11_MODULE(onnx_opt_cpp2py_export, onnx_opt_cpp2py_export) { + onnx_opt_cpp2py_export.doc() = "ONNX Optimizer"; + + onnx_opt_cpp2py_export.def( + "optimize", + [](const py::bytes& bytes, const std::vector& names) { + ModelProto proto{}; + ParseProtoFromPyBytes(&proto, bytes); + auto const result = optimization::Optimize(proto, names); + std::string out; + result.SerializeToString(&out); + return py::bytes(out); + }); + + onnx_opt_cpp2py_export.def( + 
"optimize_fixedpoint", + [](const py::bytes& bytes, const std::vector& names) { + ModelProto proto{}; + ParseProtoFromPyBytes(&proto, bytes); + auto const result = + optimization::OptimizeFixed(proto, names); + std::string out; + result.SerializeToString(&out); + return py::bytes(out); + }); + onnx_opt_cpp2py_export.def("get_available_passes", &optimization::GetAvailablePasses); +} +} diff --git a/onnx_opt/optimize.cc b/onnx_opt/optimize.cc new file mode 100644 index 000000000..dbacb1a32 --- /dev/null +++ b/onnx_opt/optimize.cc @@ -0,0 +1,45 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#include "onnx/optimizer/optimize.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +GlobalPassRegistry Optimizer::passes; + +Optimizer::Optimizer( + const std::vector& names, + const bool fixed_point) { + if (fixed_point) { + this->pass_manager = + std::shared_ptr(new FixedPointPassManager()); + } else { + this->pass_manager = + std::shared_ptr(new GeneralPassManager()); + } + for (const auto& name : names) { + auto pass = passes.find(name); + this->pass_manager->add(pass); + } +} +Optimizer::~Optimizer() {} + +ModelProto Optimize( + const ModelProto& mp_in, + const std::vector& names) { + Optimizer current_opt(names, false); + return current_opt.optimize(mp_in); +} +ModelProto OptimizeFixed( + const ModelProto& mp_in, + const std::vector& names) { + Optimizer current_opt(names, true); + return current_opt.optimize(mp_in); +} +const std::vector GetAvailablePasses() { + return Optimizer::passes.GetAvailablePasses(); +} + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/optimize.h b/onnx_opt/optimize.h new file mode 100644 index 000000000..018a62f79 --- /dev/null +++ b/onnx_opt/optimize.h @@ -0,0 +1,56 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/common/ir.h" +#include "onnx/common/ir_pb_converter.h" +#include "onnx/common/stl_backports.h" +#include "onnx/optimizer/pass_manager.h" +#include "onnx/optimizer/pass_registry.h" +#include "onnx/proto_utils.h" + +#include "vector" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct Optimizer { + static GlobalPassRegistry passes; + + public: + Optimizer(const std::vector& names, const bool fixed_point); + ~Optimizer(); + + ModelProto optimize(const ModelProto& mp_in) { + std::shared_ptr g(ImportModelProto(mp_in)); + + if (g.get() == nullptr) { + std::cerr << "Warning: onnx optimizer is unable to parse input model. " + << "(The IR version of the ONNX model may be too old.)" + << std::endl; + // If we can't parse the file, just return the input. + return mp_in; + } + + ModelProto mp_out = PrepareOutput(mp_in); + this->pass_manager->run(*g); + ExportModelProto(&mp_out, g); + return mp_out; + } + + private: + std::shared_ptr pass_manager; +}; + +const std::vector GetAvailablePasses(); + +ModelProto Optimize( + const ModelProto& mp_in, + const std::vector& names); + +ModelProto OptimizeFixed( + const ModelProto& mp_in, + const std::vector& names); +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/pass.cc b/onnx_opt/pass.cc new file mode 100644 index 000000000..df0a89b5b --- /dev/null +++ b/onnx_opt/pass.cc @@ -0,0 +1,103 @@ +#include "onnx/optimizer/pass.h" +#include "onnx/common/assertions.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +Pass::Pass( + PassType pass_type, + PassEfficiency pass_efficiency, + PassOptimizationType pass_optimization_type) { + this->pass_type = pass_type; + this->pass_efficiency = pass_efficiency; + this->pass_optimization_type = pass_optimization_type; +} + +Pass::~Pass() {} + +unsigned int Pass::DescendOnGraphAttributesAndCount( + Node* n, + std::function fn) { + unsigned int num_changes = 0; + for (auto name : n->attributeNames()) { + auto kind = 
n->kindOf(name); + if (kind == AttributeKind::g) { + num_changes += fn(*n->g(name)); + } + if (kind == AttributeKind::gs) { + for (auto& g : n->gs(name)) { + num_changes += fn(*g); + } + } + } + return num_changes; +} + +void Pass::DescendOnGraphAttributesUnconstrained( + Node* n, + std::function fn) { + for (auto name : n->attributeNames()) { + auto kind = n->kindOf(name); + if (kind == AttributeKind::g) { + fn(*n->g(name)); + } + if (kind == AttributeKind::gs) { + for (auto& g : n->gs(name)) { + fn(*g); + } + } + } +} + +PredicateBasedPass::~PredicateBasedPass() {} + +unsigned int PredicateBasedPass::_runPassInternal(Graph& graph) { + unsigned int num_changes = false; + for (auto it = graph.begin(); it != graph.end(); ++it) { + auto* n = *it; + num_changes += this->DescendOnGraphAttributesAndCount( + n, [this](Graph& g) { return _runPassInternal(g); }); + if (this->patternMatchPredicate(n)) { + NodeDestroyType destroy_type = NodeDestroyType::DestroyZero; + num_changes += this->runTransform(n, graph, destroy_type); + + if (destroy_type == NodeDestroyType::DestroyOne) { + it.destroyCurrent(); + } + if (destroy_type == NodeDestroyType::DestroyTwo) { + it.destroyCurrent(); + it.destroyCurrent(); + } + } + } + return num_changes; +} + +PassAnalysisType PredicateBasedPass::getPassAnalysisType() const { + return PassAnalysisType::CountBased; +} + +std::shared_ptr PredicateBasedPass::runPass(Graph& graph) { + bool initialized_pass = this->initializePass(graph); + unsigned int touched_optimizations = this->_runPassInternal(graph); + bool finalized_pass = this->finalizePass(graph); + + return std::shared_ptr(new CountBasedPassAnalysis( + this, touched_optimizations, initialized_pass, finalized_pass)); +} + +CountBasedPassAnalysis::CountBasedPassAnalysis( + Pass* pass, + unsigned int num_positive_transforms, + bool initialization_done, + bool finalization_done) { + this->pass = pass; + this->num_positive_transforms = num_positive_transforms; + this->initialization_done = 
initialization_done; + this->finalization_done = finalization_done; +} + +FullGraphBasedPass::~FullGraphBasedPass() {} + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/pass.h b/onnx_opt/pass.h new file mode 100644 index 000000000..55d21c909 --- /dev/null +++ b/onnx_opt/pass.h @@ -0,0 +1,212 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +#include +#include "onnx/common/ir.h" +#include "onnx/onnx_pb.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +// Base struct representing result of a pass. +struct PostPassAnalysis { + virtual ~PostPassAnalysis() = default; +}; + +// Enum that represents the type of optimization it is. +enum PassType { + // Class of optimizations that fuses operations. + Fuse = 0, + // Class of optimizations that removes useless operations. + Nop = 1, + // Class of optimizations that includes some form of seperation. + Separate = 2, + // Immutable pass, also sometimes referred to as an analysis pass. + Immutable = 3, + // Other type of pass. + Other = 4 +}; + +// Enum that represents the return type of the analysis. +enum PassAnalysisType { + // An empty analysis is returned. Most likely will return PostPassAnalysis. + Empty = 0, + // A count based analysis is returned. Most likely of type + // CountBasedPassAnalysis + CountBased = 1 +}; + +enum PassEfficiency { + // A partially efficient optimization pass cannot guarantee that running two + // consecutive passes + // will return the same result as running a single pass. + Partial = 0, + // A completely efficient optimization guarantees that running two consecutive + // passes is equivalent + // to running a single pass. + Complete = 1 +}; + +// Describes what the optimization pass is attempting to optimize. +enum PassOptimizationType { + // Is not optimizing anything. Most likely will be used in an immutable pass. 
+ None = 0, + // Optimizes for compute. + Compute = 1, + // Optimizes for memory. + Memory = 2, + // Optimizes for both compute and memory. + ComputeMemory = 3, + // Optimizes for stability (e.g. log-sum-exp trick). + Stability = 4 +}; + +enum NodeDestroyType { + // Does not destroy node + DestroyZero = 0, + // Equivalent to calling it.destroyCurrent() once. + DestroyOne = 1, + // Equivalent to calling it.destroyCurrent() twice. + DestroyTwo = 2 +}; + +// Base class for all optimizations within ONNX. A pass must contain the +// annotations described above. Furthermore each pass is given the ability to +// initialize and finalize it's pass. Each pass must have a unique name that +// pass managers/registry will use as identification. Finally the pass +// implements runPass which completes the pass inplace. +class Pass { + PassType pass_type; + PassEfficiency pass_efficiency; + PassOptimizationType pass_optimization_type; + + public: + Pass( + PassType pass_type, + PassEfficiency pass_efficiency, + PassOptimizationType pass_optimization_type); + virtual ~Pass(); + + PassType getPassType() const { + return this->pass_type; + } + PassEfficiency getPassEfficiency() const { + return this->pass_efficiency; + } + PassOptimizationType getPassOptimizationType() const { + return this->pass_optimization_type; + } + virtual PassAnalysisType getPassAnalysisType() const = 0; + virtual std::string getPassName() const = 0; + + virtual bool initializePass(Graph&) { + return false; + } + virtual bool finalizePass(Graph&) { + return false; + } + virtual std::shared_ptr runPass(Graph& graph) = 0; + + protected: + // Iterates through the elements in the graph and counts the number of times + // the transform is successfully run. + unsigned int DescendOnGraphAttributesAndCount( + Node* n, + std::function fn); + // A more general version of the function above that doesn't constrain the + // return type of fn. 
+ void DescendOnGraphAttributesUnconstrained( + Node* n, + std::function fn); +}; + +class ImmutablePass : Pass { + public: + explicit ImmutablePass() + : Pass( + PassType::Immutable, + PassEfficiency::Complete, + PassOptimizationType::None) {} + ~ImmutablePass() override; +}; + +// Pass Analysis done after a predicate based pass. +struct CountBasedPassAnalysis : PostPassAnalysis { + // Have to use raw pointer here. The idea is that the pass will pass as + // a parameter to the constructor. We could use std::enable_shared_from_this + // but this complicates the memory model. Also since all passes come from + // GlobalPassRegistry which already utilizes smart pointers we don't have to + // worry about memory leaks from passes. + Pass* pass; + unsigned int num_positive_transforms; + bool initialization_done; + bool finalization_done; + + public: + explicit CountBasedPassAnalysis( + Pass* pass, + unsigned int num_positive_transforms, + bool initialization_done, + bool finalization_done); + + bool graphChanged() { + return this->num_positive_transforms > 0; + } + bool numSucceededTransforms() { + return this->num_positive_transforms; + } + + // Whether or not a repeated application of the pass might be useful. + bool fixedPointOptimizationNeeded() { + return this->graphChanged() && + pass->getPassEfficiency() == PassEfficiency::Partial; + } +}; + +// A pass that is based on pattern matching. The majority of passes will +// implement this pass. In order for the pass to work the patternMatchPredicate +// function must be implemented witch matches a subgraph to the respective +// optimization pass. Lastly the runTransform method must also be implemented +// which simply implements the pass on any node which passes +// patternMatchPredicate. 
+class PredicateBasedPass : public Pass { + public: + explicit PredicateBasedPass( + PassType pass_type, + PassEfficiency pass_efficiency, + PassOptimizationType pass_optimization_type) + : Pass(pass_type, pass_efficiency, pass_optimization_type) {} + ~PredicateBasedPass() override; + + virtual bool patternMatchPredicate(Node* node) = 0; + // Run transform is given the current node in the iterator, a reference to the + // current graph as well as a reference describing how to treat the current + // node in the iterator post transform. Run transform is then responsible for + // running the actual transform as well as describing how to treat the + // iterator node. By default the current node will not call destroy. Do not + // internally delete node instead set the correct destroy_current type. + virtual bool + runTransform(Node* node, Graph& graph, NodeDestroyType& destroy_current) = 0; + + std::shared_ptr runPass(Graph& graph) override; + PassAnalysisType getPassAnalysisType() const override; + + private: + unsigned int _runPassInternal(Graph& graph); +}; + +// The most general pass which allows the user to run a pass given only a graph. 
+class FullGraphBasedPass : public Pass { + public: + explicit FullGraphBasedPass( + PassType pass_type, + PassEfficiency pass_efficiency, + PassOptimizationType pass_optimization_type) + : Pass(pass_type, pass_efficiency, pass_optimization_type) {} + ~FullGraphBasedPass() override; +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/pass_manager.cc b/onnx_opt/pass_manager.cc new file mode 100644 index 000000000..7ad365a06 --- /dev/null +++ b/onnx_opt/pass_manager.cc @@ -0,0 +1,47 @@ +#include "onnx/optimizer/pass_manager.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +PassManager::PassManager() {} +PassManager::~PassManager() {} + +GeneralPassManager::~GeneralPassManager() { + this->passes.clear(); +} +void GeneralPassManager::add(std::shared_ptr pass) { + this->passes.push_back(std::move(pass)); +} + +std::shared_ptr GeneralPassManager::run(Graph& graph) { + for (const std::shared_ptr& pass : this->passes) { + auto pass_analysis = pass->runPass(graph); + } + return std::shared_ptr(new EmptyPassManagerAnalysis()); +} + +std::shared_ptr FixedPointPassManager::run(Graph& graph) { + bool fixed_point_optimization_done; + + do { + fixed_point_optimization_done = false; + for (const std::shared_ptr& pass : this->passes) { + std::shared_ptr analysis = pass->runPass(graph); + if (pass->getPassAnalysisType() == PassAnalysisType::Empty) { + continue; + } + std::shared_ptr count_analysis = + std::static_pointer_cast(analysis); + + while (count_analysis->fixedPointOptimizationNeeded()) { + count_analysis = std::static_pointer_cast( + pass->runPass(graph)); + fixed_point_optimization_done = true; + } + } + } while (fixed_point_optimization_done); + + return std::shared_ptr(new EmptyPassManagerAnalysis()); +} +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/pass_manager.h b/onnx_opt/pass_manager.h new file mode 100644 index 000000000..c7ba35ef0 --- /dev/null +++ b/onnx_opt/pass_manager.h @@ -0,0 
+1,51 @@ +#pragma once +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#include +#include "onnx/optimizer/pass.h" +#include "onnx/optimizer/passes/eliminate_deadend.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +// An analysis returned from the run done by a manager +struct PassManagerAnalysis {}; +struct EmptyPassManagerAnalysis : PassManagerAnalysis {}; + +// Base class of all PassManager's. The class should be able to add new passes +// as well as run the passes given a graph. +class PassManager { + public: + PassManager(); + virtual ~PassManager(); + + virtual void add(std::shared_ptr P) = 0; + virtual std::shared_ptr run(Graph& graph) = 0; +}; + +// The GeneralPassManager has no restriction on type of Pass and runs the passes +// once in a linear fashion. +class GeneralPassManager : public PassManager { + public: + GeneralPassManager() {} + ~GeneralPassManager() override; + + void add(std::shared_ptr pass) override; + std::shared_ptr run(Graph& graph) override; + + protected: + // use vector here to ensure the order of the passes + // for some pass, order is critical, for example, + // split_init and split_predict should be the last in the list + std::vector> passes; +}; + +// Exhibits the same behavior as GeneralPassManager but will instead check +// whether or not fixed point optimization is needed. +class FixedPointPassManager : public GeneralPassManager { + std::shared_ptr run(Graph& graph) override; +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/pass_registry.cc b/onnx_opt/pass_registry.cc new file mode 100644 index 000000000..c2eee87f8 --- /dev/null +++ b/onnx_opt/pass_registry.cc @@ -0,0 +1,18 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#include "onnx/optimizer/pass_registry.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +const std::vector GlobalPassRegistry::GetAvailablePasses() { + std::vector names; + for (const auto& pass : this->passes) { + names.push_back(pass.first); + } + return names; +} + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/pass_registry.h b/onnx_opt/pass_registry.h new file mode 100644 index 000000000..a925b9142 --- /dev/null +++ b/onnx_opt/pass_registry.h @@ -0,0 +1,88 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +#include "onnx/common/ir.h" +#include "onnx/common/ir_pb_converter.h" +#include "onnx/common/stl_backports.h" +#include "onnx/optimizer/passes/eliminate_deadend.h" +#include "onnx/optimizer/passes/eliminate_identity.h" +#include "onnx/optimizer/passes/eliminate_nop_dropout.h" +#include "onnx/optimizer/passes/eliminate_nop_monotone_argmax.h" +#include "onnx/optimizer/passes/eliminate_nop_pad.h" +#include "onnx/optimizer/passes/eliminate_nop_transpose.h" +#include "onnx/optimizer/passes/eliminate_unused_initializer.h" +#include "onnx/optimizer/passes/extract_constant_to_initializer.h" +#include "onnx/optimizer/passes/fuse_add_bias_into_conv.h" +#include "onnx/optimizer/passes/fuse_bn_into_conv.h" +#include "onnx/optimizer/passes/fuse_consecutive_concats.h" +#include "onnx/optimizer/passes/fuse_consecutive_log_softmax.h" +#include "onnx/optimizer/passes/fuse_consecutive_reduce_unsqueeze.h" +#include "onnx/optimizer/passes/fuse_consecutive_squeezes.h" +#include "onnx/optimizer/passes/fuse_consecutive_transposes.h" +#include "onnx/optimizer/passes/fuse_matmul_add_bias_into_gemm.h" +#include "onnx/optimizer/passes/fuse_pad_into_conv.h" +#include "onnx/optimizer/passes/fuse_transpose_into_gemm.h" +#include "onnx/optimizer/passes/lift_lexical_references.h" +#include "onnx/optimizer/passes/nop.h" +#include 
"onnx/optimizer/passes/split.h" +#include "onnx/proto_utils.h" + +#include +#include + +namespace ONNX_NAMESPACE { +namespace optimization { + +// Registry containing all passes available in ONNX. +struct GlobalPassRegistry { + std::map> passes; + + GlobalPassRegistry() { + // Register the optimization passes to the optimizer. + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + registerPass(); + } + + ~GlobalPassRegistry() { + this->passes.clear(); + } + + std::shared_ptr find(std::string pass_name) { + auto it = this->passes.find(pass_name); + ONNX_ASSERTM( + it != this->passes.end(), "pass %s is unknown.", pass_name.c_str()); + return it->second; + } + const std::vector GetAvailablePasses(); + + template + void registerPass() { + static_assert(std::is_base_of::value, "T must inherit from Pass"); + std::shared_ptr pass(new T()); + passes[pass->getPassName()] = pass; + } +}; +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/eliminate_deadend.h b/onnx_opt/passes/eliminate_deadend.h new file mode 100644 index 000000000..a70e2ec9d --- /dev/null +++ b/onnx_opt/passes/eliminate_deadend.h @@ -0,0 +1,39 @@ + +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+#pragma once +#include "onnx/optimizer/pass.h" +namespace ONNX_NAMESPACE { +namespace optimization { +struct EliminateDeadEnd final : public FullGraphBasedPass { + explicit EliminateDeadEnd() + : FullGraphBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + std::string getPassName() const override { + return "eliminate_deadend"; + } + PassAnalysisType getPassAnalysisType() const override { + return PassAnalysisType::CountBased; + } + unsigned int EliminateDead(Graph& graph) { + unsigned int nodes_removed = 0; + auto nodes = graph.nodes().reverse(); + for (auto it = nodes.begin(); it != nodes.end(); it++) { + auto node = *it; + if (!node->hasUses()) { + nodes_removed++; + it.destroyCurrent(); + } + } + return nodes_removed; + } + std::shared_ptr runPass(Graph& graph) override { + auto nodes_removed = this->EliminateDead(graph); + return std::shared_ptr( + new CountBasedPassAnalysis(this, nodes_removed, false, false)); + } +}; +} // namespace optimization +} // namespace ONNX_NAMESPACE \ No newline at end of file diff --git a/onnx_opt/passes/eliminate_identity.h b/onnx_opt/passes/eliminate_identity.h new file mode 100644 index 000000000..3f5f8525b --- /dev/null +++ b/onnx_opt/passes/eliminate_identity.h @@ -0,0 +1,38 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct EliminateIdentity final : public PredicateBasedPass { + explicit EliminateIdentity() + : PredicateBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "eliminate_identity"; + } + + bool patternMatchPredicate(Node* node) override { + return node->kind() == kIdentity; + } + bool runTransform(Node* node, Graph&, NodeDestroyType& destroy_current) + override { + + if (node->output()->has_sizes()) { + node->input()->setSizes(node->output()->sizes()); + } + node->output()->replaceAllUsesWith(node->input()); + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/eliminate_nop_dropout.h b/onnx_opt/passes/eliminate_nop_dropout.h new file mode 100644 index 000000000..6be190b5e --- /dev/null +++ b/onnx_opt/passes/eliminate_nop_dropout.h @@ -0,0 +1,45 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct EliminateNopDropout final : public PredicateBasedPass { + explicit EliminateNopDropout() + : PredicateBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "eliminate_nop_dropout"; + } + + bool patternMatchPredicate(Node* node) override { + // in opset 12, ratio is an input of Dropout rather than an attribute, + // however we don't want to to remove Dropout fro opset 12+, since it + // supports training-friendly models, for which the Dropout ops are required + return (node->kind() == kDropout && node->hasAttribute(kratio)) && + node->f(kratio) == 0.0; + } + + bool runTransform(Node* node, Graph&, NodeDestroyType& destroy_current) + override { + // Don't assume that theres only one output. + for (size_t i = 0; i < node->outputs().size(); ++i) { + node->outputs()[i]->replaceAllUsesWith(node->input()); + } + if (node->outputs()[0]->has_sizes()) { + node->input()->setSizes(node->outputs()[0]->sizes()); + } + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/eliminate_nop_monotone_argmax.h b/onnx_opt/passes/eliminate_nop_monotone_argmax.h new file mode 100644 index 000000000..67f3fdf79 --- /dev/null +++ b/onnx_opt/passes/eliminate_nop_monotone_argmax.h @@ -0,0 +1,66 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +const std::unordered_set monotone_node_no_axis_kind{kLog, + kExp, + kSqrt}; + +const std::unordered_set monotone_node_axis_kind{kSoftmax, + kLogSoftmax}; + +struct EliminateNopMonotoneArgmax final : public PredicateBasedPass { + explicit EliminateNopMonotoneArgmax() + : PredicateBasedPass( + PassType::Nop, + PassEfficiency::Partial, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "eliminate_nop_monotone_argmax"; + } + + static inline bool satisfies_monotone_condition(int64_t axis, Node* node) { + if (monotone_node_no_axis_kind.find(node->kind()) != + monotone_node_no_axis_kind.end()) { + return true; + } + if (monotone_node_axis_kind.find(node->kind()) != + monotone_node_axis_kind.end()) { + if (node->hasAttribute(kaxis)) { + return axis == node->i(kaxis); + } + } + return false; + } + + bool patternMatchPredicate(Node* node) override { + if (node->kind() == kArgMax) { + if (node->hasAttribute(kaxis)) { + auto node_axis = node->i(kaxis); + return node->inputs().size() == 1 && + satisfies_monotone_condition(node_axis, node->input()->node()); + } + } + return false; + } + + bool runTransform(Node* node, Graph&, NodeDestroyType&) + override { + Node* monotone_node = node->input()->node(); + if (monotone_node->output()->uses().size() == 1) { + monotone_node->output()->replaceAllUsesWith(monotone_node->input()); + monotone_node->destroy(); + return true; + } + return false; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE \ No newline at end of file diff --git a/onnx_opt/passes/eliminate_nop_pad.h b/onnx_opt/passes/eliminate_nop_pad.h new file mode 100644 index 000000000..23f8cfc6f --- /dev/null +++ b/onnx_opt/passes/eliminate_nop_pad.h @@ -0,0 +1,77 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/defs/tensor_util.h" +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct EliminateNopPad final : public PredicateBasedPass { + explicit EliminateNopPad() + : PredicateBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "eliminate_nop_pad"; + } + + static bool is_nop_pad(Node* node, Graph& graph) { + if (node->hasAttribute(kpads)) { + // opset 10 and below + const auto& pads = node->is(kpads); + for (size_t i = 0; i < pads.size(); i++) { + // if pad constant_value is non-zero, this is not a nop pad + if (pads[i] != 0) { + return false; + } + } + return true; + } else { + // opset 11 and above + const auto& pads_name = node->inputs()[1]->uniqueName(); + const auto pads_initializer = graph.getInitializer(pads_name); + // 'pad' node has the 'pads' input which has not been initialized - + // can't proceed with elimination + if (pads_initializer == graph.initializers().end()) + return false; + + // validate values within 'pads' + if (pads_initializer->elem_type() == TensorProto::INT64) { + const auto& pads = ParseData(&*pads_initializer); + for (const auto& val : pads) { + // if pad constant_value is non-zero, this is not a nop pad + if (val != 0) { + return false; + } + } + return true; + } + } + + return false; + } + + bool patternMatchPredicate(Node* node) override { + return node->kind() == kPad; + } + + bool runTransform(Node* node, Graph& graph, NodeDestroyType& destroy_current) + override { + if (!is_nop_pad(node, graph)) + return false; + if (node->output()->has_sizes()) { + node->inputs()[0]->setSizes(node->output()->sizes()); + } + node->output()->replaceAllUsesWith(node->inputs()[0]); + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/eliminate_nop_transpose.h 
b/onnx_opt/passes/eliminate_nop_transpose.h new file mode 100644 index 000000000..ba1595dfc --- /dev/null +++ b/onnx_opt/passes/eliminate_nop_transpose.h @@ -0,0 +1,46 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct EliminateNopTranspose final : public PredicateBasedPass { + explicit EliminateNopTranspose() + : PredicateBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "eliminate_nop_transpose"; + } + + static bool is_nop_transpose(const std::vector& perm) { + for (size_t i = 0; i < perm.size(); i++) + if (perm[i] != (int)i) + return false; + return true; + } + + bool patternMatchPredicate(Node* node) override { + return (node->kind() == kTranspose && node->hasAttribute(kperm)) && + is_nop_transpose(node->is(kperm)); + } + + bool runTransform(Node* node, Graph&, NodeDestroyType& destroy_current) + override { + if (node->output()->has_sizes()) { + node->input()->setSizes(node->output()->sizes()); + } + node->output()->replaceAllUsesWith(node->input()); + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/eliminate_unused_initializer.h b/onnx_opt/passes/eliminate_unused_initializer.h new file mode 100644 index 000000000..95995610f --- /dev/null +++ b/onnx_opt/passes/eliminate_unused_initializer.h @@ -0,0 +1,80 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +// Before: +// A, B, C are in the initializer list +// D = Add(B, C) +// After: +// B, C are in the initializer list and A is removed +// D = Add(B, C) +// +// this pass can handle the case satisfy all following conditions: +// condition 1: A is not used as any node's input +// condition 2: A is not an output + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct EliminateUnusedInitializer final : public FullGraphBasedPass { + explicit EliminateUnusedInitializer() + : FullGraphBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::Memory) {} + + std::string getPassName() const override { + return "eliminate_unused_initializer"; + } + + PassAnalysisType getPassAnalysisType() const override { + return PassAnalysisType::Empty; + } + + void erase_used_initializers( + Graph& g, + std::unordered_set* initializer_names) { + for (auto output : g.outputs()) { + initializer_names->erase(output->uniqueName()); + } + for (auto it = g.begin(); it != g.end(); ++it) { + auto* n = *it; + DescendOnGraphAttributesUnconstrained( + n, [this, initializer_names](Graph& graph) { + erase_used_initializers(graph, initializer_names); + }); + for (auto* input : n->inputs()) { + initializer_names->erase(input->uniqueName()); + } + } + } + + void eliminate_unused_initializer(Graph& graph) { + std::unordered_set initializer_names( + graph.initializer_names().begin(), graph.initializer_names().end()); + erase_used_initializers(graph, &initializer_names); + + // remove initializer and input if need + for (std::string name : initializer_names) { + graph.eraseInitializer(name); + auto iter = std::find_if( + graph.inputs().begin(), graph.inputs().end(), [&name](Value* input) { + return input->uniqueName() == name; + }); + if (iter != graph.inputs().end()) { + graph.eraseInput(std::distance(graph.inputs().begin(), iter)); + } + } + } + + std::shared_ptr runPass(Graph& graph) override { + 
eliminate_unused_initializer(graph); + return std::shared_ptr(new PostPassAnalysis()); + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/extract_constant_to_initializer.h b/onnx_opt/passes/extract_constant_to_initializer.h new file mode 100644 index 000000000..696f78c6e --- /dev/null +++ b/onnx_opt/passes/extract_constant_to_initializer.h @@ -0,0 +1,46 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +// Before: +// A = Constant() +// After: +// A is in the initializer list +// +// this pass can handle the case satisfy all following conditions: +// condition 1: A is the output of a Constant node +#include "onnx/common/assertions.h" +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct ExtractConstantToInitializer final : public PredicateBasedPass { + explicit ExtractConstantToInitializer() + : PredicateBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::Memory) {} + + std::string getPassName() const override { + return "extract_constant_to_initializer"; + } + + bool patternMatchPredicate(Node* node) override { + return node->kind() == kConstant; + } + + bool runTransform(Node* node, Graph& graph, NodeDestroyType& destroy_current) + override { + const auto name = node->output()->uniqueName(); + Tensor t = node->t(kvalue); + Value* new_init = graph.addInitializerAndInput(t, name); + node->output()->replaceAllUsesWith(new_init); + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_add_bias_into_conv.h b/onnx_opt/passes/fuse_add_bias_into_conv.h new file mode 100644 index 000000000..0af10cd7c --- /dev/null +++ b/onnx_opt/passes/fuse_add_bias_into_conv.h @@ -0,0 +1,157 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. 
+// Adventurous users should note that the APIs will probably change. + +#pragma once + +// Before: +// Z = Conv(X, Y) +// B = Z + A +// After: +// B = Conv(X, Y, A) +// +// the pass can handle the following cases: +// case 1: A is 1D tensor and A.dim[0] == Z.dim[1] +// case 2: A is 1-element 1D tensor + +#include + +#include "onnx/common/assertions.h" +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FuseAddBiasIntoConv final : public PredicateBasedPass { + explicit FuseAddBiasIntoConv() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + std::string getPassName() const override { + return "fuse_add_bias_into_conv"; + } + bool patternMatchPredicate(Node* node) override { + return node->kind() == kAdd && node->inputs()[0]->node()->kind() == kConv && + node->inputs()[0]->node()->inputs().size() == 2; + } + bool runTransform(Node* n, Graph& graph, NodeDestroyType& destroy_current) + override { + // due to current broadcasting's constraint, Conv has to be the first + // operand + destroy_current = NodeDestroyType::DestroyZero; + auto orig_conv = n->inputs()[0]; + auto orig_bias = n->inputs()[1]; + // check if bias is Const or in graph's initializers + if (orig_bias->node()->kind() != kConstant && + orig_bias->node()->kind() != kParam) { + return false; + } + // check if conv is only used by Add + if (orig_conv->uses().size() > 1) { + return false; + } + auto conv_shape = orig_conv->sizes(); + auto bias_shape = orig_bias->sizes(); + auto weight_shape = orig_conv->node()->inputs()[1]->sizes(); + int64_t M = -1; + int64_t rank = -1; + // try to get feature M and rank from conv_shape + if (conv_shape.size() > 1 && conv_shape[1].is_int) { + M = conv_shape[1].dim; + rank = conv_shape.size(); + } + // try to get feature M and rank from weight_shape + if (weight_shape.size() > 0 && weight_shape[0].is_int) { + ONNX_ASSERT(M == -1 || M == weight_shape[0].dim); + M = 
weight_shape[0].dim; + ONNX_ASSERT( + rank == -1 || rank == static_cast(weight_shape.size())); + rank = weight_shape.size(); + } + int64_t num_el = 1; + for (int i = 0; i < static_cast(bias_shape.size()); ++i) { + if (bias_shape[i].is_int) { + num_el *= bias_shape[i].dim; + } else { + num_el = -1; + return false; + } + } + if (M == -1 || num_el == -1) { + // No enough information, bail out + return false; + } + if (rank < static_cast(bias_shape.size())) { + return false; + } + if (num_el == 1) { + if (orig_bias->node()->kind() != kParam && + orig_conv->node()->isBefore(orig_bias->node())) { + orig_bias->node()->moveBefore(orig_conv->node()); + } + Value* conv_3rd_input = orig_bias; + if (bias_shape.size() > 1) { + Node* squeeze = graph.create(kSqueeze, 1); + std::vector axes(bias_shape.size() - 1); + std::iota(axes.begin(), axes.end(), 0); + squeeze->is_(kaxes, std::move(axes)); + squeeze->addInput(conv_3rd_input); + conv_3rd_input = squeeze->output(); + squeeze->insertBefore(orig_conv->node()); + } + if (M > 1) { + Node* constant = graph.create(kConstant, 1); + Tensor t; + t.sizes().push_back(static_cast(1)); + t.int64s().push_back(M); + t.elem_type() = TensorProto_DataType_INT64; + Symbol sym = Symbol("value"); + constant->t_(sym, t); + std::vector s = {1}; + constant->output()->setSizes(s); + constant->output()->setElemType(TensorProto_DataType_INT64); + constant->insertBefore(orig_conv->node()); + Node* tile = graph.create(kTile, 1); + tile->addInput(conv_3rd_input); + tile->addInput(constant->output()); + conv_3rd_input = tile->output(); + tile->insertBefore(orig_conv->node()); + } + orig_conv->node()->addInput(conv_3rd_input); + } else if (rank > static_cast(bias_shape.size()) + 1) { + return false; + } else if ( + num_el == M && + bias_shape[1 + bias_shape.size() - static_cast(rank)].dim == + M) { + ONNX_ASSERT(bias_shape.size() > 1); + if (orig_bias->node()->kind() != kParam && + orig_conv->node()->isBefore(orig_bias->node())) { + 
orig_bias->node()->moveBefore(orig_conv->node()); + } + Node* squeeze = graph.create(kSqueeze, 1); + std::vector axes(bias_shape.size()); + std::iota(axes.begin(), axes.end(), static_cast(0)); + axes.erase( + axes.begin() + (1 + bias_shape.size() - static_cast(rank))); + squeeze->is_(kaxes, std::move(axes)); + squeeze->addInput(orig_bias); + squeeze->insertBefore(orig_conv->node()); + orig_conv->node()->addInput(squeeze->output()); + } else { + return false; + } + if (orig_conv->sizes().size() == 0 && n->output()->sizes().size() > 0) { + orig_conv->setSizes(n->output()->sizes()); + } + if (n->output()->elemType() != TensorProto_DataType_UNDEFINED) { + orig_conv->setElemType(n->output()->elemType()); + } + n->replaceAllUsesWith(orig_conv->node()); + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_bn_into_conv.h b/onnx_opt/passes/fuse_bn_into_conv.h new file mode 100644 index 000000000..e4fdddca1 --- /dev/null +++ b/onnx_opt/passes/fuse_bn_into_conv.h @@ -0,0 +1,190 @@ + +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +// Before: +// conv = Conv() +// bn = BatchNormalization() +// +// After: +// bn is deleted +// new inputs/initializers to conv are added to graph +// any no longer used inputs/initializers are erased from graph +// +// this pass can handle the case satisfy all following conditions: +// condition 1: Run in testing mode +// condition 2: Inputs 1 - 4 of bn are all initializer_size +// condition 3: Output of initial conv has no other uses +// condition 3: Currently works for only DOUBLE, FLOAT32 tensor types +// +// Formula for transformation +// $$ X_{bn} = \frac{s(X - m)}{\sqrt{\sigma + \epsilon}} + b_{bn}$$ +// $$ X_{conv} = X * W + b_{conv} $$ +// thus, substituting $X$ with $X_{conv}$ in the BN equation we get: +// $$X_{bn} = X * \frac{sW}{\sqrt{\sigma + \epsilon}} + \frac{s(b_{conv} - +// m)}{\sqrt{\sigma + \epsilon}} + b_{bn}$$ or +// $$ W' = W\frac{s}{\sqrt{\sigma + \epsilon}}$$ +// $$ b' = (b_{conv} - m)\frac{s}{\sqrt{\sigma + \epsilon}} + b_{bn}$$ + +#include "onnx/common/assertions.h" +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { +// TODO: Currently broken for complex values and float16 +struct FuseBNIntoConv final : public PredicateBasedPass { + explicit FuseBNIntoConv() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "fuse_bn_into_conv"; + } + + void replace_inputs(Tensor& W, Tensor& b, Node* conv, Graph& graph) { + Value* new_W_value = graph.addInitializerAndInput(W); + Value* old_W_value = conv->inputs()[1]; + conv->replaceInput(1, new_W_value); + if (old_W_value->uses().size() == 0) { + graph.eraseInitializerAndInput(old_W_value); + } + + if (conv->inputs().size() == 3) { + Value* new_b_value = graph.addInitializerAndInput(b); + Value* old_b_value = conv->inputs()[2]; + conv->replaceInput(2, new_b_value); + if (old_b_value->uses().size() == 0) { + 
graph.eraseInitializerAndInput(old_b_value); + } + } else { + Value* new_b_value = graph.addInitializerAndInput(b); + conv->addInput(new_b_value); + } + } + + bool modify_conv(Node* conv, Node* bn, Graph& graph) { + const auto& bn_inputs = bn->inputs(); + const auto& conv_inputs = conv->inputs(); + auto end_iter = graph.initializers().end(); + auto s_iter = graph.getInitializer(bn_inputs[1]->uniqueName()); + auto bbn_iter = graph.getInitializer(bn_inputs[2]->uniqueName()); + auto m_iter = graph.getInitializer(bn_inputs[3]->uniqueName()); + auto var_iter = graph.getInitializer(bn_inputs[4]->uniqueName()); + auto W_iter = graph.getInitializer(conv_inputs[1]->uniqueName()); + if (s_iter == end_iter || bbn_iter == end_iter || m_iter == end_iter || + var_iter == end_iter || W_iter == end_iter) { + return false; + } + + ONNX_ASSERT(s_iter->sizes().size() == 1); + ONNX_ASSERT( + bbn_iter->sizes().size() == 1 && + bbn_iter->sizes()[0] == s_iter->sizes()[0]); + ONNX_ASSERT( + m_iter->sizes().size() == 1 && + m_iter->sizes()[0] == s_iter->sizes()[0]); + ONNX_ASSERT( + var_iter->sizes().size() == 1 && + var_iter->sizes()[0] == s_iter->sizes()[0]); + ONNX_ASSERT( + W_iter->sizes().size() > 2 && W_iter->sizes()[0] == s_iter->sizes()[0]); + ONNX_ASSERT( + s_iter->elem_type() == bbn_iter->elem_type() && + s_iter->elem_type() == m_iter->elem_type() && + s_iter->elem_type() == var_iter->elem_type() && + s_iter->elem_type() == W_iter->elem_type()); + if (s_iter->elem_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT && + s_iter->elem_type() != ONNX_NAMESPACE::TensorProto_DataType_DOUBLE) { + return false; + } + + Tensor bc; + if (conv_inputs.size() == 3) { + auto bc_iter = graph.getInitializer(conv_inputs[2]->uniqueName()); + if (bc_iter == end_iter) { + return false; + } + bc = *bc_iter; + ONNX_ASSERT( + bc.sizes().size() == 1 && bc.sizes()[0] == s_iter->sizes()[0]); + } + + Tensor s = *s_iter; + const Tensor& bbn = *bbn_iter; + const Tensor& m = *m_iter; + Tensor var = 
*var_iter; + Tensor W = *W_iter; + float epsilon = bn->hasAttribute(kepsilon) ? (float)bn->f(kepsilon) : 1e-5f; + Tensor eps; + +#define DO_COMPUTATION(TENSOR_TYPE, vec) \ + eps.sizes().push_back(s.sizes()[0]); \ + eps.elem_type() = ONNX_NAMESPACE::TensorProto_DataType_##TENSOR_TYPE; \ + for (int64_t i = 0; i < eps.sizes()[0]; ++i) { \ + eps.vec().push_back(epsilon); \ + } \ + if (conv_inputs.size() != 3) { \ + bc.sizes().push_back(s.sizes()[0]); \ + bc.elem_type() = ONNX_NAMESPACE::TensorProto_DataType_##TENSOR_TYPE; \ + for (int64_t i = 0; i < eps.sizes()[0]; ++i) { \ + bc.vec().push_back(0.f); \ + } \ + } \ + var.add(eps); \ + var.sqrt(); \ + s.divide(var); \ + W.scale_by_first_dim(s); \ + bc.subtract(m); \ + bc.multiply(s); \ + bc.add(bbn); + + switch (s.elem_type()) { + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { + DO_COMPUTATION(FLOAT, floats) + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: { + DO_COMPUTATION(DOUBLE, doubles) + break; + } + default: + return false; + } +#undef DO_COMPUTATION + replace_inputs(W, bc, conv, graph); + return true; + } + + bool patternMatchPredicate(Node* node) override { + return node->kind() == kBatchNormalization && + node->inputs()[0]->node()->kind() == kConv; + } + bool runTransform(Node* n, Graph& graph, NodeDestroyType& destroy_current) + override { + Node* bn = n; + Node* conv = n->inputs()[0]->node(); + auto origInput = bn->inputs()[0]; + if (origInput->uses().size() > 1 || bn->outputs().size() > 1 || + !modify_conv(conv, bn, graph)) { + destroy_current = NodeDestroyType::DestroyZero; + return false; + } + for (int i = 4; i >= 1; --i) { + if (bn->inputs()[i]->uses().size() == 1) { + auto input = bn->inputs()[i]; + bn->removeInput(i); + graph.eraseInitializerAndInput(input); + } + } + bn->output()->replaceAllUsesWith(origInput); + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git 
a/onnx_opt/passes/fuse_consecutive_concats.h b/onnx_opt/passes/fuse_consecutive_concats.h new file mode 100644 index 000000000..6b18413a7 --- /dev/null +++ b/onnx_opt/passes/fuse_consecutive_concats.h @@ -0,0 +1,76 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FuseConsecutiveConcats final : public PredicateBasedPass { + explicit FuseConsecutiveConcats() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Partial, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "fuse_consecutive_concats"; + } + + void insertInput(Node* node, size_t i, Value* value) { + const auto input_size = node->inputs().size(); + if (i == input_size) { + node->addInput(value); + } else { + for (size_t j = input_size - 1; j >= i; j--) { + Value* cur_input = node->input(j); + if (j == input_size - 1) { + node->addInput(cur_input); + } else { + node->replaceInput(j + 1, cur_input); + } + } + node->replaceInput(i, value); + } + } + + bool patternMatchPredicate(Node* node) override { + // we don't check if our concat node has inputs which are also concat nodes + // because this requires a for loop through the inputs. If it turns out + // there is then we still have to do a for loop in the runTransform portion + // of the code. In order not to waste a loop we don't check the real pattern + // match condition. 
+ return node->kind() == kConcat && node->hasAttribute(kaxis); + } + bool runTransform(Node* concat_node, Graph&, NodeDestroyType& destroy_current) + override { + destroy_current = NodeDestroyType::DestroyZero; + bool transform_ran = false; + for (size_t i = 0; i < concat_node->inputs().size(); i++) { + Value* cur_input_value = concat_node->inputs()[i]; + Node* cur_input_node = cur_input_value->node(); + if (cur_input_node->kind() == kConcat && + cur_input_value->uses().size() == 1 && + cur_input_node->hasAttribute(kaxis) && + cur_input_node->i(kaxis) == concat_node->i(kaxis)) { + transform_ran = true; + // Inserts n inputs of cur_input_node at index i+1~i+1+(n-1), + // and remove cur_input_node at index i. + // As a result, cur_input_node is replaced by its inputs inplace, + // instead of always appending its inputs at the end. + for (size_t j = 0; j < cur_input_node->inputs().size(); j++) { + Value* value = cur_input_node->input(j); + insertInput(concat_node, i + 1 + j, value); + } + concat_node->removeInput(i); + cur_input_node->destroy(); + } + } + return transform_ran; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_consecutive_log_softmax.h b/onnx_opt/passes/fuse_consecutive_log_softmax.h new file mode 100644 index 000000000..474c0001b --- /dev/null +++ b/onnx_opt/passes/fuse_consecutive_log_softmax.h @@ -0,0 +1,49 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FuseConsecutiveLogSoftmax final : public PredicateBasedPass { + explicit FuseConsecutiveLogSoftmax() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "fuse_consecutive_log_softmax"; + } + + bool patternMatchPredicate(Node* node) override { + return node->kind() == kLog && node->input()->node()->kind() == kSoftmax && + node->input()->uses().size() == 1; + } + bool runTransform( + Node* log_node, + Graph& graph, + NodeDestroyType& destroy_current) override { + Value* log_node_output = log_node->output(); + Node* softmax_node = log_node->inputs()[0]->node(); + Node* log_softmax_node = graph.create(kLogSoftmax, 1); + + // log_softmax_node construction + log_softmax_node->i_(kaxis, softmax_node->i(kaxis)); + log_softmax_node->addInput(softmax_node->input()); + log_softmax_node->insertBefore(softmax_node); + log_softmax_node->output()->setSizes(log_node_output->sizes()); + log_softmax_node->output()->setElemType(log_node_output->elemType()); + + log_node->replaceAllUsesWith(log_softmax_node); + log_node->removeAllInputs(); + destroy_current = NodeDestroyType::DestroyTwo; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h b/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h new file mode 100644 index 000000000..ae6731755 --- /dev/null +++ b/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h @@ -0,0 +1,65 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +const std::unordered_set reduction_operators{kReduceL1, + kReduceL2, + kReduceLogSum, + kReduceLogSumExp, + kReduceMax, + kReduceMean, + kReduceMin, + kReduceProd, + kReduceSum, + kReduceSumSquare}; + +struct FuseConsecutiveReduceUnsqueeze final : public PredicateBasedPass { + explicit FuseConsecutiveReduceUnsqueeze() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "fuse_consecutive_reduce_unsqueeze"; + } + bool patternMatchPredicate(Node* node) override { + // check that the current node is of type Unsqueeze and has defined axes + bool cur_node_check = + node->kind() == kUnsqueeze && node->hasAttribute(kaxes); + if (cur_node_check) { + Node* prev_node = node->input()->node(); + // check that the previous node a reduction operator and has defined + // axes/keepdims + bool reduction_node_check = reduction_operators.find(prev_node->kind()) != + reduction_operators.end() && + prev_node->hasAttribute(kaxes) && prev_node->hasAttribute(kkeepdims); + if (reduction_node_check) { + // insure that keepdims is set to false currently + return prev_node->i(kkeepdims) == 0 && node->is(kaxes) == prev_node->is(kaxes); + } + } + return false; + } + bool runTransform(Node* node, Graph&, NodeDestroyType& destroy_current) + override { + Node* reduction_op = node->input()->node(); + // set keepdims flag to be true + reduction_op->i_(kkeepdims, 1); + // remove unnecessary unsqueeze + reduction_op->output()->setSizes(node->output()->sizes()); + reduction_op->output()->setElemType(node->output()->elemType()); + node->output()->replaceAllUsesWith(node->input()); + destroy_current = NodeDestroyType::DestroyOne; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_consecutive_squeezes.h 
b/onnx_opt/passes/fuse_consecutive_squeezes.h new file mode 100644 index 000000000..550cadd21 --- /dev/null +++ b/onnx_opt/passes/fuse_consecutive_squeezes.h @@ -0,0 +1,80 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +// Before: +// X is a tensor with shape=[1, 1, 2, 3, 1, 5, 1] +// Y = Squeeze(X, axes=[1, 4]) -> shape=[1, 2, 3, 5, 1] +// Z = Squeeze(Y, axes=[0, 4]) -> shape=[2, 3, 5] +// After: +// Z = Squeeze(X, axes=[0, 1, 4, 6]) +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FuseConsecutiveSqueezes final : public PredicateBasedPass { + explicit FuseConsecutiveSqueezes() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "fuse_consecutive_squeezes"; + } + // returns a vector `ret` such that squeeze by `ret` is equivalent + // to squeeze by `axes_1` and then by `axes_2` + std::vector compose_squeezes( + const std::vector& axes_1, + const std::vector& axes_2) { + std::vector ret; + ret.reserve(axes_1.size() + axes_2.size()); + + std::vector sorted_axes_1(axes_1.begin(), axes_1.end()); + std::sort(sorted_axes_1.begin(), sorted_axes_1.end()); + std::copy( + sorted_axes_1.begin(), sorted_axes_1.end(), std::back_inserter(ret)); + + for (int64_t i : axes_2) { + for (auto iter = sorted_axes_1.begin(); iter != sorted_axes_1.end(); + ++iter) { + // if current axis 1 - prev_num is bigger than axis 2 + // put axis 2 + prev_num as new axis + int64_t prev_num = std::distance(sorted_axes_1.begin(), iter); + if (*iter - prev_num > i) { + ret.push_back(i + prev_num); + break; + } + // if no current axis 1 - prev_num is bigger than axis 2 + // put axis 2 + prev_num + 1 as new axis + if (std::next(iter) == sorted_axes_1.end()) { + ret.push_back(i + prev_num + 1); + } + } + } + std::sort(ret.begin(), ret.end()); + 
return ret; + } + + bool patternMatchPredicate(Node* node) override { + return node->kind() == kSqueeze && + node->input()->node()->kind() == kSqueeze; + } + bool runTransform(Node* n, Graph&, NodeDestroyType& destroy_current) + override { + auto orig_input = n->input(); + n->is_( + kaxes, compose_squeezes(orig_input->node()->is(kaxes), n->is(kaxes))); + n->replaceInput(0, orig_input->node()->input()); + if (orig_input->uses().size() == 0) { + orig_input->node()->destroy(); + } + destroy_current = NodeDestroyType::DestroyZero; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_consecutive_transposes.h b/onnx_opt/passes/fuse_consecutive_transposes.h new file mode 100644 index 000000000..ef2fb664a --- /dev/null +++ b/onnx_opt/passes/fuse_consecutive_transposes.h @@ -0,0 +1,74 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FuseConsecutiveTransposes final : public PredicateBasedPass { + explicit FuseConsecutiveTransposes() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + + std::string getPassName() const override { + return "fuse_consecutive_transposes"; + } + + // returns a vector `ret` such that transposing by `ret` is equivalent + // to transposing by `t1` and then by `t2` + std::vector compose_transposes( + const std::vector& t1, + const std::vector& t2) { + ONNX_ASSERT(t1.size() == t2.size()); + std::vector ret; + ret.reserve(t1.size()); + for (size_t i = 0; i < t1.size(); i++) { + ONNX_ASSERT(t2[i] < static_cast(t1.size())); + ONNX_ASSERT( + t1[static_cast(t2[i])] < static_cast(t1.size())); + ret.push_back(t1[static_cast(t2[i])]); + } + return ret; + } + + bool patternMatchPredicate(Node* node) override { + return node->kind() == 
kTranspose && + node->input()->node()->kind() == kTranspose; + } + + bool runTransform(Node* n, Graph&, NodeDestroyType& destroy_current) + override { + auto origInput = n->input(); + if (!n->hasAttribute(kperm) && !origInput->node()->hasAttribute(kperm)) { + // One special case (two consecutive transposes with no perm, + // since we do not have the shape information here, we have + // to eliminate two transpose together. + if (n->output()->has_sizes()) { + origInput->node()->input()->setSizes(n->output()->sizes()); + } + n->replaceAllUsesWith(origInput->node()->input()->node()); + destroy_current = NodeDestroyType::DestroyTwo; + return true; + } + if (!n->hasAttribute(kperm) || !origInput->node()->hasAttribute(kperm)) { + destroy_current = NodeDestroyType::DestroyZero; + return false; + } + n->is_( + kperm, compose_transposes(origInput->node()->is(kperm), n->is(kperm))); + n->replaceInput(0, origInput->node()->input()); + if (origInput->uses().size() == 0) { + origInput->node()->destroy(); + } + destroy_current = NodeDestroyType::DestroyZero; + return false; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h b/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h new file mode 100644 index 000000000..2ddef7cab --- /dev/null +++ b/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h @@ -0,0 +1,107 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +// Before: +// Z = MatMul(X, Y) +// A = Z + Bias +// After: +// A = Gemm(X, Y, Bias) +// +// the pass can handle the case when: +// case 1: Bias is 1D tensor and Bias.dim[0] == Z.dim[1] +// case 2: Bias is 2D tensor and Bias.dim[0] == Z.dim[0] or 1 +// and Bias.dim[1] = Z.dim[1] + +#include + +#include "onnx/common/assertions.h" +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FuseMatMulAddBiasIntoGemm final : public PredicateBasedPass { + explicit FuseMatMulAddBiasIntoGemm() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + std::string getPassName() const override { + return "fuse_matmul_add_bias_into_gemm"; + } + bool patternMatchPredicate(Node* node) override { + return node->kind() == kAdd && + node->inputs()[0]->node()->kind() == kMatMul; + } + bool runTransform(Node* n, Graph& graph, NodeDestroyType& destroy_current) + override { + // due to current broadcasting's constraint, MatMul has to be the first + // operand + destroy_current = NodeDestroyType::DestroyZero; + auto orig_matmul = n->inputs()[0]; + auto orig_bias = n->inputs()[1]; + // check if bias is Const or in graph's initializers + if (orig_bias->node()->kind() != kConstant && + orig_bias->node()->kind() != kParam) { + return false; + } + // check if MatMul is only used by Add + if (orig_matmul->uses().size() > 1) { + return false; + } + auto x_shape = orig_matmul->node()->inputs()[0]->sizes(); + auto y_shape = orig_matmul->node()->inputs()[1]->sizes(); + int64_t z_N = -1; + int64_t z_M = -1; + // try to get feature N from x_shape + if (static_cast(x_shape.size()) == 2 && x_shape[0].is_int) { + z_N = x_shape[0].dim; + } else { + return false; + } + // try to get feature M from y_shape + if (static_cast(y_shape.size()) == 2 && y_shape[1].is_int) { + z_M = y_shape[1].dim; + } else { + return false; + } + // check if bias_shape is compatible + auto bias_shape = 
orig_bias->sizes(); + auto bias_dim = static_cast(bias_shape.size()); + int64_t bias_N = -1; + int64_t bias_M = -1; + if (bias_dim == 1 && bias_shape[0].is_int) { + bias_N = 1; + bias_M = bias_shape[0].dim; + } else if (bias_dim == 2 && bias_shape[0].is_int && bias_shape[1].is_int) { + bias_N = bias_shape[0].dim; + bias_M = bias_shape[1].dim; + } else { + return false; + } + if ((bias_N != z_N && bias_N != 1) || bias_M != z_M) { + return false; + } + // proceed to fuse MatMul and Add into Gemm + Node* gemm = graph.create(kGemm, + orig_matmul->node()->inputs(), + n->outputs().size()); + gemm->addInput(n->inputs()[1]); + for (int i = 0; i < static_cast(gemm->outputs().size()); ++i) { + gemm->outputs()[i]->copyMetadata(n->outputs()[i]); + } + gemm->f_(kalpha, 1.0); + gemm->f_(kbeta, 1.0); + gemm->i_(ktransA, 0); + gemm->i_(ktransB, 0); + gemm->insertBefore(orig_matmul->node()); + n->replaceAllUsesWith(gemm); + destroy_current = NodeDestroyType::DestroyTwo; + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_pad_into_conv.h b/onnx_opt/passes/fuse_pad_into_conv.h new file mode 100644 index 000000000..4a66aa6e5 --- /dev/null +++ b/onnx_opt/passes/fuse_pad_into_conv.h @@ -0,0 +1,173 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. + +#pragma once + +// Before: +// P = Pad(X) - opset 10 and below (or) Pad(X, Pads, [Constant_value]) - opset 11 and +// above Z = Conv(P, Y) +// After: +// Z = Conv(X, Y) with "pads" attribute set +// +// the pass handles the case when Pad is zero-padding the input +// (i.e. 
mode=constant and Constant_value=0) + +#include + +#include "onnx/defs/tensor_util.h" +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FusePadIntoConv final : public PredicateBasedPass { + explicit FusePadIntoConv() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + std::string getPassName() const override { + return "fuse_pad_into_conv"; + } + bool patternMatchPredicate(Node* node) override { + return node->kind() == kConv && node->inputs()[0]->node()->kind() == kPad; + } + bool runTransform(Node* n, Graph& graph, NodeDestroyType& destroy_current) + override { + destroy_current = NodeDestroyType::DestroyZero; + + // check if Pad is only used by Conv + if (n->inputs()[0]->uses().size() > 1) { + return false; + } + + Node* conv = n; + Node* pad = n->inputs()[0]->node(); + + // Process 'pads' data + std::vector pads; + if (pad->hasAttribute(kpads)) { + // opset 10 and below + pads = pad->is(kpads); + } else { + // opset 11 and above - first check if 'pad' node has 'pads' input + // initialized + const auto& pads_name = pad->inputs()[1]->uniqueName(); + const auto pads_initializer = graph.getInitializer(pads_name); + // 'pad' node has the 'pads' input which has not been initialized - + // can't proceed with fusing + if (pads_initializer == graph.initializers().end()) { + return false; + } + + // make sure the type of 'pads' is INT64 + if (pads_initializer->elem_type() != TensorProto::INT64) { + return false; + } + + // parse 'pads' data from the initialized input + pads = ParseData(&*pads_initializer); + } + + // Process 'mode' + std::string pad_mode; + if (pad->hasAttribute(kmode)) { + pad_mode = pad->s(kmode); + } else { + pad_mode = "constant"; + } + + // cannot fuse if the pad mode is not "Constant" + if (pad_mode != "constant") { + return false; + } + + // Process 'Constant_value' + // opset 10 and below + if (pad->hasAttribute(kvalue) && 
static_cast(pad->f(kvalue)) != 0.0) { + return false; + } else if (pad->inputs().size() == 3) { + // opset 11 and above - check if the 'pad' node has the optional 'Constant_value' + // input check if it has data initialized + const auto& value_name = pad->inputs()[2]->uniqueName(); + const auto value_initializer = graph.getInitializer(value_name); + + // 'pad' node has the 'Constant_value' input which has not been initialized - + // can't proceed with fusing + if (value_initializer == graph.initializers().end()) { + return false; + } + + // parse 'Constant_value' data from the initialized input and stop optimizer if the + // Constant_value is non-zero + switch (value_initializer->elem_type()) { + case TensorProto::FLOAT: + if (ParseData(&*value_initializer)[0] != 0) + return false; // cannot fuse Pad into Conv + else + break; + + case TensorProto::DOUBLE: + if (ParseData(&*value_initializer)[0] != 0) + return false; // cannot fuse Pad into Conv + else + break; + + case TensorProto::INT32: + if (ParseData(&*value_initializer)[0] != 0) + return false; // cannot fuse Pad into Conv + else + break; + + case TensorProto::INT64: + if (ParseData(&*value_initializer)[0] != 0) + return false; // cannot fuse Pad into Conv + else + break; + + // TODO: Support more uncommon but valid types for Pad op (int8, uint8, int16, uint16, etc.) + + default: + return false; // Either type of Constant_value is invalid or not yet supported by data parsing logic. 
+ // Since we canot validate the data present in 'Constant_value', we exit the optimizer + } + } + + // check if some values in 'pads' prevents us from fusing it into 'Conv' node + int pads_size = static_cast(pads.size()); + + // check if padding is applied only on feature dims + if (pads[0] != 0 || pads[1] != 0 || pads[pads_size / 2] != 0 || + pads[pads_size / 2 + 1] != 0) { + return false; + } + + // check if padding is only positive + if (std::any_of(pads.begin(), pads.end(), [](int64_t local_value) { + return local_value < 0; + })) { + return false; + } + + int conv_pads_size = pads_size - 4; + std::vector conv_pads(conv_pads_size, 0); + // Fuse into existing padding, if available + if (conv->hasAttribute(kpads)) { + conv_pads = conv->is(kpads); + } + + for (int i = 2, j = 0; i < pads_size / 2; ++i, ++j) { + conv_pads[j] += pads[i]; + conv_pads[conv_pads_size / 2 + j] += pads[pads_size / 2 + i]; + } + + conv->is_(kpads, std::move(conv_pads)); + conv->replaceInput(0, pad->inputs()[0]); + pad->destroy(); + + return true; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/fuse_transpose_into_gemm.h b/onnx_opt/passes/fuse_transpose_into_gemm.h new file mode 100644 index 000000000..b7ada112c --- /dev/null +++ b/onnx_opt/passes/fuse_transpose_into_gemm.h @@ -0,0 +1,46 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct FuseTransposeIntoGemm final : public PredicateBasedPass { + explicit FuseTransposeIntoGemm() + : PredicateBasedPass( + PassType::Fuse, + PassEfficiency::Complete, + PassOptimizationType::Compute) {} + std::string getPassName() const override { + return "fuse_transpose_into_gemm"; + } + bool patternMatchPredicate(Node* node) override { + return node->kind() == kGemm; + } + bool runTransform(Node* n, Graph&, NodeDestroyType& destroy_current) + override { + const std::vector simple_trans_perm({1, 0}); + destroy_current = NodeDestroyType::DestroyZero; + bool ret_val = false; + for (size_t i : {0, 1}) { + auto inp = n->inputs()[i]; + auto trans = i == 0 ? ktransA : ktransB; + if (inp->node()->kind() == kTranspose && + inp->node()->is(kperm) == simple_trans_perm) { + n->replaceInput(i, inp->node()->input()); + n->i_(trans, n->hasAttribute(trans) ? !n->i(trans) : 1); + if (inp->uses().size() == 0) { + inp->node()->destroy(); + ret_val = true; + } + } + } + return ret_val; + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/lift_lexical_references.h b/onnx_opt/passes/lift_lexical_references.h new file mode 100644 index 000000000..d0a5eeec9 --- /dev/null +++ b/onnx_opt/passes/lift_lexical_references.h @@ -0,0 +1,231 @@ +#pragma once + +#include +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +// Lift lexically-scoped references within control operators to be inputs of the +// ops themselves. This transformation yields a graph that does not conform to +// the ONNX spec. +// +// The purpose of this pass is to expose the data dependencies within control +// blocks for frameworks that use those dependencies to schedule parallel +// execution. e.g. caffe2 graph execution. 
+// +// Example: +// ******************************** Before ************************************* +// graph test (%X[FLOAT, 5]) { +// %Y = Identity(%X) +// %trip_count = Constant[value = ]() +// %condition = Constant[value = ]() +// %Y2, %Y3 = Loop[body = ](%trip_count, %condition, %) +// return %Y, %Y2 +// } +// +// graph body_graph (%i[INT32, scalar], %cond[BOOL, scalar]) { +// %_Y2 = Identity(%X) +// %_Y3 = Identity(%Y) +// return %cond, %_Y2, %_Y3 +// } +// +// ******************************** After ************************************** +// graph test (%X[FLOAT, 5]) { +// %Y = Identity(%X) +// %trip_count = Constant[value = ]() +// %condition = Constant[value = ]() +// %Y2, %Y3 = Loop[__control_inputs = ['X', 'Y'], body = ](%trip_count, %condition, %) +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// return %Y, %Y2 +// } +// +// graph body_graph (%i[INT32, scalar], %cond[BOOL, scalar]) { +// %_Y2 = Identity(%X) +// %_Y3 = Identity(%Y) +// return %cond, %_Y2, %_Y3 +// } +// +// ******************************** Continue Docs******************************* +// +// The algorithm is roughly: +// symbol_table_stack = empty stack of symbol tables +// +// liftreferences(graph) +// -> a set of unresolved reference strings: +// unresolved_references = {} +// +// symbol_table_stack.push(new symbol table containing inputs for this +// sub-graph) for each node in the graph: +// for input in node.inputs: +// if input is not in this frame: +// unresolved_references.insert(input) +// if node is a control flow operator: +// for each sub-graph g: +// for each output in g's body: +// if output is defined in current scope: +// control_inputs.insert(output) +// refs = liftreferences(g) +// for each ref in refs: +// if ref is in this frame or any parent frame (control_inputs): +// control_inputs.insert(ref) +// else: +// unresolved_references.insert(ref) +// set the control inputs attribute to the node +// for output in node.outputs: +// symbol_table_stack.top()[output] = Value* +// return 
unresolved_references +struct LiftLexicalReferences : public FullGraphBasedPass { + explicit LiftLexicalReferences() + : FullGraphBasedPass( + PassType::Separate, + PassEfficiency::Complete, + PassOptimizationType::Memory) {} + + std::string getPassName() const override { + return "lift_lexical_references"; + } + PassAnalysisType getPassAnalysisType() const override { + return PassAnalysisType::Empty; + } + + using ValueTable = std::unordered_map; + + // Environment stack, please to store value table and + // controlled inputs + struct Environment { + Environment(std::shared_ptr next = nullptr) : next(next) {} + + std::shared_ptr next; + + Value* findInThisFrame(const std::string& name) { + auto it = value_table.find(name); + if (it != value_table.end()) { + return it->second; + } + return nullptr; + } + + Value* findInParentFrame(const std::string& name) { + return next ? next->findInAnyFrame(name) : nullptr; + } + + Value* findInAnyFrame(const std::string& name) { + for (auto runner = this; runner; runner = runner->next.get()) { + if (auto r = runner->findInThisFrame(name)) { + return r; + } + } + return nullptr; + } + + void setVar(const std::string& name, Value* value) { + value_table[name] = value; + } + + private: + ValueTable value_table; + }; + + std::shared_ptr environment_stack; + + // environment stack helper + void pushFrame() { + environment_stack = std::make_shared(environment_stack); + } + + std::shared_ptr popFrame() { + auto old_frame = environment_stack; + environment_stack = environment_stack->next; + return old_frame; + } + + std::set liftReferences(Graph* g) { + std::set unresolved_references; + pushFrame(); + for (auto& inp : g->inputs()) { + environment_stack->setVar(inp->uniqueName(), inp); + } + + for (auto* n : g->nodes()) { + // Skip optional input/captured value node. 
+ if (n->kind() == ONNX_NAMESPACE::kUndefined || + n->kind() == ONNX_NAMESPACE::kCaptured) { + continue; + } + for (auto* inp : n->inputs()) { + // Empty string is 0-input variadic argument. Skip that one. + if (!inp->uniqueName().empty() && + !environment_stack->findInThisFrame(inp->uniqueName())) { + unresolved_references.insert(inp->uniqueName()); + } + } + + std::set local_unresolved; + + // if a graph body output has already already been emitted outside of the + // subgraph scope, then it must be added as an input to the subgraph + auto add_subgraph_outputs = [&](Graph* body_graph) { + for (auto* out : body_graph->outputs()) { + if (environment_stack->findInAnyFrame(out->uniqueName())) { + local_unresolved.insert(out->uniqueName()); + } + } + }; + + if (n->kind() == ONNX_NAMESPACE::kLoop) { + auto* body_graph = n->g(ONNX_NAMESPACE::kbody).get(); + local_unresolved = liftReferences(body_graph); + add_subgraph_outputs(body_graph); + } else if (n->kind() == ONNX_NAMESPACE::kIf) { + auto* then_graph = n->g(ONNX_NAMESPACE::kthen_branch).get(); + add_subgraph_outputs(then_graph); + auto then_unresolved = liftReferences(then_graph); + local_unresolved.insert(then_unresolved.begin(), then_unresolved.end()); + auto* else_graph = n->g(ONNX_NAMESPACE::kelse_branch).get(); + add_subgraph_outputs(else_graph); + auto else_unresolved = liftReferences(else_graph); + local_unresolved.insert(else_unresolved.begin(), else_unresolved.end()); + } + + std::vector control_inputs; + for (auto& unresolved : local_unresolved) { + if (environment_stack->findInAnyFrame(unresolved)) { + control_inputs.push_back(unresolved); + } else { + unresolved_references.insert(unresolved); + } + } + + // Create this attribute so the backend knows how many of these inputs + // are simply there for control dependencies + if (!control_inputs.empty()) { + n->ss_(ONNX_NAMESPACE::k__control_inputs, std::move(control_inputs)); + } + + for (auto* out : n->outputs()) { + 
environment_stack->setVar(out->uniqueName(), out); + } + } + + popFrame(); + return unresolved_references; + } + + std::shared_ptr runPass(Graph& graph) override { + auto unresolved = liftReferences(&graph); + + if (unresolved.size()) { + std::string errmsg = "Unresolved value references: "; + for (auto& ref : unresolved) { + errmsg += ref + ","; + } + throw std::runtime_error(errmsg); + } + return std::shared_ptr(new PostPassAnalysis()); + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/nop.h b/onnx_opt/passes/nop.h new file mode 100644 index 000000000..a51321e02 --- /dev/null +++ b/onnx_opt/passes/nop.h @@ -0,0 +1,26 @@ +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +struct NopEmptyPass final : public FullGraphBasedPass { + explicit NopEmptyPass() + : FullGraphBasedPass( + PassType::Nop, + PassEfficiency::Complete, + PassOptimizationType::None) {} + + std::string getPassName() const override { + return "nop"; + } + PassAnalysisType getPassAnalysisType() const override { + return PassAnalysisType::Empty; + } + std::shared_ptr runPass(Graph&) override { + return std::make_shared(); + } +}; +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/onnx_opt/passes/split.h b/onnx_opt/passes/split.h new file mode 100644 index 000000000..f59a8cc1a --- /dev/null +++ b/onnx_opt/passes/split.h @@ -0,0 +1,228 @@ +// ATTENTION: The code in this file is highly EXPERIMENTAL. +// Adventurous users should note that the APIs will probably change. 
+ +#pragma once + +#include "onnx/optimizer/pass.h" + +namespace ONNX_NAMESPACE { +namespace optimization { + +static constexpr const char* impure_operators[] = { + "RandomNormal", + "RandomNormalLike", + "RandomUniform", + "RandomUniformLike", + "Loop", + "If", + "Scan", +}; + +static bool is_pure_operator(Node* n) { + for (auto x : impure_operators) { + if (n->kind() == Symbol(x)) { + return false; + } + } + return true; +} + +// Split the graph into 'init' and 'predict' nets. This is kind of +// like constant folding, except that rather than actually execute the +// constant computations, we simply split them out into a separate +// graph. Nodes that have any transitive dependency on the +// initializers, or on impure operators, must remain in the predict +// net. All others may be moved to the init net. +// +// This function destructively mutates the graph into either the init +// or the predict net. If you want both, which you probably do, +// arrange to call it twice. +// +// NOTE POTENTIAL BREAKAGE: +// +// The ONNX spec provides no guarantees about "staging", i.e. which +// inputs change on every invocation vs which generally stay the same. +// Here we make the assumption that inputs which have an initializer +// value provided for them vary only between invocations of the init +// net, and are constant across runs of the predict net. +// +static void split_init_and_predict(Graph& graph, bool init, bool predict) { + // The first step is to identify which Values are reachable from + // either of + // - inputs without corresponding initializers + // - impure operators + // Any such Values belong to the predict net. Nodes belong to the + // predict net if they are impure or if any of their inputs do. 
+ + std::unordered_set predict_net_values; + + auto value_belongs_to_predict_net = [&](Value* v) { + return predict_net_values.count(v) > 0; + }; + auto node_belongs_to_predict_net = [&](Node* n) { + return !is_pure_operator(n) || + std::any_of( + n->inputs().begin(), + n->inputs().end(), + value_belongs_to_predict_net); + }; + + { + std::unordered_set initializer_names( + graph.initializer_names().begin(), graph.initializer_names().end()); + + for (Value* v : graph.inputs()) { + if (initializer_names.count(v->uniqueName()) == 0) { + predict_net_values.insert(v); + } + } + } + + for (Node* n : graph.nodes()) { + if (node_belongs_to_predict_net(n)) { + for (Value* v : n->outputs()) { + predict_net_values.insert(v); + } + } + } + + // Any Value which is not itself in the predict net, but which + // is used by a Node which is, becomes an output of the init + // graph and an input of the predict net + std::unordered_set new_interface; + for (Node* n : graph.nodes()) { + if (node_belongs_to_predict_net(n)) { + for (Value* v : n->inputs()) { + if (!value_belongs_to_predict_net(v)) { + new_interface.insert(v); + } + } + } + } + + for (Value* v : graph.outputs()) { + if (!value_belongs_to_predict_net(v)) { + new_interface.insert(v); + } + } + + if (init) { + // Add new outputs corresponding to the boundary between init and + // predict nets, ensuring that we don't duplicate outputs. + for (Value* v : graph.outputs()) { + new_interface.erase(v); + } + for (Value* v : new_interface) { + if (v->node()->kind() == kUndefined) { + continue; + } + graph.registerOutput(v); + } + + // Remove outputs that belong to the predict net. + for (auto i = graph.outputs().size(); i--;) { + if (value_belongs_to_predict_net(graph.outputs()[i])) { + graph.return_node()->removeInput(i); + } + } + + // Delete nodes that belong to the predict net, in reverse + // topological order. 
+ for (auto it = graph.nodes().rbegin(); it != graph.nodes().rend(); it++) { + if (node_belongs_to_predict_net(*it)) { + it.destroyCurrent(); + } + } + + // Remove inputs that belong to the predict net. + for (auto i = graph.inputs().size(); i--;) { + if (value_belongs_to_predict_net(graph.inputs()[i])) { + graph.eraseInput(i); + } + } + } else if (predict) { + // When creating the predict net, 'undefined' nodes will + // naturally go into the init net. We need to have a place to + // copy the ones we want to keep in the predict net. + auto* optionalInputDummyNode = graph.create(kUndefined, 1); + graph.appendNode(optionalInputDummyNode); + optionalInputDummyNode->outputs()[0]->setUniqueName(""); + + // Add new inputs, ensuring that we don't introduce duplicates. + // Also cut the boundary between init and predict net by replacing + // the Values along the boundary with replaceAllUsesWith. + for (Value* v : graph.inputs()) { + new_interface.erase(v); + } + for (Value* v : new_interface) { + if (v->node()->kind() == kUndefined) { + v->replaceAllUsesWith(optionalInputDummyNode->outputs()[0]); + } else { + Value* newv = graph.addInput()->copyMetadata(v); + v->replaceAllUsesWith(newv); + } + } + + // Delete nodes that aren't in the predict net, in reverse + // topological order. + for (auto it = graph.nodes().rbegin(); it != graph.nodes().rend(); it++) { + if (*it == optionalInputDummyNode) { + continue; + } + if (node_belongs_to_predict_net(*it)) { + continue; + } + it.destroyCurrent(); + } + + // Remove inputs that aren't used by the predict net. + for (auto i = graph.inputs().size(); i--;) { + if (graph.inputs()[i]->uses().empty()) { + graph.eraseInput(i); + } + } + + // Remove all initializers, they are already in the init net. 
+ graph.clearInitializers(); + } +} + +struct SplitInit final : public FullGraphBasedPass { + explicit SplitInit() + : FullGraphBasedPass( + PassType::Separate, + PassEfficiency::Complete, + PassOptimizationType::Memory) {} + + std::string getPassName() const override { + return "split_init"; + } + PassAnalysisType getPassAnalysisType() const override { + return PassAnalysisType::Empty; + } + std::shared_ptr runPass(Graph& graph) override { + split_init_and_predict(graph, true, false); + return std::shared_ptr(new PostPassAnalysis()); + } +}; + +struct SplitPredict final : public FullGraphBasedPass { + explicit SplitPredict() + : FullGraphBasedPass( + PassType::Separate, + PassEfficiency::Complete, + PassOptimizationType::Memory) {} + std::string getPassName() const override { + return "split_predict"; + } + PassAnalysisType getPassAnalysisType() const override { + return PassAnalysisType::Empty; + } + std::shared_ptr runPass(Graph& graph) override { + split_init_and_predict(graph, false, true); + return std::shared_ptr(new PostPassAnalysis()); + } +}; + +} // namespace optimization +} // namespace ONNX_NAMESPACE diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..720b29389 --- /dev/null +++ b/setup.py @@ -0,0 +1,344 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from distutils.spawn import find_executable +from distutils import sysconfig, log +import setuptools +import setuptools.command.build_py +import setuptools.command.develop +import setuptools.command.build_ext + +from collections import namedtuple +from contextlib import contextmanager +import glob +import os +import shlex +import subprocess +import sys +import platform +from textwrap import dedent +import multiprocessing + + +TOP_DIR = os.path.realpath(os.path.dirname(__file__)) +SRC_DIR = os.path.join(TOP_DIR, 'onnx_opt') +CMAKE_BUILD_DIR = os.path.join(TOP_DIR, 
'.setuptools-cmake-build') + +WINDOWS = (os.name == 'nt') + +CMAKE = find_executable('cmake3') or find_executable('cmake') +MAKE = find_executable('make') + +install_requires = [] +setup_requires = [] +tests_require = [] +extras_require = {} + +################################################################################ +# Global variables for controlling the build variant +################################################################################ + +# Default value is set to TRUE\1 to keep the settings same as the current ones. +# However going forward the recomemded way to is to set this to False\0 +USE_MSVC_STATIC_RUNTIME = bool(os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1') +ONNX_ML = not bool(os.getenv('ONNX_ML') == '0') +ONNX_VERIFY_PROTO3 = bool(os.getenv('ONNX_VERIFY_PROTO3') == '1') +ONNX_NAMESPACE = os.getenv('ONNX_NAMESPACE', 'onnx') +ONNX_BUILD_TESTS = bool(os.getenv('ONNX_BUILD_TESTS') == '1') + +DEBUG = bool(os.getenv('DEBUG')) +COVERAGE = bool(os.getenv('COVERAGE')) + +################################################################################ +# Version +################################################################################ + +try: + git_version = subprocess.check_output(['git', 'rev-parse', 'HEAD'], + cwd=TOP_DIR).decode('ascii').strip() +except (OSError, subprocess.CalledProcessError): + git_version = None + +with open(os.path.join(TOP_DIR, 'VERSION_NUMBER')) as version_file: + VersionInfo = namedtuple('VersionInfo', ['version', 'git_version'])( + version=version_file.read().strip(), + git_version=git_version + ) + +################################################################################ +# Pre Check +################################################################################ + +assert CMAKE, 'Could not find "cmake" executable!' 
+ +################################################################################ +# Utilities +################################################################################ + + +@contextmanager +def cd(path): + if not os.path.isabs(path): + raise RuntimeError('Can only cd to absolute path, got: {}'.format(path)) + orig_path = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(orig_path) + + +################################################################################ +# Customized commands +################################################################################ + + +class ONNXCommand(setuptools.Command): + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + +class create_version(ONNXCommand): + def run(self): + with open(os.path.join(SRC_DIR, 'version.py'), 'w') as f: + f.write(dedent('''\ + # This file is generated by setup.py. DO NOT EDIT! + + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + + version = '{version}' + git_version = '{git_version}' + '''.format(**dict(VersionInfo._asdict())))) + + +class cmake_build(setuptools.Command): + """ + Compiles everything when `python setupmnm.py build` is run using cmake. + + Custom args can be passed to cmake by specifying the `CMAKE_ARGS` + environment variable. + + The number of CPUs used by `make` can be specified by passing `-j` + to `setup.py build`. By default all CPUs are used. 
+ """ + user_options = [ + (str('jobs='), str('j'), str('Specifies the number of jobs to use with make')) + ] + + built = False + + def initialize_options(self): + self.jobs = None + + def finalize_options(self): + if sys.version_info[0] >= 3: + self.set_undefined_options('build', ('parallel', 'jobs')) + if self.jobs is None and os.getenv("MAX_JOBS") is not None: + self.jobs = os.getenv("MAX_JOBS") + self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs) + + def run(self): + if cmake_build.built: + return + cmake_build.built = True + if not os.path.exists(CMAKE_BUILD_DIR): + os.makedirs(CMAKE_BUILD_DIR) + + with cd(CMAKE_BUILD_DIR): + build_type = 'Release' + # configure + cmake_args = [ + CMAKE, + '-DPYTHON_INCLUDE_DIR={}'.format(sysconfig.get_python_inc()), + '-DPYTHON_EXECUTABLE={}'.format(sys.executable), + '-DBUILD_ONNX_PYTHON=ON', + '-DCMAKE_EXPORT_COMPILE_COMMANDS=ON', + '-DONNX_NAMESPACE={}'.format(ONNX_NAMESPACE), + '-DPY_EXT_SUFFIX={}'.format(sysconfig.get_config_var('EXT_SUFFIX') or ''), + ] + if COVERAGE: + cmake_args.append('-DONNX_COVERAGE=ON') + if COVERAGE or DEBUG: + # in order to get accurate coverage information, the + # build needs to turn off optimizations + build_type = 'Debug' + cmake_args.append('-DCMAKE_BUILD_TYPE=%s' % build_type) + if WINDOWS: + cmake_args.extend([ + # we need to link with libpython on windows, so + # passing python version to window in order to + # find python in cmake + '-DPY_VERSION={}'.format('{0}.{1}'.format(*sys.version_info[:2])), + ]) + if USE_MSVC_STATIC_RUNTIME: + cmake_args.append('-DONNX_USE_MSVC_STATIC_RUNTIME=ON') + if platform.architecture()[0] == '64bit': + cmake_args.extend(['-A', 'x64', '-T', 'host=x64']) + else: + cmake_args.extend(['-A', 'Win32', '-T', 'host=x86']) + if ONNX_ML: + cmake_args.append('-DONNX_ML=1') + if ONNX_VERIFY_PROTO3: + cmake_args.append('-DONNX_VERIFY_PROTO3=1') + if ONNX_BUILD_TESTS: + cmake_args.append('-DONNX_BUILD_TESTS=ON') + if 'CMAKE_ARGS' in 
os.environ: + extra_cmake_args = shlex.split(os.environ['CMAKE_ARGS']) + # prevent crossfire with downstream scripts + del os.environ['CMAKE_ARGS'] + log.info('Extra cmake args: {}'.format(extra_cmake_args)) + cmake_args.extend(extra_cmake_args) + cmake_args.append(TOP_DIR) + subprocess.check_call(cmake_args) + + build_args = [CMAKE, '--build', os.curdir] + if WINDOWS: + build_args.extend(['--config', build_type]) + build_args.extend(['--', '/maxcpucount:{}'.format(self.jobs)]) + else: + build_args.extend(['--', '-j', str(self.jobs)]) + subprocess.check_call(build_args) + + +class build_py(setuptools.command.build_py.build_py): + def run(self): + self.run_command('create_version') + self.run_command('cmake_build') + + generated_python_files = \ + glob.glob(os.path.join(CMAKE_BUILD_DIR, 'onnx_opt', '*.py')) + \ + glob.glob(os.path.join(CMAKE_BUILD_DIR, 'onnx_opt', '*.pyi')) + + for src in generated_python_files: + dst = os.path.join( + TOP_DIR, os.path.relpath(src, CMAKE_BUILD_DIR)) + self.copy_file(src, dst) + + return setuptools.command.build_py.build_py.run(self) + + +class develop(setuptools.command.develop.develop): + def run(self): + self.run_command('build_py') + setuptools.command.develop.develop.run(self) + + +class build_ext(setuptools.command.build_ext.build_ext): + def run(self): + self.run_command('cmake_build') + setuptools.command.build_ext.build_ext.run(self) + + def build_extensions(self): + for ext in self.extensions: + fullname = self.get_ext_fullname(ext.name) + filename = os.path.basename(self.get_ext_filename(fullname)) + + lib_path = CMAKE_BUILD_DIR + if os.name == 'nt': + debug_lib_dir = os.path.join(lib_path, "Debug") + release_lib_dir = os.path.join(lib_path, "Release") + if os.path.exists(debug_lib_dir): + lib_path = debug_lib_dir + elif os.path.exists(release_lib_dir): + lib_path = release_lib_dir + src = os.path.join(lib_path, filename) + dst = os.path.join(os.path.realpath(self.build_lib), "onnx_opt", filename) + self.copy_file(src, 
dst) + + +class mypy_type_check(ONNXCommand): + description = 'Run MyPy type checker' + + def run(self): + """Run command.""" + onnx_script = os.path.realpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "tools/mypy-onnx.py")) + returncode = subprocess.call([sys.executable, onnx_script]) + sys.exit(returncode) + + +cmdclass = { + 'create_version': create_version, + 'cmake_build': cmake_build, + 'build_py': build_py, + 'develop': develop, + 'build_ext': build_ext, + 'typecheck': mypy_type_check, +} + +################################################################################ +# Extensions +################################################################################ + +ext_modules = [ + setuptools.Extension( + name=str('onnx_opt.onnx_opt_cpp2py_export'), + sources=[]) +] + +################################################################################ +# Packages +################################################################################ + +# no need to do fancy stuff so far +packages = setuptools.find_packages() + +install_requires.extend([ + 'protobuf', + 'numpy', + 'six', + 'typing>=3.6.4; python_version < "3.5"', + 'typing-extensions>=3.6.2.1', +]) + +################################################################################ +# Test +################################################################################ + +setup_requires.append('pytest-runner') +tests_require.append('pytest') +tests_require.append('nbval') +tests_require.append('tabulate') + +if sys.version_info[0] == 3: + # Mypy doesn't work with Python 2 + extras_require['mypy'] = ['mypy==0.600'] + +################################################################################ +# Final +################################################################################ + +setuptools.setup( + name="onnx_opt", + version=VersionInfo.version, + description="Open Neural Network Exchange", + ext_modules=ext_modules, + cmdclass=cmdclass, + packages=packages, + 
license='MIT', + include_package_data=True, + install_requires=install_requires, + setup_requires=setup_requires, + tests_require=tests_require, + extras_require=extras_require, + author='ONNX', + author_email='onnx-technical-discuss@lists.lfai.foundation', + url='https://github.com/onnx/onnx', + entry_points={ + 'console_scripts': [ + 'check-model = onnx.bin.checker:check_model', + 'check-node = onnx.bin.checker:check_node', + 'backend-test-tools = onnx.backend.test.cmd_tools:main', + ] + }, +) + diff --git a/third_party/onnx b/third_party/onnx new file mode 160000 index 000000000..c443abd2a --- /dev/null +++ b/third_party/onnx @@ -0,0 +1 @@ +Subproject commit c443abd2acad2411103593600319ff81a676afbc From 4e80d5ba567727a3ff40ef4ed92798493e77d87f Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 15:28:37 +0800 Subject: [PATCH 02/14] change include dir, add code about cmake install --- CMakeLists.txt | 32 +++++++++++++- cmake/ONNXOptimizerConfig.cmake.in | 24 +++++++++++ cmake/ONNXOptimizerConfigVersion.cmake.in | 12 ++++++ onnx_opt/cpp2py_export.cc | 2 +- onnx_opt/optimize.cc | 2 +- onnx_opt/optimize.h | 4 +- onnx_opt/pass.cc | 2 +- onnx_opt/pass_manager.cc | 2 +- onnx_opt/pass_manager.h | 4 +- onnx_opt/pass_registry.cc | 2 +- onnx_opt/pass_registry.h | 42 +++++++++---------- onnx_opt/passes/eliminate_deadend.h | 2 +- onnx_opt/passes/eliminate_identity.h | 2 +- onnx_opt/passes/eliminate_nop_dropout.h | 2 +- .../passes/eliminate_nop_monotone_argmax.h | 2 +- onnx_opt/passes/eliminate_nop_pad.h | 2 +- onnx_opt/passes/eliminate_nop_transpose.h | 2 +- .../passes/eliminate_unused_initializer.h | 2 +- .../passes/extract_constant_to_initializer.h | 2 +- onnx_opt/passes/fuse_add_bias_into_conv.h | 2 +- onnx_opt/passes/fuse_bn_into_conv.h | 2 +- onnx_opt/passes/fuse_consecutive_concats.h | 2 +- .../passes/fuse_consecutive_log_softmax.h | 2 +- .../fuse_consecutive_reduce_unsqueeze.h | 2 +- onnx_opt/passes/fuse_consecutive_squeezes.h | 2 +- 
onnx_opt/passes/fuse_consecutive_transposes.h | 2 +- .../passes/fuse_matmul_add_bias_into_gemm.h | 2 +- onnx_opt/passes/fuse_pad_into_conv.h | 2 +- onnx_opt/passes/fuse_transpose_into_gemm.h | 2 +- onnx_opt/passes/lift_lexical_references.h | 2 +- onnx_opt/passes/nop.h | 2 +- onnx_opt/passes/split.h | 2 +- 32 files changed, 117 insertions(+), 53 deletions(-) create mode 100644 cmake/ONNXOptimizerConfig.cmake.in create mode 100644 cmake/ONNXOptimizerConfigVersion.cmake.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 2502e9224..7a4abbf3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,6 @@ project(onnx_optimizer C CXX) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(ONNX_ROOT ${PROJECT_SOURCE_DIR}/third_party/onnx) -option(ONNX_BUILD_OPTIMIZER "" OFF) add_subdirectory(${ONNX_ROOT}) file(GLOB_RECURSE onnx_opt_srcs "onnx_opt/*.cc" @@ -15,6 +14,10 @@ list(REMOVE_ITEM onnx_opt_srcs "${PROJECT_SOURCE_DIR}/onnx_opt/cpp2py_export.cc" add_library(onnx_optimizer ${onnx_opt_srcs}) target_link_libraries(onnx_optimizer PUBLIC onnx) +target_include_directories(onnx_optimizer PUBLIC + $ + $ + ) if(BUILD_ONNX_PYTHON) if("${PY_EXT_SUFFIX}" STREQUAL "") @@ -34,7 +37,6 @@ if(BUILD_ONNX_PYTHON) PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) target_include_directories(onnx_opt_cpp2py_export PRIVATE $ - $ $ ${PYTHON_INCLUDE_DIR}) # pybind11 is a header only lib @@ -108,3 +110,29 @@ if(BUILD_ONNX_PYTHON) add_msvc_runtime_flag(onnx_opt_cpp2py_export) endif() endif() + +include(GNUInstallDirs) + +install(DIRECTORY ${PROJECT_SOURCE_DIR}/onnx_opt + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING + PATTERN "*.h") + +configure_file( + ${PROJECT_SOURCE_DIR}/cmake/ONNXOptimizerConfigVersion.cmake.in + ${PROJECT_BINARY_DIR}/ONNXOptimizerConfigVersion.cmake + @ONLY) +configure_file( + ${PROJECT_SOURCE_DIR}/cmake/ONNXOptimizerConfig.cmake.in + ${PROJECT_BINARY_DIR}/ONNXOptimizerConfig.cmake + @ONLY) +install(FILES + 
${PROJECT_BINARY_DIR}/ONNXOptimizerConfigVersion.cmake + ${PROJECT_BINARY_DIR}/ONNXOptimizerConfig.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ONNXOptimizer + COMPONENT dev) +install(EXPORT ONNXOptimizerTargets DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/ONNXOptimizer") +install(TARGETS + onnx_optimizer + EXPORT ONNXOptimizerTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) + diff --git a/cmake/ONNXOptimizerConfig.cmake.in b/cmake/ONNXOptimizerConfig.cmake.in new file mode 100644 index 000000000..85c56549e --- /dev/null +++ b/cmake/ONNXOptimizerConfig.cmake.in @@ -0,0 +1,24 @@ +# - Config file for the ONNX Optimizer package +# It defines the following variable(s) +# ONNX_OPTIMIZER_INCLUDE_DIRS - include directories for onnx optimizer +# as well as ONNX Optimizer targets for other cmake libraries to use. + +# library version information +set(ONNX_OPTIMIZER_VERSION "@ONNX_OPTIMIZER_VERSION@") + +# import targets +include ("${CMAKE_CURRENT_LIST_DIR}/ONNXOptimizerTargets.cmake") + +# include directory. +# +# Newer versions of CMake set the INTERFACE_INCLUDE_DIRECTORIES property +# of the imported targets. It is hence not necessary to add this path +# manually to the include search path for targets which link to gflags. +# The following lines are here for backward compatibility, in case one +# would like to use the old-style include path. 
+get_filename_component( + CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component( + _INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) +set(ONNX_INCLUDE_DIRS "${_INSTALL_PREFIX}/include") + diff --git a/cmake/ONNXOptimizerConfigVersion.cmake.in b/cmake/ONNXOptimizerConfigVersion.cmake.in new file mode 100644 index 000000000..76724ae13 --- /dev/null +++ b/cmake/ONNXOptimizerConfigVersion.cmake.in @@ -0,0 +1,12 @@ +set(PACKAGE_VERSION "@ONNX_OPTIMIZER_VERSION@") + +# Check whether the requested PACKAGE_FIND_VERSION is compatible +if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + set(PACKAGE_VERSION_COMPATIBLE TRUE) + if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") + set(PACKAGE_VERSION_EXACT TRUE) + endif() +endif() + diff --git a/onnx_opt/cpp2py_export.cc b/onnx_opt/cpp2py_export.cc index 0a4f60af0..6393baa86 100644 --- a/onnx_opt/cpp2py_export.cc +++ b/onnx_opt/cpp2py_export.cc @@ -1,7 +1,7 @@ #include #include -#include "onnx/optimizer/optimize.h" +#include "onnx_opt/optimize.h" #include "onnx/py_utils.h" namespace ONNX_NAMESPACE { diff --git a/onnx_opt/optimize.cc b/onnx_opt/optimize.cc index dbacb1a32..7b27aebb8 100644 --- a/onnx_opt/optimize.cc +++ b/onnx_opt/optimize.cc @@ -1,7 +1,7 @@ // ATTENTION: The code in this file is highly EXPERIMENTAL. // Adventurous users should note that the APIs will probably change. 
-#include "onnx/optimizer/optimize.h" +#include "onnx_opt/optimize.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/optimize.h b/onnx_opt/optimize.h index 018a62f79..459beb337 100644 --- a/onnx_opt/optimize.h +++ b/onnx_opt/optimize.h @@ -6,8 +6,8 @@ #include "onnx/common/ir.h" #include "onnx/common/ir_pb_converter.h" #include "onnx/common/stl_backports.h" -#include "onnx/optimizer/pass_manager.h" -#include "onnx/optimizer/pass_registry.h" +#include "onnx_opt/pass_manager.h" +#include "onnx_opt/pass_registry.h" #include "onnx/proto_utils.h" #include "vector" diff --git a/onnx_opt/pass.cc b/onnx_opt/pass.cc index df0a89b5b..27ac67062 100644 --- a/onnx_opt/pass.cc +++ b/onnx_opt/pass.cc @@ -1,4 +1,4 @@ -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" #include "onnx/common/assertions.h" namespace ONNX_NAMESPACE { diff --git a/onnx_opt/pass_manager.cc b/onnx_opt/pass_manager.cc index 7ad365a06..0c9cae0a9 100644 --- a/onnx_opt/pass_manager.cc +++ b/onnx_opt/pass_manager.cc @@ -1,4 +1,4 @@ -#include "onnx/optimizer/pass_manager.h" +#include "onnx_opt/pass_manager.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass_manager.h b/onnx_opt/pass_manager.h index c7ba35ef0..44a3899b4 100644 --- a/onnx_opt/pass_manager.h +++ b/onnx_opt/pass_manager.h @@ -3,8 +3,8 @@ // Adventurous users should note that the APIs will probably change. #include -#include "onnx/optimizer/pass.h" -#include "onnx/optimizer/passes/eliminate_deadend.h" +#include "onnx_opt/pass.h" +#include "onnx_opt/passes/eliminate_deadend.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass_registry.cc b/onnx_opt/pass_registry.cc index c2eee87f8..b1c6cc69d 100644 --- a/onnx_opt/pass_registry.cc +++ b/onnx_opt/pass_registry.cc @@ -1,7 +1,7 @@ // ATTENTION: The code in this file is highly EXPERIMENTAL. // Adventurous users should note that the APIs will probably change. 
-#include "onnx/optimizer/pass_registry.h" +#include "onnx_opt/pass_registry.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass_registry.h b/onnx_opt/pass_registry.h index a925b9142..5c4350359 100644 --- a/onnx_opt/pass_registry.h +++ b/onnx_opt/pass_registry.h @@ -6,27 +6,27 @@ #include "onnx/common/ir.h" #include "onnx/common/ir_pb_converter.h" #include "onnx/common/stl_backports.h" -#include "onnx/optimizer/passes/eliminate_deadend.h" -#include "onnx/optimizer/passes/eliminate_identity.h" -#include "onnx/optimizer/passes/eliminate_nop_dropout.h" -#include "onnx/optimizer/passes/eliminate_nop_monotone_argmax.h" -#include "onnx/optimizer/passes/eliminate_nop_pad.h" -#include "onnx/optimizer/passes/eliminate_nop_transpose.h" -#include "onnx/optimizer/passes/eliminate_unused_initializer.h" -#include "onnx/optimizer/passes/extract_constant_to_initializer.h" -#include "onnx/optimizer/passes/fuse_add_bias_into_conv.h" -#include "onnx/optimizer/passes/fuse_bn_into_conv.h" -#include "onnx/optimizer/passes/fuse_consecutive_concats.h" -#include "onnx/optimizer/passes/fuse_consecutive_log_softmax.h" -#include "onnx/optimizer/passes/fuse_consecutive_reduce_unsqueeze.h" -#include "onnx/optimizer/passes/fuse_consecutive_squeezes.h" -#include "onnx/optimizer/passes/fuse_consecutive_transposes.h" -#include "onnx/optimizer/passes/fuse_matmul_add_bias_into_gemm.h" -#include "onnx/optimizer/passes/fuse_pad_into_conv.h" -#include "onnx/optimizer/passes/fuse_transpose_into_gemm.h" -#include "onnx/optimizer/passes/lift_lexical_references.h" -#include "onnx/optimizer/passes/nop.h" -#include "onnx/optimizer/passes/split.h" +#include "onnx_opt/passes/eliminate_deadend.h" +#include "onnx_opt/passes/eliminate_identity.h" +#include "onnx_opt/passes/eliminate_nop_dropout.h" +#include "onnx_opt/passes/eliminate_nop_monotone_argmax.h" +#include "onnx_opt/passes/eliminate_nop_pad.h" +#include "onnx_opt/passes/eliminate_nop_transpose.h" +#include 
"onnx_opt/passes/eliminate_unused_initializer.h" +#include "onnx_opt/passes/extract_constant_to_initializer.h" +#include "onnx_opt/passes/fuse_add_bias_into_conv.h" +#include "onnx_opt/passes/fuse_bn_into_conv.h" +#include "onnx_opt/passes/fuse_consecutive_concats.h" +#include "onnx_opt/passes/fuse_consecutive_log_softmax.h" +#include "onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h" +#include "onnx_opt/passes/fuse_consecutive_squeezes.h" +#include "onnx_opt/passes/fuse_consecutive_transposes.h" +#include "onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h" +#include "onnx_opt/passes/fuse_pad_into_conv.h" +#include "onnx_opt/passes/fuse_transpose_into_gemm.h" +#include "onnx_opt/passes/lift_lexical_references.h" +#include "onnx_opt/passes/nop.h" +#include "onnx_opt/passes/split.h" #include "onnx/proto_utils.h" #include diff --git a/onnx_opt/passes/eliminate_deadend.h b/onnx_opt/passes/eliminate_deadend.h index a70e2ec9d..db121d694 100644 --- a/onnx_opt/passes/eliminate_deadend.h +++ b/onnx_opt/passes/eliminate_deadend.h @@ -2,7 +2,7 @@ // ATTENTION: The code in this file is highly EXPERIMENTAL. // Adventurous users should note that the APIs will probably change. 
#pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { struct EliminateDeadEnd final : public FullGraphBasedPass { diff --git a/onnx_opt/passes/eliminate_identity.h b/onnx_opt/passes/eliminate_identity.h index 3f5f8525b..4fae9fc19 100644 --- a/onnx_opt/passes/eliminate_identity.h +++ b/onnx_opt/passes/eliminate_identity.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_dropout.h b/onnx_opt/passes/eliminate_nop_dropout.h index 6be190b5e..94ffb9fa3 100644 --- a/onnx_opt/passes/eliminate_nop_dropout.h +++ b/onnx_opt/passes/eliminate_nop_dropout.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_monotone_argmax.h b/onnx_opt/passes/eliminate_nop_monotone_argmax.h index 67f3fdf79..0e3334225 100644 --- a/onnx_opt/passes/eliminate_nop_monotone_argmax.h +++ b/onnx_opt/passes/eliminate_nop_monotone_argmax.h @@ -2,7 +2,7 @@ // Adventurous users should note that the APIs will probably change. 
#pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_pad.h b/onnx_opt/passes/eliminate_nop_pad.h index 23f8cfc6f..04441f983 100644 --- a/onnx_opt/passes/eliminate_nop_pad.h +++ b/onnx_opt/passes/eliminate_nop_pad.h @@ -4,7 +4,7 @@ #pragma once #include "onnx/defs/tensor_util.h" -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_transpose.h b/onnx_opt/passes/eliminate_nop_transpose.h index ba1595dfc..daad9c8d9 100644 --- a/onnx_opt/passes/eliminate_nop_transpose.h +++ b/onnx_opt/passes/eliminate_nop_transpose.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_unused_initializer.h b/onnx_opt/passes/eliminate_unused_initializer.h index 95995610f..592dc1cec 100644 --- a/onnx_opt/passes/eliminate_unused_initializer.h +++ b/onnx_opt/passes/eliminate_unused_initializer.h @@ -14,7 +14,7 @@ // condition 1: A is not used as any node's input // condition 2: A is not an output -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/extract_constant_to_initializer.h b/onnx_opt/passes/extract_constant_to_initializer.h index 696f78c6e..5fafe85a6 100644 --- a/onnx_opt/passes/extract_constant_to_initializer.h +++ b/onnx_opt/passes/extract_constant_to_initializer.h @@ -11,7 +11,7 @@ // this pass can handle the case satisfy all following conditions: // condition 1: A is the output of a Constant node #include "onnx/common/assertions.h" -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_add_bias_into_conv.h b/onnx_opt/passes/fuse_add_bias_into_conv.h index 
0af10cd7c..22a8adb9f 100644 --- a/onnx_opt/passes/fuse_add_bias_into_conv.h +++ b/onnx_opt/passes/fuse_add_bias_into_conv.h @@ -16,7 +16,7 @@ #include #include "onnx/common/assertions.h" -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_bn_into_conv.h b/onnx_opt/passes/fuse_bn_into_conv.h index e4fdddca1..697f9cbd7 100644 --- a/onnx_opt/passes/fuse_bn_into_conv.h +++ b/onnx_opt/passes/fuse_bn_into_conv.h @@ -29,7 +29,7 @@ // $$ b' = (b_{conv} - m)\frac{s}{\sqrt{\sigma + \epsilon}} + b_{bn}$$ #include "onnx/common/assertions.h" -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_concats.h b/onnx_opt/passes/fuse_consecutive_concats.h index 6b18413a7..9e42c7530 100644 --- a/onnx_opt/passes/fuse_consecutive_concats.h +++ b/onnx_opt/passes/fuse_consecutive_concats.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_log_softmax.h b/onnx_opt/passes/fuse_consecutive_log_softmax.h index 474c0001b..8f732c17f 100644 --- a/onnx_opt/passes/fuse_consecutive_log_softmax.h +++ b/onnx_opt/passes/fuse_consecutive_log_softmax.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h b/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h index ae6731755..550fb5cc1 100644 --- a/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h +++ b/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_squeezes.h 
b/onnx_opt/passes/fuse_consecutive_squeezes.h index 550cadd21..2b1c8aa51 100644 --- a/onnx_opt/passes/fuse_consecutive_squeezes.h +++ b/onnx_opt/passes/fuse_consecutive_squeezes.h @@ -9,7 +9,7 @@ // Z = Squeeze(Y, axes=[0, 4]) -> shape=[2, 3, 5] // After: // Z = Squeeze(X, axes=[0, 1, 4, 6]) -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_transposes.h b/onnx_opt/passes/fuse_consecutive_transposes.h index ef2fb664a..6b7d58978 100644 --- a/onnx_opt/passes/fuse_consecutive_transposes.h +++ b/onnx_opt/passes/fuse_consecutive_transposes.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h b/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h index 2ddef7cab..8d093cee1 100644 --- a/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h +++ b/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h @@ -17,7 +17,7 @@ #include #include "onnx/common/assertions.h" -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_pad_into_conv.h b/onnx_opt/passes/fuse_pad_into_conv.h index 4a66aa6e5..575a199b2 100644 --- a/onnx_opt/passes/fuse_pad_into_conv.h +++ b/onnx_opt/passes/fuse_pad_into_conv.h @@ -15,7 +15,7 @@ #include #include "onnx/defs/tensor_util.h" -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_transpose_into_gemm.h b/onnx_opt/passes/fuse_transpose_into_gemm.h index b7ada112c..b9fab13af 100644 --- a/onnx_opt/passes/fuse_transpose_into_gemm.h +++ b/onnx_opt/passes/fuse_transpose_into_gemm.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff 
--git a/onnx_opt/passes/lift_lexical_references.h b/onnx_opt/passes/lift_lexical_references.h index d0a5eeec9..2082c555c 100644 --- a/onnx_opt/passes/lift_lexical_references.h +++ b/onnx_opt/passes/lift_lexical_references.h @@ -1,7 +1,7 @@ #pragma once #include -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/nop.h b/onnx_opt/passes/nop.h index a51321e02..de71fab65 100644 --- a/onnx_opt/passes/nop.h +++ b/onnx_opt/passes/nop.h @@ -1,6 +1,6 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/split.h b/onnx_opt/passes/split.h index f59a8cc1a..c7311201b 100644 --- a/onnx_opt/passes/split.h +++ b/onnx_opt/passes/split.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/optimizer/pass.h" +#include "onnx_opt/pass.h" namespace ONNX_NAMESPACE { namespace optimization { From 5308d3ed859b467a2de449e573fff19d66156c03 Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 15:32:48 +0800 Subject: [PATCH 03/14] move optimizer_test.py --- onnx_opt/test/optimizer_test.py | 1774 +++++++++++++++++++++++++++++++ 1 file changed, 1774 insertions(+) create mode 100644 onnx_opt/test/optimizer_test.py diff --git a/onnx_opt/test/optimizer_test.py b/onnx_opt/test/optimizer_test.py new file mode 100644 index 000000000..6fb1de61a --- /dev/null +++ b/onnx_opt/test/optimizer_test.py @@ -0,0 +1,1774 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import onnx +from onnx import checker, helper, ModelProto, TensorProto, GraphProto, NodeProto, shape_inference +from typing import Sequence, Text, Any, Tuple, List, Callable +from onnx import numpy_helper + +import numpy as np # type: ignore + +import onnx_opt +import unittest + + +class TestOptimizer(unittest.TestCase): + + def 
_optimized(self, graph, opts, fixed_point=False, **kwargs): # type: (GraphProto, Sequence[Text], bool, **Any) -> ModelProto + orig_model = helper.make_model(graph, producer_name='onnx-test', **kwargs) + optimized_model = onnx_opt.optimize(orig_model, opts, fixed_point) + checker.check_model(optimized_model) + return optimized_model + + # input_types and output_types are lists of triples of (name, type, shape) + def _make_fake_loop_op(self, + body_nodes, # type: Sequence[NodeProto] + input_types, # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + output_types # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + ): # type: (...) -> List[NodeProto] + zero = helper.make_tensor( + "trip_count_value", TensorProto.INT64, (), [10]) + true = helper.make_tensor("condition", TensorProto.BOOL, (), [True]) + # lcd is a dummy loop-carried dependency that only exists because + # right now the schema checker is broken and assumes a variadic + # input needs at least one value. 
+ graph_inputs = [helper.make_tensor_value_info("i", TensorProto.INT64, ()), + helper.make_tensor_value_info("cond", TensorProto.BOOL, ())] + for type, shape, name in input_types: + graph_inputs.append( + helper.make_tensor_value_info("_" + name, type, shape)) + graph_outputs = [helper.make_tensor_value_info( + "cond", TensorProto.BOOL, ())] + for type, shape, name in output_types: + graph_outputs.append( + helper.make_tensor_value_info("_" + name, type, shape)) + body_graph = helper.make_graph(body_nodes, "body_graph", graph_inputs, + graph_outputs) + loop_inputs = ["trip_count", "condition"] + loop_inputs.extend([name for _, _, name in input_types]) + # TODO: fix checker to accept 0-input variadic inputs + if len(loop_inputs) == 2: + loop_inputs.append("") + loop_outputs = [name for _, _, name in output_types] + retval_nodes = [ + helper.make_node("Constant", [], ["trip_count"], value=zero), + helper.make_node("Constant", [], ["condition"], value=true), + helper.make_node("Loop", loop_inputs, loop_outputs, body=body_graph) + ] + return retval_nodes + + def _make_fake_if_op(self, + true_nodes, # type: Sequence[NodeProto] + false_nodes, # type: Sequence[NodeProto] + output_types # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + ): # type: (...) 
-> List[NodeProto] + true = helper.make_tensor("condition", TensorProto.BOOL, (), [True]) + true_graph = helper.make_graph(true_nodes, "true_graph", [], []) + false_graph = helper.make_graph(false_nodes, "false_graph", [], []) + if_inputs = ["condition"] + if_outputs = [name for _, _, name in output_types] + retval_nodes = [ + helper.make_node("Constant", [], ["condition"], value=true), + helper.make_node("If", if_inputs, if_outputs, then_branch=true_graph, + else_branch=false_graph) + ] + return retval_nodes + + # fn is a function that takes a single node as argument + def _visit_all_nodes_recursive(self, graph, fn): # type: (GraphProto, Callable[[NodeProto], None]) -> None + for node in graph.node: + fn(node) + for attr in node.attribute: + if attr.g is not None: + self._visit_all_nodes_recursive(attr.g, fn) + if len(attr.graphs): + for gr in attr.graphs: + self._visit_all_nodes_recursive(gr, fn) + + def test_get_available_passes(self): # type: () -> None + # FIXME does not guarantees to be listing all + graph = helper.make_graph([], "dummy_graph", [], []) + list_of_passes = onnx_opt.get_available_passes() + assert isinstance(list_of_passes, (list)) and len(list_of_passes) > 0 + for pass_name in list_of_passes: + # If pass_name is invalid it throws a RuntimeError + self._optimized(graph, [pass_name]) + + def test_eliminate_identity_single_use(self): # type: () -> None + nodes = [helper.make_node("Identity", ["X"], ["Y"])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Identity", ["_Y"], ["_Y2"])], + [(TensorProto.FLOAT, (5,), "Y")], + [(TensorProto.FLOAT, (5,), "Y2")])) + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (5,))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (5,)), + helper.make_tensor_value_info("Y2", TensorProto.FLOAT, (5,))]) + optimized_model = self._optimized(graph, ["eliminate_identity"]) + + # All identity nodes should have been eliminated + def 
check_identity(node): # type: (NodeProto) -> None + assert node.op_type != "Identity" + self._visit_all_nodes_recursive(optimized_model.graph, check_identity) + # Use of the output from the Identity node in the main graph should + # have been replaced with the input to the identity node + assert len(optimized_model.graph.output) == 2 + assert optimized_model.graph.output[0].name == "X" + # Use of the output from the Identity node in the loop graph should + # have been replaced with the input to that identity node + assert len(optimized_model.graph.node[2].attribute[0].g.output) == 2 + assert optimized_model.graph.node[2].attribute[0].g.output[1].name == "_Y" + + def test_eliminate_identity_graph_output(self): # type: () -> None + add = helper.make_node("Add", ["X", "Y"], ["A"]) + identity = helper.make_node("Identity", ["A"], ["B"]) + graph = helper.make_graph( + [add, identity], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (5,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (5,))], + [helper.make_tensor_value_info("B", TensorProto.FLOAT, (5,))]) + optimized_model = self._optimized(graph, ["eliminate_identity"]) + + for node in optimized_model.graph.node: + assert node.op_type != "Identity" + assert len(optimized_model.graph.node) == 1 + + def test_eliminate_identity_multiple_uses(self): # type: () -> None + identity = helper.make_node("Identity", ["X"], ["Y"]) + add = helper.make_node("Add", ["Z", "Y"], ["A"]) + mul = helper.make_node("Mul", ["A", "Y"], ["B"]) + graph = helper.make_graph( + [identity, add, mul], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (5,)), + helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5,))], + [helper.make_tensor_value_info("B", TensorProto.FLOAT, (5,))]) + optimized_model = self._optimized(graph, ["eliminate_identity"]) + + for node in optimized_model.graph.node: + assert node.op_type != "Identity" + assert len(optimized_model.graph.node) == 2 + + def 
test_nop_transpose_graph_output(self): # type: () -> None + add = helper.make_node("Add", ["X", "Y"], ["A"]) + trans = helper.make_node("Transpose", ["A"], ["B"], perm=[0, 1]) + graph = helper.make_graph( + [add, trans], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("B", TensorProto.FLOAT, (2, 3))]) + # The existence of shape infos of graph outputs is checked in _optimized + optimized_model = self._optimized(graph, ["eliminate_nop_transpose"]) + + def check_transpose(node): # type: (NodeProto) -> None + assert node.op_type != "Transpose" + self._visit_all_nodes_recursive(optimized_model.graph, check_transpose) + assert len(optimized_model.graph.node) == 1 + + def test_nop_transpose(self): # type: () -> None + nodes = [helper.make_node("Transpose", ["X"], ["Y"], perm=[0, 1])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Transpose", ["_Y"], ["_Y2"], perm=[0, 1])], + [(TensorProto.FLOAT, (2, 3), "Y")], + [(TensorProto.FLOAT, (2, 3), "Y2")])) + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3)), + helper.make_tensor_value_info("Y2", TensorProto.FLOAT, (2, 3))]) + optimized_model = self._optimized(graph, ["eliminate_nop_transpose"]) + + def check_transpose(node): # type: (NodeProto) -> None + assert node.op_type != "Transpose" + self._visit_all_nodes_recursive(optimized_model.graph, check_transpose) + # Use of the output from the Transpose node in the main graph should + # have been replaced with the input to the identity node + assert len(optimized_model.graph.output) == 2 + assert optimized_model.graph.output[0].name == "X" + # Use of the output from the Transpose node in the loop graph should + # have been replaced with the input to that identity node + assert 
len(optimized_model.graph.node[2].attribute[0].g.output) == 2 + assert optimized_model.graph.node[2].attribute[0].g.output[1].name == "_Y" + + def test_nop_transpose_default(self): # type: () -> None + trans = helper.make_node("Transpose", ["X"], ["Y"]) + graph = helper.make_graph( + [trans], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (3, 2))]) + optimized_model = self._optimized(graph, ["eliminate_nop_transpose"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Transpose" + + def test_nop_pad_opset10(self): # type: () -> None + nodes = [helper.make_node("Pad", ["X"], ["Y"], pads=[0, 0])] + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3))]) + assert len(graph.node) == 1 + optimized_model = self._optimized(graph, ["eliminate_nop_pad"], False, opset_imports=[helper.make_opsetid("", 10)]) + + def check_pad(node): # type: (NodeProto) -> None + assert node.op_type != "Pad" + self._visit_all_nodes_recursive(optimized_model.graph, check_pad) + assert len(optimized_model.graph.output) == 1 + assert optimized_model.graph.output[0].name == "X" + assert len(optimized_model.graph.node) == 0 + + def test_nop_pad_graph_output(self): # type: () -> None + add = helper.make_node("Add", ["X", "Y"], ["A"]) + pad = helper.make_node("Pad", ["A", "Pads"], ["B"]) + graph = helper.make_graph( + [add, pad], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (5,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (5,)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (2,))], + [helper.make_tensor_value_info("B", TensorProto.FLOAT, (5,))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(2,), + vals=np.array([0, 0]).astype(np.int64).tobytes(), + raw=True)]) + # The existence of 
shape infos of graph outputs is checked in _optimized + optimized_model = self._optimized(graph, ["eliminate_nop_pad"]) + + def check_pad(node): # type: (NodeProto) -> None + assert node.op_type != "Pad" + self._visit_all_nodes_recursive(optimized_model.graph, check_pad) + assert len(optimized_model.graph.node) == 1 + + def test_nop_pad(self): # type: () -> None + nodes = [helper.make_node("Pad", ["X", "Pads"], ["Y"])] + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (4,))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(4,), + vals=np.array([0, 0, 0, 0]).astype(np.int64).tobytes(), + raw=True)]) + assert len(graph.node) == 1 + optimized_model = self._optimized(graph, ["eliminate_nop_pad"]) + + def check_pad(node): # type: (NodeProto) -> None + assert node.op_type != "Pad" + self._visit_all_nodes_recursive(optimized_model.graph, check_pad) + assert len(optimized_model.graph.output) == 1 + assert optimized_model.graph.output[0].name == "X" + assert len(optimized_model.graph.node) == 0 + + def test_nop_pad_default_opset10(self): # type: () -> None + trans = helper.make_node("Pad", ["X"], ["Y"], pads=[0, 1]) + graph = helper.make_graph( + [trans], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 4))]) + optimized_model = self._optimized(graph, ["eliminate_nop_pad"], False, opset_imports=[helper.make_opsetid("", 10)]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Pad" + + def test_nop_pad_default(self): # type: () -> None + trans = helper.make_node("Pad", ["X", "Pads"], ["Y"]) + graph = helper.make_graph( + [trans], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)), + 
helper.make_tensor_value_info("Pads", TensorProto.INT64, (4,))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 4))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(4,), + vals=np.array([0, 1, 0, 0]).astype(np.int64).tobytes(), + raw=True)]) + optimized_model = self._optimized(graph, ["eliminate_nop_pad"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Pad" + + def test_eliminate_unused_initializer(self): # type: () -> None + add = helper.make_node("Add", ["X", "Y"], ["Z"]) + graph = helper.make_graph( + [add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (1, 2))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 2))], + [helper.make_tensor("A", TensorProto.FLOAT, + dims=(2, 3), + vals=np.random.randn(2, 3).astype( + np.float32).tobytes(), + raw=True)]) + optimized_model = self._optimized( + graph, ["eliminate_unused_initializer"]) + + assert len(list(optimized_model.graph.initializer)) == 0 + + def test_eliminate_unused_initializer_input(self): # type: () -> None + add = helper.make_node("Add", ["X", "Y"], ["Z"]) + graph = helper.make_graph( + [add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 2))], + [helper.make_tensor("A", TensorProto.FLOAT, + dims=(2, 3), + vals=np.random.randn(2, 3).astype( + np.float32).tobytes(), + raw=True)]) + optimized_model = self._optimized( + graph, ["eliminate_unused_initializer"]) + + assert len(list(optimized_model.graph.initializer)) == 0 + assert len(optimized_model.graph.input) == 2 + + def test_eliminate_unused_initializer_no_eliminate_used_default(self): # type: () -> None + add = helper.make_node("Add", ["X", "A"], ["Z"]) + 
graph = helper.make_graph( + [add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (1, 2))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 2))], + [helper.make_tensor("A", TensorProto.FLOAT, + dims=(1, 2), + vals=np.random.randn(1, 2).astype( + np.float32).tobytes(), + raw=True)]) + optimized_model = self._optimized( + graph, ["eliminate_unused_initializer"]) + + assert len(list(optimized_model.graph.initializer)) == 1 + + def test_eliminate_unused_initializer_no_eliminate_used(self): # type: () -> None + nodes = [helper.make_node("Add", ["X", "A"], ["Z"])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Add", ["_X", "_A"], ["_Z2"])], + [(TensorProto.FLOAT, (1, 2), "X"), + (TensorProto.FLOAT, (1, 2), "A")], + [(TensorProto.FLOAT, (1, 2), "Z2")])) + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (1, 2))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 2))], + [helper.make_tensor("A", TensorProto.FLOAT, + dims=(1, 2), + vals=np.random.randn(1, 2).astype( + np.float32).tobytes(), + raw=True)]) + optimized_model = self._optimized( + graph, ["eliminate_unused_initializer"]) + + # Add, Constant (trip count), Constant (cond), Loop + assert len(list(optimized_model.graph.node)) == 4 + assert optimized_model.graph.node[0].op_type == "Add" + assert optimized_model.graph.output[0].name == "Z" + # Add + assert len(optimized_model.graph.node[3].attribute[0].g.node) == 1 + assert optimized_model.graph.node[3].attribute[0].g.node[0].op_type == 'Add' + assert optimized_model.graph.node[3].attribute[0].g.output[1].name == '_Z2' + + assert len(list(optimized_model.graph.initializer)) == 1 + + def test_eliminate_unused_initializer_no_eliminate_output(self): # type: () -> None + add = helper.make_node("Add", ["X", "Y"], ["Z"]) + graph = 
helper.make_graph( + [add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 2)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor("A", TensorProto.FLOAT, + dims=(2, 3), + vals=np.random.randn(2, 3).astype( + np.float32).tobytes(), + raw=True)]) + optimized_model = self._optimized( + graph, ["eliminate_unused_initializer"]) + + assert len(list(optimized_model.graph.initializer)) == 1 + assert "Z" in [o.name for o in optimized_model.graph.output] + + def test_extract_constant_to_initializer(self): # type: () -> None + conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) + constant = helper.make_node("Constant", [], ["A"], + value=helper.make_tensor( + name="bias", + data_type=TensorProto.FLOAT, + dims=(16, 1, 1), + vals=np.random.randn(16).astype(np.float32).tolist())) + add = helper.make_node("Add", ["Z", "A"], ["B"]) + graph = helper.make_graph( + [conv, constant, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 1, 1))], + ) + optimized_model = self._optimized( + graph, ["extract_constant_to_initializer"]) + self.assertEqual( + set(vi.name for vi in optimized_model.graph.input), + {'X', 'Y', 'A'}) + + self.assertEqual(len(optimized_model.graph.initializer), 1) + init = optimized_model.graph.initializer[0] + self.assertEqual(init.name, 'A') + self.assertEqual(init.dims, [16, 1, 1]) + self.assertEqual(init.data_type, TensorProto.FLOAT) + + self.assertEqual( + [n.op_type for n in optimized_model.graph.node], ['Conv', 'Add']) + + def test_fuse_concats(self): # type: () -> None + nodes = [helper.make_node("Concat", ["A", "B", "C"], ["X"], 
axis=0), + helper.make_node("Concat", ["D", "E", "F"], ["Y"], axis=0), + helper.make_node("Concat", ["X", "G", "Y"], ["Z"], axis=0)] + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("D", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("E", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("F", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("G", TensorProto.FLOAT, (4, 3, 4))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (18, 3, 4))]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_concats"], True) # two passes are needed to simplify the graph to its simplest state. + + assert len(optimized_model.graph.node) == 1 + assert len(optimized_model.graph.node[0].input) == 7 + assert optimized_model.graph.node[0].input == [ + "A", "B", "C", "G", "D", "E", "F"] + assert optimized_model.graph.node[0].op_type == "Concat" + + def test_fuse_concats_different_axis(self): # type: () -> None + nodes = [helper.make_node("Concat", ["A", "B", "C"], ["X"], axis=0), + helper.make_node("Concat", ["D", "E", "F"], ["Y"], axis=1), + helper.make_node("Concat", ["X", "Y"], ["Z"], axis=2)] + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("D", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("E", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("F", TensorProto.FLOAT, (4, 3, 4))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (18, 3, 4))]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_concats"], True) # 
two passes are needed to simplify the graph to its simplest state. + + assert optimized_model.graph == graph + + def test_fuse_transpose(self): # type: () -> None + nodes = [helper.make_node("Transpose", ["X"], ["Y"], perm=[1, 0, 2]), + helper.make_node("Transpose", ["Y"], ["Z"], perm=[2, 0, 1]), + helper.make_node("Transpose", ["Z"], ["A"], perm=[2, 0, 1])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Transpose", ["_X"], ["_Y2"], perm=[1, 0, 2]), + helper.make_node("Transpose", ["_Y2"], ["_Y3"], perm=[2, 0, 1]), + helper.make_node("Transpose", ["_Y3"], ["_Y4"], perm=[2, 0, 1])], + [(TensorProto.FLOAT, (2, 3, 4), "X")], + [(TensorProto.FLOAT, (2, 4, 3), "Y4")])) + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3, 4))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 4, 3)), + helper.make_tensor_value_info("Y4", TensorProto.FLOAT, (4, 3, 2))]) + original_model = helper.make_model(graph) + shape_inference.infer_shapes(original_model) + optimized_model = self._optimized( + graph, ["fuse_consecutive_transposes"]) + shape_inference.infer_shapes(optimized_model) + + # Transpose, Constant (trip count), Constant (cond), Loop + assert len(list(optimized_model.graph.node)) == 4 + # Transpose + assert len(optimized_model.graph.node[3].attribute[0].g.node) == 1 + + def test_fuse_transpose_default_graph_output(self): # type: () -> None + add = helper.make_node("Add", ["X", "Y"], ["A"]) + trans1 = helper.make_node("Transpose", ["A"], ["B"]) + trans2 = helper.make_node("Transpose", ["B"], ["C"]) + graph = helper.make_graph( + [add, trans1, trans2], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3))], + [helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3))]) + # The existence of shape infos of graph outputs is checked in _optimized + optimized_model = self._optimized(graph, 
["fuse_consecutive_transposes"]) + + def check_transpose(node): # type: (NodeProto) -> None + assert node.op_type != "Transpose" + self._visit_all_nodes_recursive(optimized_model.graph, check_transpose) + assert len(optimized_model.graph.node) == 1 + + def test_fuse_transpose_default(self): # type: () -> None + trans1 = helper.make_node("Transpose", ["X"], ["Y"]) + trans2 = helper.make_node("Transpose", ["Y"], ["Z"]) + graph = helper.make_graph( + [trans1, trans2], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3, 4))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (2, 3, 4))]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_transposes"]) + + assert len(list(optimized_model.graph.node)) == 0 + + def test_fuse_transpose_default_no_fuse(self): # type: () -> None + trans1 = helper.make_node("Transpose", ["X"], ["Y"]) + trans2 = helper.make_node("Transpose", ["Y"], ["Z"], perm=[0, 1, 2]) + graph = helper.make_graph( + [trans1, trans2], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3, 4))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (4, 3, 2))]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_transposes"]) + + assert len(list(optimized_model.graph.node)) == 2 + for node in optimized_model.graph.node: + assert node.op_type == "Transpose" + + def test_fuse_transpose_into_gemm(self): # type: () -> None + nodes = [helper.make_node("Transpose", ["X"], ["A"], perm=[1, 0]), + helper.make_node("Transpose", ["Y"], ["B"], perm=[1, 0]), + helper.make_node("Gemm", ["A", "B", "C"], ["Z"])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Transpose", ["_X"], ["_A"], perm=[1, 0]), + helper.make_node("Transpose", ["_Y"], ["_B"], perm=[1, 0]), + helper.make_node("Gemm", ["_A", "_B", "_C"], ["_Z2"])], + [(TensorProto.FLOAT, (2, 3), "X"), + (TensorProto.FLOAT, (5, 2), "Y"), + (TensorProto.FLOAT, (3, 5), "C")], + [(TensorProto.FLOAT, (2, 3), "Z2")])) + graph = 
helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (5, 2)), + helper.make_tensor_value_info("C", TensorProto.FLOAT, (3, 5))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (3, 5))]) + optimized_model = self._optimized(graph, ["fuse_transpose_into_gemm"]) + + # Gemm, Constant (trip count), Constant (cond), Loop + assert len(list(optimized_model.graph.node)) == 4 + assert optimized_model.graph.node[0].op_type == "Gemm" + # Gemm + assert len(optimized_model.graph.node[3].attribute[0].g.node) == 1 + assert optimized_model.graph.node[3].attribute[0].g.node[0].op_type == "Gemm" + + def test_fuse_add_bias_into_conv_use_weight_shape(self): # type: () -> None + nodes = [helper.make_node("Conv", ["X", "Y"], ["Z"]), + helper.make_node("Add", ["Z", "A"], ["B"])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Conv", ["_X", "_Y"], ["_Z"]), + helper.make_node("Add", ["_Z", "_A"], ["_B2"])], + [(TensorProto.FLOAT, (1, 5, 3, 3), "X"), + (TensorProto.FLOAT, (16, 5, 3, 3), "Y"), + (TensorProto.FLOAT, (16, 1, 1), "A")], + [(TensorProto.FLOAT, (1, 16, 3, 3), "B2")])) + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info( + "Y", TensorProto.FLOAT, (16, 5, 3, 3)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (16, 1, 1))], + [helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 1, 1))], + ) + optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"]) + + # Squeeze, Conv, Constant (trip count), Constant (condition), Loop + assert len(list(optimized_model.graph.node)) == 5 + assert optimized_model.graph.node[0].op_type == 'Squeeze' + assert optimized_model.graph.node[1].op_type == 'Conv' + assert optimized_model.graph.output[0].name == 'Z' + # Squeeze, Conv + assert len(optimized_model.graph.node[4].attribute[0].g.node) == 2 + 
assert optimized_model.graph.node[4].attribute[0].g.node[0].op_type == 'Squeeze' + assert optimized_model.graph.node[4].attribute[0].g.node[1].op_type == 'Conv' + # Output 1 since 0 is 'cond' + assert optimized_model.graph.node[4].attribute[0].g.output[1].name == '_Z' + + def test_fuse_add_bias_into_conv_use_weight_shape_with_tile(self): # type: () -> None + conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "A"], ["B"]) + graph = helper.make_graph( + [conv, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info( + "Y", TensorProto.FLOAT, (16, 5, 3, 3)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (1,))], + [helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 1, 1))], + ) + optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 3 + assert len(optimized_model.graph.value_info) == 1 + assert optimized_model.graph.value_info[0].type.tensor_type.elem_type == TensorProto.INT64 + assert len( + optimized_model.graph.value_info[0].type.tensor_type.shape.dim) == 1 + assert optimized_model.graph.node[0].op_type == 'Constant' + assert optimized_model.graph.node[1].op_type == 'Tile' + assert optimized_model.graph.node[2].op_type == 'Conv' + assert optimized_model.graph.output[0].name == 'Z' + + def test_fuse_add_bias_into_conv_use_conv_shape(self): # type: () -> None + sub = helper.make_node("Sub", ["M", "N"], ["Y"]) + conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "A"], ["B"]) + graph = helper.make_graph( + [sub, conv, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info( + "M", TensorProto.FLOAT, (16, 5, 3, 3)), + helper.make_tensor_value_info( + "N", TensorProto.FLOAT, (16, 5, 3, 3)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (1, 16, 1, 1))], + 
[helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 1, 1))], + value_info=[ + helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1)) + ], + ) + optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"]) + + assert len(optimized_model.graph.node) == 3 + assert optimized_model.graph.node[0].op_type == 'Sub' + assert optimized_model.graph.node[1].op_type == 'Squeeze' + assert optimized_model.graph.node[2].op_type == 'Conv' + assert optimized_model.graph.output[0].name == 'Z' + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert len( + optimized_model.graph.output[0].type.tensor_type.shape.dim) == 4 + + def test_fuse_add_bias_into_conv_use_move_constant(self): # type: () -> None + conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) + constant = helper.make_node("Constant", [], ["A"], + value=helper.make_tensor( + name="bias", + data_type=TensorProto.FLOAT, + dims=(16, 1, 1), + vals=np.random.randn(16).astype(np.float32).tolist())) + add = helper.make_node("Add", ["Z", "A"], ["B"]) + graph = helper.make_graph( + [conv, constant, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 1, 1))], + value_info=[ + helper.make_tensor_value_info( + "A", TensorProto.FLOAT, (16, 1, 1)), + ] + ) + optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"]) + + assert len(optimized_model.graph.node) == 3 + assert optimized_model.graph.node[0].op_type == 'Constant' + assert optimized_model.graph.node[1].op_type == 'Squeeze' + assert optimized_model.graph.node[2].op_type == 'Conv' + assert optimized_model.graph.output[0].name == 'Z' + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert len( + optimized_model.graph.output[0].type.tensor_type.shape.dim) == 4 + + def 
test_fuse_add_bias_into_conv_squeeze_1d_bias_no_fuse(self): # type: () -> None + conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "A"], ["B"]) + graph = helper.make_graph( + [conv, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info( + "Y", TensorProto.FLOAT, (16, 5, 3, 3)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (3,))], + [helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 1, 3))], + value_info=[ + helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1)), + ] + ) + optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 2 + assert optimized_model.graph.node[0].op_type == 'Conv' + assert optimized_model.graph.node[1].op_type == 'Add' + + def test_fuse_add_bias_into_conv_squeeze_3d_bias_no_fuse(self): # type: () -> None + conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "A"], ["B"]) + graph = helper.make_graph( + [conv, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info( + "Y", TensorProto.FLOAT, (16, 5, 3, 3)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (16, 3, 3))], + [helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 3, 3))], + value_info=[ + helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1)), + ] + ) + optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 2 + assert optimized_model.graph.node[0].op_type == 'Conv' + assert optimized_model.graph.node[1].op_type == 'Add' + + def test_fuse_add_bias_into_conv_squeeze_4d_bias_no_fuse(self): # type: () -> None + conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "A"], ["B"]) + graph = helper.make_graph( + [conv, add], + "test", + 
[helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)), + helper.make_tensor_value_info( + "Y", TensorProto.FLOAT, (16, 5, 3, 3)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (1, 16, 3, 3))], + [helper.make_tensor_value_info( + "B", TensorProto.FLOAT, (1, 16, 3, 3))] + ) + optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 2 + assert optimized_model.graph.node[0].op_type == 'Conv' + assert optimized_model.graph.node[1].op_type == 'Add' + + def test_fuse_matmul_add_bias_into_gemm(self): # type: () -> None + matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "B"], ["A"]) + graph = helper.make_graph( + [matmul, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (32, 10)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (10, 16)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (16,))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] + ) + optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Gemm" + + def test_fuse_matmul_add_bias_into_gemm_2d_bias(self): # type: () -> None + matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "B"], ["A"]) + graph = helper.make_graph( + [matmul, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (32, 10)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (10, 16)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (1, 16))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] + ) + optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Gemm" + + def 
test_fuse_matmul_add_bias_into_gemm_2d_bias_same_shape(self): # type: () -> None + matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "B"], ["A"]) + graph = helper.make_graph( + [matmul, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (32, 10)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (10, 16)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (32, 16))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] + ) + optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Gemm" + + def test_fuse_matmul_add_bias_into_gemm_2d_bias_bcast_no_fuse(self): # type: () -> None + matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "B"], ["A"]) + graph = helper.make_graph( + [matmul, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 10)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (10, 16)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (16, 16))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (16, 16))] + ) + optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + + assert optimized_model.graph == graph + + def test_fuse_matmul_add_bias_into_gemm_3d_matmul_no_fuse(self): # type: () -> None + matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "B"], ["A"]) + graph = helper.make_graph( + [matmul, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 4, 3)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (3, 3))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3, 3))] + ) + optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + + assert 
optimized_model.graph == graph + + def test_fuse_matmul_add_bias_into_gemm_3d_bias_no_fuse(self): # type: () -> None + matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) + add = helper.make_node("Add", ["Z", "B"], ["A"]) + graph = helper.make_graph( + [matmul, add], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (32, 10)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (10, 16)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 1, 16))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] + ) + optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + + assert optimized_model.graph == graph + + def test_fuse_matmul_add_bias_into_gemm_multiple_use_no_fuse(self): # type: () -> None + matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) + identity = helper.make_node("Identity", ["Z"], ["A1"]) + add = helper.make_node("Add", ["Z", "B"], ["A2"]) + graph = helper.make_graph( + [matmul, add, identity], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (32, 10)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (10, 16)), + helper.make_tensor_value_info("B", TensorProto.FLOAT, (1, 16))], + [helper.make_tensor_value_info("A1", TensorProto.FLOAT, (32, 16)), + helper.make_tensor_value_info("A2", TensorProto.FLOAT, (32, 16))] + ) + optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + + assert optimized_model.graph == graph + + def test_fuse_pad_into_conv_no_optional_value_opset10(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X"], + ["P"], + mode="constant", + pads=[0, 0, 0, 0, 0, 0, 1, 1] + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + ) + 
optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Conv" + assert optimized_model.graph.node[0].attribute[0].name == "pads" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [0, 0, 1, 1] + + def test_fuse_pad_into_conv_no_optional_value(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads"], + ["P"], + mode="constant" + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), + raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Conv" + assert optimized_model.graph.node[0].attribute[0].name == "pads" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [0, 0, 1, 1] + + def test_fuse_pad_into_conv_with_optional_value(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads", "Constant_value"], + ["P"], + mode="constant" + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), + helper.make_tensor_value_info("Constant_value", TensorProto.FLOAT, ()), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + 
[helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), + raw=True), + helper.make_tensor("Constant_value", TensorProto.FLOAT, + dims=(), + vals=np.array([0]).astype(np.float32).tobytes(), + raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Conv" + assert optimized_model.graph.node[0].attribute[0].name == "pads" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [0, 0, 1, 1] + + def test_fuse_pad_into_conv_with_nonzero_optional_value(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads", "Constant_value"], + ["P"], + mode="constant" + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), + helper.make_tensor_value_info("Constant_value", TensorProto.FLOAT, ()), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), + raw=True), + helper.make_tensor("Constant_value", TensorProto.FLOAT, + dims=(), + vals=np.array([25]).astype(np.float32).tobytes(), # non-zero Constant_value -> so no pad + raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert optimized_model.graph == graph + + def test_fuse_pad_into_conv_1d_opset10(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X"], + ["P"], + mode="constant", + pads=[0, 0, 1, 0, 0, 1] + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = 
helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 30)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 32))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1))] + ) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Conv" + assert optimized_model.graph.node[0].attribute[0].name == "pads" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [1, 1] + + def test_fuse_pad_into_conv_1d(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads"], + ["P"], + mode="constant" + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 30)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (6,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 32))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(6,), + vals=np.array([0, 0, 1, 0, 0, 1]).astype(np.int64).tobytes(), + raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Conv" + assert optimized_model.graph.node[0].attribute[0].name == "pads" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [1, 1] + + def test_fuse_pad_into_conv_existing_conv_pad_opset10(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X"], + ["P"], + mode="constant", + pads=[0, 0, 0, 0, 0, 0, 1, 1] + ) + conv = helper.make_node( + "Conv", + ["P", "Y"], + ["Z"], + pads=[1, 1, 0, 0] + ) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", 
TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 4, 4))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + ) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Conv" + assert optimized_model.graph.node[0].attribute[0].name == "pads" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [1, 1, 1, 1] + + def test_fuse_pad_into_conv_existing_conv_pad(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads"], + ["P"], + mode="constant" + ) + conv = helper.make_node( + "Conv", + ["P", "Y"], + ["Z"], + pads=[1, 1, 0, 0] + ) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 4, 4))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), + raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert len(list(optimized_model.graph.node)) == 1 + assert optimized_model.graph.node[0].op_type == "Conv" + assert optimized_model.graph.node[0].attribute[0].name == "pads" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [1, 1, 1, 1] + + def test_fuse_pad_into_conv_pad_feature_no_fuse_opset10(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X"], + ["P"], + mode="constant", + pads=[0, 1, 0, 0, 0, 0, 0, 0] + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 4, 3, 3)), + 
helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + ) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + + assert optimized_model.graph == graph + + def test_fuse_pad_into_conv_pad_feature_no_fuse(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads"], + ["P"], + mode="constant" + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 4, 3, 3)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(8,), + vals=np.array([0, 1, 0, 0, 0, 0, 0, 0]).astype(np.int64).tobytes(), + raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert optimized_model.graph == graph + + def test_fuse_pad_into_conv_negative_pad_no_fuse_opset10(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X"], + ["P"], + mode="constant", + pads=[0, 0, 0, 0, 0, 0, -1, -1] + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 4, 4)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + ) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + + assert optimized_model.graph == graph + + def test_fuse_pad_into_conv_negative_pad_no_fuse(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads"], + ["P"], + mode="constant" + ) + conv = helper.make_node("Conv", 
["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 4, 4)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, -1, -1]).astype(np.int64).tobytes(), + raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert optimized_model.graph == graph + + def test_fuse_pad_into_conv_reflection_pad_no_fuse_opset10(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X"], + ["P"], + mode="reflect", + pads=[0, 0, 0, 0, 0, 0, 1, 1] + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + ) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + + assert optimized_model.graph == graph + + def test_fuse_pad_into_conv_reflection_pad_no_fuse(self): # type: () -> None + pad = helper.make_node( + "Pad", + ["X", "Pads"], + ["P"], + mode="reflect" + ) + conv = helper.make_node("Conv", ["P", "Y"], ["Z"]) + graph = helper.make_graph( + [pad, conv], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), + helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor("Pads", TensorProto.INT64, + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), + 
raw=True)]) + optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) + + assert optimized_model.graph == graph + + def test_fuse_consecutive_squeezes(self): # type: () -> None + nodes = [helper.make_node("Squeeze", ["X"], ["Y"], axes=[0, 4, 5]), + helper.make_node("Squeeze", ["Y"], ["Z"], axes=[0, 3])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Squeeze", ["_X"], ["_Y"], axes=[0, 4, 5]), + helper.make_node("Squeeze", ["_Y"], ["_Z2"], axes=[0, 3])], + [(TensorProto.FLOAT, (1, 1, 2, 3, 1, 1, 1, 1, 8, 9), "X")], + [(TensorProto.FLOAT, (2, 3, 1, 8, 9), "Z2")])) + + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (1, 1, 2, 3, 1, 1, 1, 1, 8, 9))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (2, 3, 1, 8, 9))]) + optimized_model = self._optimized(graph, ["fuse_consecutive_squeezes"]) + + # Squeeze, Constant (trip count), Constant (cond), Loop + assert optimized_model.graph.node[0].op_type == "Squeeze" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 0, 1, 4, 5, 6] + assert len(list(optimized_model.graph.node)) == 4 + + def test_fuse_consecutive_squeezes_default(self): # type: () -> None + squeeze1 = helper.make_node("Squeeze", ["X"], ["Y"], axes=[0, 4, 5]) + squeeze2 = helper.make_node("Squeeze", ["Y"], ["Z"], axes=[0, 3]) + squeeze3 = helper.make_node("Squeeze", ["Z"], ["A"], axes=[2]) + nodes = [squeeze1, squeeze2, squeeze3] + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (1, 1, 2, 3, 1, 1, 1, 1, 8, 9))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3, 8, 9))]) + optimized_model = self._optimized(graph, ["fuse_consecutive_squeezes"]) + + assert optimized_model.graph.node[0].op_type == "Squeeze" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 0, 1, 4, 5, 6, 7] + assert len(list(optimized_model.graph.node)) == 1 + + def 
test_fuse_consecutive_squeezes_random(self): # type: () -> None + x_shape = [1, 1, 1, 3, 4, 1, 6, 1, 1, 9] + s1_one_indices = [i for i, a in enumerate(x_shape) if a == 1] + s1_axes = np.random.choice(s1_one_indices, size=np.random.randint(low=1, high=len(s1_one_indices) - 1), + replace=False) + s2_x_shape = [a for i, a in enumerate(x_shape) if i not in s1_axes] + s2_one_indices = [i for i, a in enumerate(s2_x_shape) if a == 1] + s2_axes = s2_one_indices + + squeeze1 = helper.make_node("Squeeze", ["X"], ["Y"], axes=s1_axes) + squeeze2 = helper.make_node("Squeeze", ["Y"], ["Z"], axes=s2_axes) + nodes = [squeeze1, squeeze2] + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, x_shape)], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (3, 4, 6, 9))]) + optimized_model = self._optimized(graph, ["fuse_consecutive_squeezes"]) + + assert optimized_model.graph.node[0].op_type == "Squeeze" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 0, 1, 2, 5, 7, 8] + assert len(list(optimized_model.graph.node)) == 1 + + def test_fuse_consecutive_squeezes_multi_uses(self): # type: () -> None + squeeze1 = helper.make_node("Squeeze", ["X"], ["Y"], axes=[0, 4, 5]) + add = helper.make_node("Add", ["Y", "A"], ["Z2"]) + squeeze2 = helper.make_node("Squeeze", ["Y"], ["Z"], axes=[0, 3]) + graph = helper.make_graph( + [squeeze1, add, squeeze2], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 1, 2, 3, 1, 1, 1, 1, 8, 9)), + helper.make_tensor_value_info("A", TensorProto.FLOAT, (1,))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (2, 3, 1, 8, 9)), + helper.make_tensor_value_info("Z2", TensorProto.FLOAT, (1, 2, 3, 1, 1, 8, 9))]) + optimized_model = self._optimized(graph, ["fuse_consecutive_squeezes"]) + + assert optimized_model.graph.node[0].op_type == "Squeeze" + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 0, 4, 5] + assert 
optimized_model.graph.node[2].op_type == "Squeeze" + assert optimized_model.graph.node[2].input == ["X"] + assert list(optimized_model.graph.node[2].attribute[0].ints) == [ + 0, 1, 4, 5, 6] + assert len(list(optimized_model.graph.node)) == 3 + + def test_fuse_consecutive_softmax_log_axis(self): # type: () -> None + for axis in range(3): + softmax = helper.make_node("Softmax", ["X"], ["Y"], axis=axis) + log = helper.make_node("Log", ["Y"], ["Z"]) + graph = helper.make_graph( + [softmax, log], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_log_softmax"]) + + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 1 + assert optimized_model.graph.node[0].op_type == "LogSoftmax" + assert optimized_model.graph.node[0].attribute[0].name == "axis" + assert optimized_model.graph.node[0].attribute[0].i == axis + + def test_fuse_consecutive_softmax_log_side_effect(self): # type: () -> None + softmax = helper.make_node("Softmax", ["X"], ["Y"], axis=2) + log = helper.make_node("Log", ["Y"], ["Z"]) + graph = helper.make_graph( + [softmax, log], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11)), + helper.make_tensor_value_info("Y", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_log_softmax"]) + + assert graph == optimized_model.graph + + def test_fuse_consecutive_softmax_log_multiple_out(self): # type: () -> None + softmax = helper.make_node("Softmax", ["X"], ["Y"], axis=2) + log = helper.make_node("Log", ["Y"], ["Z"]) + exp = helper.make_node("Exp", ["Z"], ["Z1"]) + graph = helper.make_graph( + [softmax, log, exp], + "test", + 
[helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11)), + helper.make_tensor_value_info("Z1", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_log_softmax"]) + + assert len(optimized_model.graph.output) == 2 + assert len(optimized_model.graph.node) == 2 + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.output[1].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.node[0].op_type == "LogSoftmax" + assert optimized_model.graph.node[0].attribute[0].name == "axis" + assert optimized_model.graph.node[0].attribute[0].i == 2 + assert optimized_model.graph.node[1].op_type == "Exp" + + def test_preserve_value_info(self): # type: () -> None + trans1 = helper.make_node("Transpose", ["X"], ["Y"], perm=[1, 0, 2]) + trans2 = helper.make_node("Transpose", ["Y"], ["Z"], perm=[2, 0, 1]) + trans3 = helper.make_node("Transpose", ["Z"], ["A"], perm=[2, 0, 1]) + graph = helper.make_graph( + [trans1, trans2, trans3], + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3, 4))], + [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 4, 3))]) + vi = helper.make_tensor_value_info("Y", TensorProto.FLOAT, (3, 2, 4)) + graph.value_info.extend([vi]) + optimized_model = self._optimized(graph, ["nop"]) + assert list(optimized_model.graph.value_info) == [vi] + assert len(list(optimized_model.graph.node)) == 3 + + def test_split(self): # type: () -> None + node = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['X'], + value=onnx.helper.make_tensor( + name='X', + data_type=TensorProto.FLOAT, + dims=[1], + vals=[5], + ), + ) + graph = helper.make_graph( + [node], + 'test-optimize-split', + [], + [helper.make_tensor_value_info('X', TensorProto.FLOAT, (1,))]) + + init_model = self._optimized(graph, ['split_init']) + 
self.assertEqual(len(init_model.graph.node), 1) + self.assertEqual(len(init_model.graph.output), 1) + self.assertEqual(init_model.graph.node[0].op_type, 'Constant') + + predict_model = self._optimized(graph, ['split_predict']) + self.assertEqual(len(predict_model.graph.node), 0) + self.assertEqual(len(predict_model.graph.input), 1) + self.assertEqual(predict_model.graph.input[0].name, 'X') + + def test_lift_lex_loop(self): # type: () -> None + nodes = [helper.make_node("Identity", ["X"], ["Y"])] + nodes.extend(self._make_fake_loop_op( + [helper.make_node("Identity", ["X"], ["_Y2"]), + helper.make_node("Identity", ["Y"], ["_Y3"])], + [], + [(TensorProto.FLOAT, (5,), "Y2"), + (TensorProto.FLOAT, (5,), "Y3")])) + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (5,))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (5,)), + helper.make_tensor_value_info("Y2", TensorProto.FLOAT, (5,))]) + optimized_model = self._optimized(graph, ["lift_lexical_references"]) + assert len(optimized_model.graph.node) == 4 + # body_graph, __control_inputs + assert len(optimized_model.graph.node[3].attribute) == 2 + assert optimized_model.graph.node[3].attribute[1].name == "__control_inputs" + assert optimized_model.graph.node[3].attribute[1].strings[0] == b"X" + assert optimized_model.graph.node[3].attribute[1].strings[1] == b"Y" + + def test_lift_lex_if(self): # type: () -> None + nodes = [helper.make_node("Identity", ["X"], ["Y"])] + nodes.extend(self._make_fake_if_op( + [helper.make_node("Identity", ["X"], ["_Y2"]), + helper.make_node("Identity", ["Y"], ["_Y3"])], + [helper.make_node("Identity", ["X"], ["_Y2"]), + helper.make_node("Identity", ["X"], ["_Y3"])], + [(TensorProto.FLOAT, (5,), "Y2"), + (TensorProto.FLOAT, (5,), "Y3")])) + graph = helper.make_graph( + nodes, + "test", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (5,))], + [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (5,)), + 
helper.make_tensor_value_info("Y2", TensorProto.FLOAT, (5,))]) + # "If" node now diverges from ONNX schema. Disable checking. + optimized_model = self._optimized(graph, ["lift_lexical_references"]) + + # Identity, Constant (condition), If + assert len(optimized_model.graph.node) == 3 + # else_branch, then_branch, __control_inputs + assert len(optimized_model.graph.node[2].attribute) == 3 + assert optimized_model.graph.node[2].attribute[2].name == "__control_inputs" + assert optimized_model.graph.node[2].attribute[2].strings[0] == b"X" + assert optimized_model.graph.node[2].attribute[2].strings[1] == b"Y" + + def test_fuse_bn_into_conv_simple(self): # type: () -> None + for (tensor_type, np_type) in [(TensorProto.FLOAT, np.float32), (TensorProto.DOUBLE, np.float64)]: + conv = helper.make_node("Conv", ["X", "W", "B"], ["Y"]) + bn = helper.make_node("BatchNormalization", [ + "Y", "scale", "b", "mean", "var"], ["Z"]) + + W = np.random.randn(3, 2, 5, 5).astype(np_type) + 2 + B = np.random.randn(3,).astype(np_type) + 2 + scale = np.random.randn(3,).astype(np_type) + 2 + b = np.random.randn(3,).astype(np_type) + 2 + mean = np.random.randn(3,).astype(np_type) + 2 + var = np.abs(np.random.randn(3,).astype(np_type)) + 2 + + initializers = [ + helper.make_tensor(name, tensor_type, + npa.shape, npa.tobytes(), raw=True) + for name, npa in [('W', W), ('B', B), ('scale', scale), ('b', b), ('mean', mean), ('var', var)] + ] + graph = helper.make_graph( + [conv, bn], + "test", + [helper.make_tensor_value_info("X", tensor_type, (5, 2, 28, 28)), + helper.make_tensor_value_info("W", tensor_type, (3, 2, 5, 5)), + helper.make_tensor_value_info("B", tensor_type, (3,)), + helper.make_tensor_value_info("scale", tensor_type, (3,)), + helper.make_tensor_value_info("b", tensor_type, (3,)), + helper.make_tensor_value_info("mean", tensor_type, (3,)), + helper.make_tensor_value_info("var", tensor_type, (3,))], + [helper.make_tensor_value_info( + "Z", tensor_type, (5, 3, 24, 24))], + 
initializer=initializers, + value_info=[ + helper.make_tensor_value_info( + "Y", tensor_type, (5, 3, 24, 24)) + ] + ) + optimized_model = self._optimized(graph, ["fuse_bn_into_conv"]) + + self.assertEqual(len(optimized_model.graph.node), 1) + self.assertEqual(optimized_model.graph.node[0].op_type, 'Conv') + self.assertEqual(len(optimized_model.graph.initializer), 2) + new_W = numpy_helper.to_array(optimized_model.graph.initializer[0]) + new_b = numpy_helper.to_array(optimized_model.graph.initializer[1]) + + f = scale / np.sqrt(var + 1e-5) + np.testing.assert_almost_equal((B - mean) * f + b, new_b) + np.testing.assert_almost_equal( + W * f[:, np.newaxis, np.newaxis, np.newaxis], new_W) + + def _internal_test_deadend_elimination(self, fixed): # type: (bool) -> None + softmax = helper.make_node("Softmax", ["X"], ["Y"], axis=2) + log = helper.make_node("Log", ["Y"], ["Z"]) + exp = helper.make_node("Exp", ["Z"], ["Z1"]) + exp1 = helper.make_node("Log", ["Z"], ["Z2"]) + exp2 = helper.make_node("Sqrt", ["Z1"], ["Z3"]) + graph = helper.make_graph( + [softmax, log, exp, exp1, exp2], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["eliminate_deadend"], fixed) + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 2 + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.node[0].op_type == "Softmax" + assert optimized_model.graph.node[0].attribute[0].name == "axis" + assert optimized_model.graph.node[0].attribute[0].i == 2 + assert optimized_model.graph.node[1].op_type == "Log" + + def test_deadend_elimination_simple(self): # type: () -> None + self._internal_test_deadend_elimination(False) + + def test_deadend_elimination_simple_fixed(self): # type: () -> None + self._internal_test_deadend_elimination(True) + + def 
test_eliminate_nop_monotone_argmax_basic_no_node_axis(self): # type: () -> None + for node_name in ["Log", "Exp", "Sqrt"]: + for axis in range(3): + node = helper.make_node(node_name, ["X"], ["Y"]) + argmax = helper.make_node("ArgMax", ["Y"], ["Z"], axis=axis) + graph = helper.make_graph( + [node, argmax], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_monotone_argmax"]) + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 1 + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.node[0].op_type == "ArgMax" + assert optimized_model.graph.node[0].attribute[0].name == "axis" + assert optimized_model.graph.node[0].attribute[0].i == axis + + def test_eliminate_nop_monotone_argmax_basic_with_node_axis(self): # type: () -> None + for node_name in ["Softmax", "LogSoftmax"]: + for axis_n in range(3): + for axis_max in range(3): + node = helper.make_node(node_name, ["X"], ["Y"], axis=axis_n) + argmax = helper.make_node("ArgMax", ["Y"], ["Z"], axis=axis_max) + graph = helper.make_graph( + [node, argmax], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_monotone_argmax"]) + if axis_max == axis_n: + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 1 + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.node[0].op_type == "ArgMax" + assert optimized_model.graph.node[0].attribute[0].name == "axis" + assert optimized_model.graph.node[0].attribute[0].i == axis_max + else: + assert optimized_model.graph == graph + + def 
test_eliminate_nop_monotone_argmax_multiple_out(self): # type: () -> None + for node_name in ["Log", "Exp", "Sqrt"]: + for axis in range(3): + node = helper.make_node(node_name, ["X"], ["Y"]) + node2 = helper.make_node(node_name, ["Y"], ["Z1"]) + argmax = helper.make_node("ArgMax", ["Y"], ["Z"], axis=axis) + graph = helper.make_graph( + [node, node2, argmax], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11)), + helper.make_tensor_value_info("Z1", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_monotone_argmax"]) + assert optimized_model.graph == graph + + def test_eliminate_nop_monotone_argmax_consecutive(self): # type: () -> None + def _assertion(graph, optimized_model, axis_aligned, true_axis): # type: (GraphProto, ModelProto, bool, int) -> None + if axis_aligned: + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 1 + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.node[0].op_type == "ArgMax" + assert optimized_model.graph.node[0].attribute[0].name == "axis" + assert optimized_model.graph.node[0].attribute[0].i == true_axis + else: + assert optimized_model.graph == graph + # no axis X no axis test + for node_name_0 in ["Log", "Exp", "Sqrt"]: + for node_name_1 in ["Log", "Exp", "Sqrt"]: + for axis in range(3): + node = helper.make_node(node_name_0, ["X"], ["Y"]) + node2 = helper.make_node(node_name_1, ["Y"], ["Y1"]) + argmax = helper.make_node("ArgMax", ["Y1"], ["Z"], axis=axis) + graph = helper.make_graph( + [node, node2, argmax], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_monotone_argmax"], True) + _assertion(graph, optimized_model, 
True, axis) + # no axis X axis test + for node_name_0 in ["Log", "Exp", "Sqrt"]: + for node_name_1 in ["Softmax", "LogSoftmax"]: + for axis_0 in range(3): + for axis_1 in range(3): + node = helper.make_node(node_name_0, ["X"], ["Y"]) + node2 = helper.make_node(node_name_1, ["Y"], ["Y1"], axis=axis_0) + argmax = helper.make_node("ArgMax", ["Y1"], ["Z"], axis=axis_1) + graph = helper.make_graph( + [node, node2, argmax], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_monotone_argmax"], True) + _assertion(graph, optimized_model, axis_0 == axis_1, axis_1) + # axis X axis test + for node_name_0 in ["Softmax", "LogSoftmax"]: + for node_name_1 in ["Softmax", "LogSoftmax"]: + for axis_0 in range(3): + for axis_1 in range(3): + for axis_2 in range(3): + node = helper.make_node(node_name_0, ["X"], ["Y"], axis=axis_0) + node2 = helper.make_node(node_name_1, ["Y"], ["Y1"], axis=axis_1) + argmax = helper.make_node("ArgMax", ["Y1"], ["Z"], axis=axis_2) + graph = helper.make_graph( + [node, node2, argmax], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7, 11))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_monotone_argmax"], True) + if axis_0 == axis_1: # we can reduce both of the monotonic ops + _assertion(graph, optimized_model, axis_1 == axis_2, axis_2) + elif axis_1 == axis_2: # we can reduce one of the monotonic ops + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 2 + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.node[-1].op_type == "ArgMax" + assert optimized_model.graph.node[-1].attribute[0].name == "axis" + assert optimized_model.graph.node[-1].attribute[0].i == axis_2 + else: # 
we can't reduce anything + assert optimized_model.graph == graph + + def test_eliminate_nop_dropout(self): # type: () -> None + node = helper.make_node("Dropout", ["X"], ["Y"]) + node1 = helper.make_node("Log", ["Y"], ["Z"]) + graph = helper.make_graph( + [node, node1], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_dropout"], False) + + # we don't want to eliminate the dropout in opset 12, + # even when it's an optional parameter (defaults to 0) + assert optimized_model.graph == graph + + def test_eliminate_nop_dropout_opset11_graph_output(self): # type: () -> None + node = helper.make_node("Log", ["X"], ["Y"]) + node1 = helper.make_node("Dropout", ["Y"], ["Z"], ratio=0.0) + graph = helper.make_graph( + [node, node1], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_dropout"], False, opset_imports=[helper.make_opsetid("", 11)]) + + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 1 + assert optimized_model.graph.node[0].op_type == "Log" + + def test_eliminate_nop_dropout_opset11(self): # type: () -> None + for ratio in [0.0, 0.5]: + node = helper.make_node("Dropout", ["X"], ["Y"], ratio=ratio) + node1 = helper.make_node("Log", ["Y"], ["Z"]) + graph = helper.make_graph( + [node, node1], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, (5, 7))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7))]) + optimized_model = self._optimized( + graph, ["eliminate_nop_dropout"], False, opset_imports=[helper.make_opsetid("", 11)]) + + if ratio > 0.0: + assert optimized_model.graph == graph + else: + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) 
== 1 + assert optimized_model.graph.node[0].op_type == "Log" + + def test_fuse_reduction_unsqueeze(self): # type: () -> None + def _calculate_post_transform_shape(input_shape, reduction_axes, unsqueeze_axes, keepdim): # type: (Tuple[int, ...], List[int], List[int], bool) -> Tuple[int, ...] + post_reduce_shape = None + if keepdim: + post_reduce_shape = tuple([(x if i not in reduction_axes else 1) for i, x in enumerate(input_shape)]) + else: + post_reduce_shape = tuple([x for i, x in enumerate(input_shape) if i not in reduction_axes]) + post_unsqueeze_shape = list(post_reduce_shape) + for ax in unsqueeze_axes: + post_unsqueeze_shape.insert(ax, 1) + return tuple(post_unsqueeze_shape) + + for reduction in ["ReduceL1", "ReduceL2", "ReduceLogSum", + "ReduceLogSumExp", "ReduceMax", "ReduceMean", + "ReduceMin", "ReduceProd", "ReduceSum", "ReduceSumSquare"]: + for axes1 in [[1], [1, 2], [2]]: + for axes2 in [[1], [1, 2], [2]]: + for keepdim in [False, True]: + input_shape = (5, 7, 9) + output_shape = _calculate_post_transform_shape(input_shape, axes1, axes2, keepdim) # type: Tuple[int, ...] 
+ node = helper.make_node(reduction, ["X"], ["Y"], axes=axes1, keepdims=keepdim) + node1 = helper.make_node("Unsqueeze", ["Y"], ["Z"], axes=axes2) + graph = helper.make_graph( + [node, node1], + "test", + [helper.make_tensor_value_info( + "X", TensorProto.FLOAT, input_shape)], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, output_shape)]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_reduce_unsqueeze"], False) + + if keepdim or axes1 != axes2: + assert optimized_model.graph == graph + else: + assert len(optimized_model.graph.output) == 1 + assert len(optimized_model.graph.node) == 1 + assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT + assert optimized_model.graph.node[-1].op_type == reduction + assert optimized_model.graph.node[-1].attribute[0].name == "axes" + assert optimized_model.graph.node[-1].attribute[0].ints == axes1 + optimized_output_shape = tuple(x.dim_value for x in optimized_model.graph.output[0].type.tensor_type.shape.dim) + assert optimized_output_shape == output_shape + + +if __name__ == '__main__': + unittest.main() + From 98d2156bca9202cc704b05b1c2bd1762da0380c5 Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 15:49:39 +0800 Subject: [PATCH 04/14] update .gitignore --- .gitignore | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 512a0b353..13bc1fbe0 100644 --- a/.gitignore +++ b/.gitignore @@ -63,23 +63,16 @@ build_* # setup.py intermediates .eggs dist -onnx_opt.egg-info +*.egg-info *.ninja .ninja_deps .ninja_log compile_commands.json # generated files -onnx/version.py +onnx_opt/version.py compile_commands.json -# test generated files -.cache -.coverage -onnx/examples/.coverage.nbval -.pytest_cache -test_report - # autocomplete .ycm_extra_conf.py From 3dd943b358ee3fba5aaddca3daee1d7ce79bd2a8 Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 16:06:45 +0800 Subject: [PATCH 05/14] update setup.py 
--- setup.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index 720b29389..2d18a8b54 100644 --- a/setup.py +++ b/setup.py @@ -293,11 +293,7 @@ def run(self): packages = setuptools.find_packages() install_requires.extend([ - 'protobuf', - 'numpy', - 'six', - 'typing>=3.6.4; python_version < "3.5"', - 'typing-extensions>=3.6.2.1', + 'onnx' ]) ################################################################################ @@ -330,15 +326,8 @@ def run(self): setup_requires=setup_requires, tests_require=tests_require, extras_require=extras_require, - author='ONNX', + author='ONNX Optimizer Authors', author_email='onnx-technical-discuss@lists.lfai.foundation', - url='https://github.com/onnx/onnx', - entry_points={ - 'console_scripts': [ - 'check-model = onnx.bin.checker:check_model', - 'check-node = onnx.bin.checker:check_node', - 'backend-test-tools = onnx.backend.test.cmd_tools:main', - ] - }, + url='https://github.com/onnx/optimizer', ) From 2f94057efc6aa97d74b0c0f15d9f0ef92ec67d4a Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 16:26:03 +0800 Subject: [PATCH 06/14] fix undefined ONNX_OPTIMIZER_VERSION --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a4abbf3f..2a832a846 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,9 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(ONNX_ROOT ${PROJECT_SOURCE_DIR}/third_party/onnx) add_subdirectory(${ONNX_ROOT}) +file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" ONNX_OPTIMIZER_VERSION) +string(STRIP "${ONNX_OPTIMIZER_VERSION}" ONNX_OPTIMIZER_VERSION) + file(GLOB_RECURSE onnx_opt_srcs "onnx_opt/*.cc" "onnx_opt/*.h" ) From d1882615a84b46d69da1b0693422c41bbf5a73bc Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 16:28:32 +0800 Subject: [PATCH 07/14] fix undefined ONNX_OPTIMIZER_INCLUDE_DIR --- cmake/ONNXOptimizerConfig.cmake.in | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/cmake/ONNXOptimizerConfig.cmake.in b/cmake/ONNXOptimizerConfig.cmake.in index 85c56549e..72dcc88d6 100644 --- a/cmake/ONNXOptimizerConfig.cmake.in +++ b/cmake/ONNXOptimizerConfig.cmake.in @@ -20,5 +20,5 @@ get_filename_component( CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) get_filename_component( _INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) -set(ONNX_INCLUDE_DIRS "${_INSTALL_PREFIX}/include") +set(ONNX_OPTIMIZER_INCLUDE_DIRS "${_INSTALL_PREFIX}/include") From 50997e6445610f7c11cbb26e310c1b7a5d600f2a Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 16:31:42 +0800 Subject: [PATCH 08/14] refine include order --- onnx_opt/cpp2py_export.cc | 2 +- onnx_opt/optimize.h | 3 ++- onnx_opt/pass.cc | 3 ++- onnx_opt/pass_registry.h | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/onnx_opt/cpp2py_export.cc b/onnx_opt/cpp2py_export.cc index 6393baa86..9cebf841a 100644 --- a/onnx_opt/cpp2py_export.cc +++ b/onnx_opt/cpp2py_export.cc @@ -1,8 +1,8 @@ #include #include +#include "onnx/py_utils.h" #include "onnx_opt/optimize.h" -#include "onnx/py_utils.h" namespace ONNX_NAMESPACE { namespace py = pybind11; diff --git a/onnx_opt/optimize.h b/onnx_opt/optimize.h index 459beb337..e579ba911 100644 --- a/onnx_opt/optimize.h +++ b/onnx_opt/optimize.h @@ -6,9 +6,10 @@ #include "onnx/common/ir.h" #include "onnx/common/ir_pb_converter.h" #include "onnx/common/stl_backports.h" +#include "onnx/proto_utils.h" + #include "onnx_opt/pass_manager.h" #include "onnx_opt/pass_registry.h" -#include "onnx/proto_utils.h" #include "vector" diff --git a/onnx_opt/pass.cc b/onnx_opt/pass.cc index 27ac67062..88ac1ba90 100644 --- a/onnx_opt/pass.cc +++ b/onnx_opt/pass.cc @@ -1,6 +1,7 @@ -#include "onnx_opt/pass.h" #include "onnx/common/assertions.h" +#include "onnx_opt/pass.h" + namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass_registry.h b/onnx_opt/pass_registry.h index 5c4350359..5168f2e70 
100644 --- a/onnx_opt/pass_registry.h +++ b/onnx_opt/pass_registry.h @@ -6,6 +6,8 @@ #include "onnx/common/ir.h" #include "onnx/common/ir_pb_converter.h" #include "onnx/common/stl_backports.h" +#include "onnx/proto_utils.h" + #include "onnx_opt/passes/eliminate_deadend.h" #include "onnx_opt/passes/eliminate_identity.h" #include "onnx_opt/passes/eliminate_nop_dropout.h" @@ -27,7 +29,6 @@ #include "onnx_opt/passes/lift_lexical_references.h" #include "onnx_opt/passes/nop.h" #include "onnx_opt/passes/split.h" -#include "onnx/proto_utils.h" #include #include From b4950ec5b99e299c251117c42ad5084a5300c2ba Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 23 Aug 2020 18:30:47 +0800 Subject: [PATCH 09/14] rename onnx_opt to onnxoptimizer --- .gitignore | 2 +- CMakeLists.txt | 10 ++--- {onnx_opt => onnxoptimizer}/__init__.py | 2 +- {onnx_opt => onnxoptimizer}/cpp2py_export.cc | 2 +- {onnx_opt => onnxoptimizer}/optimize.cc | 2 +- {onnx_opt => onnxoptimizer}/optimize.h | 4 +- {onnx_opt => onnxoptimizer}/pass.cc | 2 +- {onnx_opt => onnxoptimizer}/pass.h | 0 {onnx_opt => onnxoptimizer}/pass_manager.cc | 2 +- {onnx_opt => onnxoptimizer}/pass_manager.h | 4 +- {onnx_opt => onnxoptimizer}/pass_registry.cc | 2 +- {onnx_opt => onnxoptimizer}/pass_registry.h | 42 +++++++++---------- .../passes/eliminate_deadend.h | 2 +- .../passes/eliminate_identity.h | 2 +- .../passes/eliminate_nop_dropout.h | 2 +- .../passes/eliminate_nop_monotone_argmax.h | 2 +- .../passes/eliminate_nop_pad.h | 2 +- .../passes/eliminate_nop_transpose.h | 2 +- .../passes/eliminate_unused_initializer.h | 2 +- .../passes/extract_constant_to_initializer.h | 2 +- .../passes/fuse_add_bias_into_conv.h | 2 +- .../passes/fuse_bn_into_conv.h | 2 +- .../passes/fuse_consecutive_concats.h | 2 +- .../passes/fuse_consecutive_log_softmax.h | 2 +- .../fuse_consecutive_reduce_unsqueeze.h | 2 +- .../passes/fuse_consecutive_squeezes.h | 2 +- .../passes/fuse_consecutive_transposes.h | 2 +- .../passes/fuse_matmul_add_bias_into_gemm.h | 
2 +- .../passes/fuse_pad_into_conv.h | 2 +- .../passes/fuse_transpose_into_gemm.h | 2 +- .../passes/lift_lexical_references.h | 2 +- {onnx_opt => onnxoptimizer}/passes/nop.h | 2 +- {onnx_opt => onnxoptimizer}/passes/split.h | 2 +- .../test/optimizer_test.py | 6 +-- setup.py | 12 +++--- 35 files changed, 67 insertions(+), 67 deletions(-) rename {onnx_opt => onnxoptimizer}/__init__.py (97%) rename {onnx_opt => onnxoptimizer}/cpp2py_export.cc (96%) rename {onnx_opt => onnxoptimizer}/optimize.cc (97%) rename {onnx_opt => onnxoptimizer}/optimize.h (94%) rename {onnx_opt => onnxoptimizer}/pass.cc (98%) rename {onnx_opt => onnxoptimizer}/pass.h (100%) rename {onnx_opt => onnxoptimizer}/pass_manager.cc (97%) rename {onnx_opt => onnxoptimizer}/pass_manager.h (94%) rename {onnx_opt => onnxoptimizer}/pass_registry.cc (91%) rename {onnx_opt => onnxoptimizer}/pass_registry.h (64%) rename {onnx_opt => onnxoptimizer}/passes/eliminate_deadend.h (97%) rename {onnx_opt => onnxoptimizer}/passes/eliminate_identity.h (96%) rename {onnx_opt => onnxoptimizer}/passes/eliminate_nop_dropout.h (97%) rename {onnx_opt => onnxoptimizer}/passes/eliminate_nop_monotone_argmax.h (98%) rename {onnx_opt => onnxoptimizer}/passes/eliminate_nop_pad.h (98%) rename {onnx_opt => onnxoptimizer}/passes/eliminate_nop_transpose.h (97%) rename {onnx_opt => onnxoptimizer}/passes/eliminate_unused_initializer.h (98%) rename {onnx_opt => onnxoptimizer}/passes/extract_constant_to_initializer.h (97%) rename {onnx_opt => onnxoptimizer}/passes/fuse_add_bias_into_conv.h (99%) rename {onnx_opt => onnxoptimizer}/passes/fuse_bn_into_conv.h (99%) rename {onnx_opt => onnxoptimizer}/passes/fuse_consecutive_concats.h (98%) rename {onnx_opt => onnxoptimizer}/passes/fuse_consecutive_log_softmax.h (97%) rename {onnx_opt => onnxoptimizer}/passes/fuse_consecutive_reduce_unsqueeze.h (98%) rename {onnx_opt => onnxoptimizer}/passes/fuse_consecutive_squeezes.h (98%) rename {onnx_opt => 
onnxoptimizer}/passes/fuse_consecutive_transposes.h (98%) rename {onnx_opt => onnxoptimizer}/passes/fuse_matmul_add_bias_into_gemm.h (99%) rename {onnx_opt => onnxoptimizer}/passes/fuse_pad_into_conv.h (99%) rename {onnx_opt => onnxoptimizer}/passes/fuse_transpose_into_gemm.h (97%) rename {onnx_opt => onnxoptimizer}/passes/lift_lexical_references.h (99%) rename {onnx_opt => onnxoptimizer}/passes/nop.h (95%) rename {onnx_opt => onnxoptimizer}/passes/split.h (99%) rename {onnx_opt => onnxoptimizer}/test/optimizer_test.py (99%) diff --git a/.gitignore b/.gitignore index 13bc1fbe0..2435431ae 100644 --- a/.gitignore +++ b/.gitignore @@ -70,7 +70,7 @@ dist compile_commands.json # generated files -onnx_opt/version.py +onnxoptimizer/version.py compile_commands.json # autocomplete diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a832a846..ef6fcc930 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,10 +10,10 @@ add_subdirectory(${ONNX_ROOT}) file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" ONNX_OPTIMIZER_VERSION) string(STRIP "${ONNX_OPTIMIZER_VERSION}" ONNX_OPTIMIZER_VERSION) -file(GLOB_RECURSE onnx_opt_srcs "onnx_opt/*.cc" - "onnx_opt/*.h" +file(GLOB_RECURSE onnx_opt_srcs "onnxoptimizer/*.cc" + "onnxoptimizer/*.h" ) -list(REMOVE_ITEM onnx_opt_srcs "${PROJECT_SOURCE_DIR}/onnx_opt/cpp2py_export.cc") +list(REMOVE_ITEM onnx_opt_srcs "${PROJECT_SOURCE_DIR}/onnxoptimizer/cpp2py_export.cc") add_library(onnx_optimizer ${onnx_opt_srcs}) target_link_libraries(onnx_optimizer PUBLIC onnx) @@ -31,7 +31,7 @@ if(BUILD_ONNX_PYTHON) endif() endif() - add_library(onnx_opt_cpp2py_export MODULE "onnx_opt/cpp2py_export.cc") + add_library(onnx_opt_cpp2py_export MODULE "onnxoptimizer/cpp2py_export.cc") set_target_properties(onnx_opt_cpp2py_export PROPERTIES PREFIX "") set_target_properties(onnx_opt_cpp2py_export PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") @@ -116,7 +116,7 @@ endif() include(GNUInstallDirs) -install(DIRECTORY ${PROJECT_SOURCE_DIR}/onnx_opt +install(DIRECTORY 
${PROJECT_SOURCE_DIR}/onnxoptimizer DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") diff --git a/onnx_opt/__init__.py b/onnxoptimizer/__init__.py similarity index 97% rename from onnx_opt/__init__.py rename to onnxoptimizer/__init__.py index c0c044827..ec8429605 100644 --- a/onnx_opt/__init__.py +++ b/onnxoptimizer/__init__.py @@ -11,7 +11,7 @@ from __future__ import unicode_literals import onnx -import onnx_opt.onnx_opt_cpp2py_export as C +import onnxoptimizer.onnx_opt_cpp2py_export as C from onnx import ModelProto from typing import Text, Sequence, Optional diff --git a/onnx_opt/cpp2py_export.cc b/onnxoptimizer/cpp2py_export.cc similarity index 96% rename from onnx_opt/cpp2py_export.cc rename to onnxoptimizer/cpp2py_export.cc index 9cebf841a..2f92222db 100644 --- a/onnx_opt/cpp2py_export.cc +++ b/onnxoptimizer/cpp2py_export.cc @@ -2,7 +2,7 @@ #include #include "onnx/py_utils.h" -#include "onnx_opt/optimize.h" +#include "onnxoptimizer/optimize.h" namespace ONNX_NAMESPACE { namespace py = pybind11; diff --git a/onnx_opt/optimize.cc b/onnxoptimizer/optimize.cc similarity index 97% rename from onnx_opt/optimize.cc rename to onnxoptimizer/optimize.cc index 7b27aebb8..7ba4bf3fb 100644 --- a/onnx_opt/optimize.cc +++ b/onnxoptimizer/optimize.cc @@ -1,7 +1,7 @@ // ATTENTION: The code in this file is highly EXPERIMENTAL. // Adventurous users should note that the APIs will probably change. 
-#include "onnx_opt/optimize.h" +#include "onnxoptimizer/optimize.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/optimize.h b/onnxoptimizer/optimize.h similarity index 94% rename from onnx_opt/optimize.h rename to onnxoptimizer/optimize.h index e579ba911..aefd03f2d 100644 --- a/onnx_opt/optimize.h +++ b/onnxoptimizer/optimize.h @@ -8,8 +8,8 @@ #include "onnx/common/stl_backports.h" #include "onnx/proto_utils.h" -#include "onnx_opt/pass_manager.h" -#include "onnx_opt/pass_registry.h" +#include "onnxoptimizer/pass_manager.h" +#include "onnxoptimizer/pass_registry.h" #include "vector" diff --git a/onnx_opt/pass.cc b/onnxoptimizer/pass.cc similarity index 98% rename from onnx_opt/pass.cc rename to onnxoptimizer/pass.cc index 88ac1ba90..ece7bde3a 100644 --- a/onnx_opt/pass.cc +++ b/onnxoptimizer/pass.cc @@ -1,6 +1,6 @@ #include "onnx/common/assertions.h" -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass.h b/onnxoptimizer/pass.h similarity index 100% rename from onnx_opt/pass.h rename to onnxoptimizer/pass.h diff --git a/onnx_opt/pass_manager.cc b/onnxoptimizer/pass_manager.cc similarity index 97% rename from onnx_opt/pass_manager.cc rename to onnxoptimizer/pass_manager.cc index 0c9cae0a9..2ea5f65d4 100644 --- a/onnx_opt/pass_manager.cc +++ b/onnxoptimizer/pass_manager.cc @@ -1,4 +1,4 @@ -#include "onnx_opt/pass_manager.h" +#include "onnxoptimizer/pass_manager.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass_manager.h b/onnxoptimizer/pass_manager.h similarity index 94% rename from onnx_opt/pass_manager.h rename to onnxoptimizer/pass_manager.h index 44a3899b4..7f925acd3 100644 --- a/onnx_opt/pass_manager.h +++ b/onnxoptimizer/pass_manager.h @@ -3,8 +3,8 @@ // Adventurous users should note that the APIs will probably change. 
#include -#include "onnx_opt/pass.h" -#include "onnx_opt/passes/eliminate_deadend.h" +#include "onnxoptimizer/pass.h" +#include "onnxoptimizer/passes/eliminate_deadend.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass_registry.cc b/onnxoptimizer/pass_registry.cc similarity index 91% rename from onnx_opt/pass_registry.cc rename to onnxoptimizer/pass_registry.cc index b1c6cc69d..a3d3fd257 100644 --- a/onnx_opt/pass_registry.cc +++ b/onnxoptimizer/pass_registry.cc @@ -1,7 +1,7 @@ // ATTENTION: The code in this file is highly EXPERIMENTAL. // Adventurous users should note that the APIs will probably change. -#include "onnx_opt/pass_registry.h" +#include "onnxoptimizer/pass_registry.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/pass_registry.h b/onnxoptimizer/pass_registry.h similarity index 64% rename from onnx_opt/pass_registry.h rename to onnxoptimizer/pass_registry.h index 5168f2e70..12e274d77 100644 --- a/onnx_opt/pass_registry.h +++ b/onnxoptimizer/pass_registry.h @@ -8,27 +8,27 @@ #include "onnx/common/stl_backports.h" #include "onnx/proto_utils.h" -#include "onnx_opt/passes/eliminate_deadend.h" -#include "onnx_opt/passes/eliminate_identity.h" -#include "onnx_opt/passes/eliminate_nop_dropout.h" -#include "onnx_opt/passes/eliminate_nop_monotone_argmax.h" -#include "onnx_opt/passes/eliminate_nop_pad.h" -#include "onnx_opt/passes/eliminate_nop_transpose.h" -#include "onnx_opt/passes/eliminate_unused_initializer.h" -#include "onnx_opt/passes/extract_constant_to_initializer.h" -#include "onnx_opt/passes/fuse_add_bias_into_conv.h" -#include "onnx_opt/passes/fuse_bn_into_conv.h" -#include "onnx_opt/passes/fuse_consecutive_concats.h" -#include "onnx_opt/passes/fuse_consecutive_log_softmax.h" -#include "onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h" -#include "onnx_opt/passes/fuse_consecutive_squeezes.h" -#include "onnx_opt/passes/fuse_consecutive_transposes.h" -#include 
"onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h" -#include "onnx_opt/passes/fuse_pad_into_conv.h" -#include "onnx_opt/passes/fuse_transpose_into_gemm.h" -#include "onnx_opt/passes/lift_lexical_references.h" -#include "onnx_opt/passes/nop.h" -#include "onnx_opt/passes/split.h" +#include "onnxoptimizer/passes/eliminate_deadend.h" +#include "onnxoptimizer/passes/eliminate_identity.h" +#include "onnxoptimizer/passes/eliminate_nop_dropout.h" +#include "onnxoptimizer/passes/eliminate_nop_monotone_argmax.h" +#include "onnxoptimizer/passes/eliminate_nop_pad.h" +#include "onnxoptimizer/passes/eliminate_nop_transpose.h" +#include "onnxoptimizer/passes/eliminate_unused_initializer.h" +#include "onnxoptimizer/passes/extract_constant_to_initializer.h" +#include "onnxoptimizer/passes/fuse_add_bias_into_conv.h" +#include "onnxoptimizer/passes/fuse_bn_into_conv.h" +#include "onnxoptimizer/passes/fuse_consecutive_concats.h" +#include "onnxoptimizer/passes/fuse_consecutive_log_softmax.h" +#include "onnxoptimizer/passes/fuse_consecutive_reduce_unsqueeze.h" +#include "onnxoptimizer/passes/fuse_consecutive_squeezes.h" +#include "onnxoptimizer/passes/fuse_consecutive_transposes.h" +#include "onnxoptimizer/passes/fuse_matmul_add_bias_into_gemm.h" +#include "onnxoptimizer/passes/fuse_pad_into_conv.h" +#include "onnxoptimizer/passes/fuse_transpose_into_gemm.h" +#include "onnxoptimizer/passes/lift_lexical_references.h" +#include "onnxoptimizer/passes/nop.h" +#include "onnxoptimizer/passes/split.h" #include #include diff --git a/onnx_opt/passes/eliminate_deadend.h b/onnxoptimizer/passes/eliminate_deadend.h similarity index 97% rename from onnx_opt/passes/eliminate_deadend.h rename to onnxoptimizer/passes/eliminate_deadend.h index db121d694..ea6a9f767 100644 --- a/onnx_opt/passes/eliminate_deadend.h +++ b/onnxoptimizer/passes/eliminate_deadend.h @@ -2,7 +2,7 @@ // ATTENTION: The code in this file is highly EXPERIMENTAL. // Adventurous users should note that the APIs will probably change. 
#pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { struct EliminateDeadEnd final : public FullGraphBasedPass { diff --git a/onnx_opt/passes/eliminate_identity.h b/onnxoptimizer/passes/eliminate_identity.h similarity index 96% rename from onnx_opt/passes/eliminate_identity.h rename to onnxoptimizer/passes/eliminate_identity.h index 4fae9fc19..a0e75ee78 100644 --- a/onnx_opt/passes/eliminate_identity.h +++ b/onnxoptimizer/passes/eliminate_identity.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_dropout.h b/onnxoptimizer/passes/eliminate_nop_dropout.h similarity index 97% rename from onnx_opt/passes/eliminate_nop_dropout.h rename to onnxoptimizer/passes/eliminate_nop_dropout.h index 94ffb9fa3..44132de96 100644 --- a/onnx_opt/passes/eliminate_nop_dropout.h +++ b/onnxoptimizer/passes/eliminate_nop_dropout.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_monotone_argmax.h b/onnxoptimizer/passes/eliminate_nop_monotone_argmax.h similarity index 98% rename from onnx_opt/passes/eliminate_nop_monotone_argmax.h rename to onnxoptimizer/passes/eliminate_nop_monotone_argmax.h index 0e3334225..9bca9048b 100644 --- a/onnx_opt/passes/eliminate_nop_monotone_argmax.h +++ b/onnxoptimizer/passes/eliminate_nop_monotone_argmax.h @@ -2,7 +2,7 @@ // Adventurous users should note that the APIs will probably change. 
#pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_pad.h b/onnxoptimizer/passes/eliminate_nop_pad.h similarity index 98% rename from onnx_opt/passes/eliminate_nop_pad.h rename to onnxoptimizer/passes/eliminate_nop_pad.h index 04441f983..3c74e4a96 100644 --- a/onnx_opt/passes/eliminate_nop_pad.h +++ b/onnxoptimizer/passes/eliminate_nop_pad.h @@ -4,7 +4,7 @@ #pragma once #include "onnx/defs/tensor_util.h" -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_nop_transpose.h b/onnxoptimizer/passes/eliminate_nop_transpose.h similarity index 97% rename from onnx_opt/passes/eliminate_nop_transpose.h rename to onnxoptimizer/passes/eliminate_nop_transpose.h index daad9c8d9..230d07005 100644 --- a/onnx_opt/passes/eliminate_nop_transpose.h +++ b/onnxoptimizer/passes/eliminate_nop_transpose.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/eliminate_unused_initializer.h b/onnxoptimizer/passes/eliminate_unused_initializer.h similarity index 98% rename from onnx_opt/passes/eliminate_unused_initializer.h rename to onnxoptimizer/passes/eliminate_unused_initializer.h index 592dc1cec..aea24201b 100644 --- a/onnx_opt/passes/eliminate_unused_initializer.h +++ b/onnxoptimizer/passes/eliminate_unused_initializer.h @@ -14,7 +14,7 @@ // condition 1: A is not used as any node's input // condition 2: A is not an output -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/extract_constant_to_initializer.h b/onnxoptimizer/passes/extract_constant_to_initializer.h similarity index 97% rename from onnx_opt/passes/extract_constant_to_initializer.h rename to 
onnxoptimizer/passes/extract_constant_to_initializer.h index 5fafe85a6..6c6607e28 100644 --- a/onnx_opt/passes/extract_constant_to_initializer.h +++ b/onnxoptimizer/passes/extract_constant_to_initializer.h @@ -11,7 +11,7 @@ // this pass can handle the case satisfy all following conditions: // condition 1: A is the output of a Constant node #include "onnx/common/assertions.h" -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_add_bias_into_conv.h b/onnxoptimizer/passes/fuse_add_bias_into_conv.h similarity index 99% rename from onnx_opt/passes/fuse_add_bias_into_conv.h rename to onnxoptimizer/passes/fuse_add_bias_into_conv.h index 22a8adb9f..749916aa8 100644 --- a/onnx_opt/passes/fuse_add_bias_into_conv.h +++ b/onnxoptimizer/passes/fuse_add_bias_into_conv.h @@ -16,7 +16,7 @@ #include #include "onnx/common/assertions.h" -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_bn_into_conv.h b/onnxoptimizer/passes/fuse_bn_into_conv.h similarity index 99% rename from onnx_opt/passes/fuse_bn_into_conv.h rename to onnxoptimizer/passes/fuse_bn_into_conv.h index 697f9cbd7..564b26c37 100644 --- a/onnx_opt/passes/fuse_bn_into_conv.h +++ b/onnxoptimizer/passes/fuse_bn_into_conv.h @@ -29,7 +29,7 @@ // $$ b' = (b_{conv} - m)\frac{s}{\sqrt{\sigma + \epsilon}} + b_{bn}$$ #include "onnx/common/assertions.h" -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_concats.h b/onnxoptimizer/passes/fuse_consecutive_concats.h similarity index 98% rename from onnx_opt/passes/fuse_consecutive_concats.h rename to onnxoptimizer/passes/fuse_consecutive_concats.h index 9e42c7530..e9edb8a81 100644 --- a/onnx_opt/passes/fuse_consecutive_concats.h +++ b/onnxoptimizer/passes/fuse_consecutive_concats.h @@ -3,7 
+3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_log_softmax.h b/onnxoptimizer/passes/fuse_consecutive_log_softmax.h similarity index 97% rename from onnx_opt/passes/fuse_consecutive_log_softmax.h rename to onnxoptimizer/passes/fuse_consecutive_log_softmax.h index 8f732c17f..521cda3ea 100644 --- a/onnx_opt/passes/fuse_consecutive_log_softmax.h +++ b/onnxoptimizer/passes/fuse_consecutive_log_softmax.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h b/onnxoptimizer/passes/fuse_consecutive_reduce_unsqueeze.h similarity index 98% rename from onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h rename to onnxoptimizer/passes/fuse_consecutive_reduce_unsqueeze.h index 550fb5cc1..8f20247a4 100644 --- a/onnx_opt/passes/fuse_consecutive_reduce_unsqueeze.h +++ b/onnxoptimizer/passes/fuse_consecutive_reduce_unsqueeze.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_squeezes.h b/onnxoptimizer/passes/fuse_consecutive_squeezes.h similarity index 98% rename from onnx_opt/passes/fuse_consecutive_squeezes.h rename to onnxoptimizer/passes/fuse_consecutive_squeezes.h index 2b1c8aa51..0d0d8a493 100644 --- a/onnx_opt/passes/fuse_consecutive_squeezes.h +++ b/onnxoptimizer/passes/fuse_consecutive_squeezes.h @@ -9,7 +9,7 @@ // Z = Squeeze(Y, axes=[0, 4]) -> shape=[2, 3, 5] // After: // Z = Squeeze(X, axes=[0, 1, 4, 6]) -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_consecutive_transposes.h b/onnxoptimizer/passes/fuse_consecutive_transposes.h similarity index 98% rename 
from onnx_opt/passes/fuse_consecutive_transposes.h rename to onnxoptimizer/passes/fuse_consecutive_transposes.h index 6b7d58978..764091f69 100644 --- a/onnx_opt/passes/fuse_consecutive_transposes.h +++ b/onnxoptimizer/passes/fuse_consecutive_transposes.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h b/onnxoptimizer/passes/fuse_matmul_add_bias_into_gemm.h similarity index 99% rename from onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h rename to onnxoptimizer/passes/fuse_matmul_add_bias_into_gemm.h index 8d093cee1..19f21d2be 100644 --- a/onnx_opt/passes/fuse_matmul_add_bias_into_gemm.h +++ b/onnxoptimizer/passes/fuse_matmul_add_bias_into_gemm.h @@ -17,7 +17,7 @@ #include #include "onnx/common/assertions.h" -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_pad_into_conv.h b/onnxoptimizer/passes/fuse_pad_into_conv.h similarity index 99% rename from onnx_opt/passes/fuse_pad_into_conv.h rename to onnxoptimizer/passes/fuse_pad_into_conv.h index 575a199b2..e7adc9312 100644 --- a/onnx_opt/passes/fuse_pad_into_conv.h +++ b/onnxoptimizer/passes/fuse_pad_into_conv.h @@ -15,7 +15,7 @@ #include #include "onnx/defs/tensor_util.h" -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/fuse_transpose_into_gemm.h b/onnxoptimizer/passes/fuse_transpose_into_gemm.h similarity index 97% rename from onnx_opt/passes/fuse_transpose_into_gemm.h rename to onnxoptimizer/passes/fuse_transpose_into_gemm.h index b9fab13af..20a9a1e37 100644 --- a/onnx_opt/passes/fuse_transpose_into_gemm.h +++ b/onnxoptimizer/passes/fuse_transpose_into_gemm.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { 
namespace optimization { diff --git a/onnx_opt/passes/lift_lexical_references.h b/onnxoptimizer/passes/lift_lexical_references.h similarity index 99% rename from onnx_opt/passes/lift_lexical_references.h rename to onnxoptimizer/passes/lift_lexical_references.h index 2082c555c..99cf6342f 100644 --- a/onnx_opt/passes/lift_lexical_references.h +++ b/onnxoptimizer/passes/lift_lexical_references.h @@ -1,7 +1,7 @@ #pragma once #include -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/nop.h b/onnxoptimizer/passes/nop.h similarity index 95% rename from onnx_opt/passes/nop.h rename to onnxoptimizer/passes/nop.h index de71fab65..edc02bae1 100644 --- a/onnx_opt/passes/nop.h +++ b/onnxoptimizer/passes/nop.h @@ -1,6 +1,6 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/passes/split.h b/onnxoptimizer/passes/split.h similarity index 99% rename from onnx_opt/passes/split.h rename to onnxoptimizer/passes/split.h index c7311201b..81e8a7c2d 100644 --- a/onnx_opt/passes/split.h +++ b/onnxoptimizer/passes/split.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx_opt/pass.h" +#include "onnxoptimizer/pass.h" namespace ONNX_NAMESPACE { namespace optimization { diff --git a/onnx_opt/test/optimizer_test.py b/onnxoptimizer/test/optimizer_test.py similarity index 99% rename from onnx_opt/test/optimizer_test.py rename to onnxoptimizer/test/optimizer_test.py index 6fb1de61a..3dabd5423 100644 --- a/onnx_opt/test/optimizer_test.py +++ b/onnxoptimizer/test/optimizer_test.py @@ -10,7 +10,7 @@ import numpy as np # type: ignore -import onnx_opt +import onnxoptimizer import unittest @@ -18,7 +18,7 @@ class TestOptimizer(unittest.TestCase): def _optimized(self, graph, opts, fixed_point=False, **kwargs): # type: (GraphProto, Sequence[Text], bool, **Any) -> ModelProto orig_model = helper.make_model(graph, 
producer_name='onnx-test', **kwargs) - optimized_model = onnx_opt.optimize(orig_model, opts, fixed_point) + optimized_model = onnxoptimizer.optimize(orig_model, opts, fixed_point) checker.check_model(optimized_model) return optimized_model @@ -90,7 +90,7 @@ def _visit_all_nodes_recursive(self, graph, fn): # type: (GraphProto, Callable[ def test_get_available_passes(self): # type: () -> None # FIXME does not guarantees to be listing all graph = helper.make_graph([], "dummy_graph", [], []) - list_of_passes = onnx_opt.get_available_passes() + list_of_passes = onnxoptimizer.get_available_passes() assert isinstance(list_of_passes, (list)) and len(list_of_passes) > 0 for pass_name in list_of_passes: # If pass_name is invalid it throws a RuntimeError diff --git a/setup.py b/setup.py index 2d18a8b54..4bd207d90 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ TOP_DIR = os.path.realpath(os.path.dirname(__file__)) -SRC_DIR = os.path.join(TOP_DIR, 'onnx_opt') +SRC_DIR = os.path.join(TOP_DIR, 'onnxoptimizer') CMAKE_BUILD_DIR = os.path.join(TOP_DIR, '.setuptools-cmake-build') WINDOWS = (os.name == 'nt') @@ -216,8 +216,8 @@ def run(self): self.run_command('cmake_build') generated_python_files = \ - glob.glob(os.path.join(CMAKE_BUILD_DIR, 'onnx_opt', '*.py')) + \ - glob.glob(os.path.join(CMAKE_BUILD_DIR, 'onnx_opt', '*.pyi')) + glob.glob(os.path.join(CMAKE_BUILD_DIR, 'onnxoptimizer', '*.py')) + \ + glob.glob(os.path.join(CMAKE_BUILD_DIR, 'onnxoptimizer', '*.pyi')) for src in generated_python_files: dst = os.path.join( @@ -252,7 +252,7 @@ def build_extensions(self): elif os.path.exists(release_lib_dir): lib_path = release_lib_dir src = os.path.join(lib_path, filename) - dst = os.path.join(os.path.realpath(self.build_lib), "onnx_opt", filename) + dst = os.path.join(os.path.realpath(self.build_lib), "onnxoptimizer", filename) self.copy_file(src, dst) @@ -281,7 +281,7 @@ def run(self): ext_modules = [ setuptools.Extension( - name=str('onnx_opt.onnx_opt_cpp2py_export'), + 
name=str('onnxoptimizer.onnx_opt_cpp2py_export'), sources=[]) ] @@ -314,7 +314,7 @@ def run(self): ################################################################################ setuptools.setup( - name="onnx_opt", + name="onnxoptimizer", version=VersionInfo.version, description="Open Neural Network Exchange", ext_modules=ext_modules, From aaa3ea4b4bf0b0bf25af9a9108979daafd7b35bb Mon Sep 17 00:00:00 2001 From: daquexian Date: Sat, 29 Aug 2020 21:03:13 +0800 Subject: [PATCH 10/14] add mypy typecheck and ci (copied from onnx/onnx repo) --- .azure-pipelines/Linux-CI.yml | 105 ++++++++++++++++++++++++++++++++ .azure-pipelines/MacOS-CI.yml | 70 +++++++++++++++++++++ .azure-pipelines/Windows-CI.yml | 66 ++++++++++++++++++++ setup.cfg | 73 ++++++++++++++++++++++ tools/mypy-onnx.py | 22 +++++++ 5 files changed, 336 insertions(+) create mode 100644 .azure-pipelines/Linux-CI.yml create mode 100644 .azure-pipelines/MacOS-CI.yml create mode 100644 .azure-pipelines/Windows-CI.yml create mode 100644 setup.cfg create mode 100644 tools/mypy-onnx.py diff --git a/.azure-pipelines/Linux-CI.yml b/.azure-pipelines/Linux-CI.yml new file mode 100644 index 000000000..3fe5610ea --- /dev/null +++ b/.azure-pipelines/Linux-CI.yml @@ -0,0 +1,105 @@ +trigger: +- master + +jobs: +- job: 'Test' + pool: + vmImage: 'Ubuntu-16.04' + strategy: + matrix: + py27: + python.version: '2.7' + onnx_ml: 0 + onnx_debug: 0 + py36: + python.version: '3.6' + onnx_ml: 0 + onnx_debug: 0 + py37: + python.version: '3.7' + onnx_ml: 0 + onnx_debug: 0 + py37-ml: + python.version: '3.7' + onnx_ml: 1 + onnx_debug: 0 + py37-ml-debug: + python.version: '3.7' + onnx_ml: 1 + onnx_debug: 1 + maxParallel: 5 + + steps: + - script: sudo install -d -m 0777 /home/vsts/.conda/envs + displayName: Fix Conda permissions + + - task: CondaEnvironment@1 + inputs: + createCustomEnvironment: true + environmentName: 'py$(python.version)' + packageSpecs: 'python=$(python.version) protobuf' + + - script: | + python -m pip install --upgrade 
pip + sudo apt-get install -qq -o=Dpkg::Use-Pty=0 -y --no-install-recommends dos2unix + python -m pip install numpy + git submodule update --init --recursive + export ONNX_ML=${onnx_ml} + export DEBUG=${onnx_debug} + export ONNX_BUILD_TESTS=0 + export CMAKE_ARGS="-DONNXIFI_DUMMY_BACKEND=ON" + export ONNX_NAMESPACE=ONNX_NAMESPACE_FOO_BAR_FOR_CI + python setup.py --quiet install + displayName: 'Install ONNX and dependencies' + + + - script: | + # lint python code + pip install --quiet flake8 + flake8 + if [ $? -ne 0 ]; then + echo "flake8 returned failures" + exit 1 + fi + + # check line endings to be UNIX + find . -type f -regextype posix-extended -regex '.*\.(py|cpp|md|h|cc|proto|proto3|in)' | xargs dos2unix --quiet + git status + git diff --exit-code + + # Do not hardcode onnx's namespace in the c++ source code, so that + # other libraries who statically link with onnx can hide onnx symbols + # in a private namespace. + ! grep -R --include='*.cc' --include='*.h' 'namespace onnx' . + ! grep -R --include='*.cc' --include='*.h' 'onnx::' . + + # onnx python api tests + if [ "$(python.version)" == "2.7" ]; then + pip install --quiet pytest nbval + else + # pytest 6.0 made deprecation warnings fail by default, pinning pytest to 5.4.3. + # TODO replace deprecated function with the suggested one. https://docs.pytest.org/en/stable/deprecations.html#id5 + pip install --quiet pytest==5.4.3 nbval + fi + + pytest + if [ $? -ne 0 ]; then + echo "pytest failed" + exit 1 + fi + + # Mypy only works with Python 3 + if [ "$(python.version)" != "2.7" ]; then + # Mypy only works with our generated _pb.py files when we install in develop mode, so let's do that + pip uninstall -y onnxoptimizer + ONNX_NAMESPACE=ONNX_NAMESPACE_FOO_BAR_FOR_CI pip install --no-use-pep517 -e .[mypy] + python setup.py --quiet typecheck + if [ $? 
-ne 0 ]; then + echo "type check failed" + exit 1 + fi + pip uninstall -y onnxoptimizer + rm -rf .setuptools-cmake-build + ONNX_NAMESPACE=ONNX_NAMESPACE_FOO_BAR_FOR_CI pip install . + fi + displayName: 'Run ONNX Optimizer tests' diff --git a/.azure-pipelines/MacOS-CI.yml b/.azure-pipelines/MacOS-CI.yml new file mode 100644 index 000000000..09962e438 --- /dev/null +++ b/.azure-pipelines/MacOS-CI.yml @@ -0,0 +1,70 @@ +trigger: +- master + +jobs: +- job: 'Test' + pool: + vmImage: 'macOS-10.14' + strategy: + matrix: + py27: + python.version: '2.7' + onnx_ml: 0 + py36: + python.version: '3.6' + onnx_ml: 0 + py36-onnx-ml: + python.version: '3.6' + onnx_ml: 1 + maxParallel: 3 + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + + - script: | + git submodule update --init --recursive + python -m pip install --upgrade setuptools + python -m pip install numpy + conda install -y -c conda-forge pybind11 protobuf + brew update + brew install protobuf + export DEBUG=${onnx_debug} + export ONNX_ML=${onnx_ml} + export CMAKE_ARGS="-DONNX_USE_LITE_PROTO=ON -DONNXIFI_DUMMY_BACKEND=ON" + export ONNX_NAMESPACE=ONNX_NAMESPACE_FOO_BAR_FOR_CI + python setup.py --quiet install + displayName: 'Install dependencies and ONNX' + + - script: | + # lint python code + pip install --quiet flake8 + flake8 + if [ $? -ne 0 ]; then + echo "flake8 returned failures" + exit 1 + fi + + # Do not hardcode onnx's namespace in the c++ source code, so that + # other libraries who statically link with onnx can hide onnx symbols + # in a private namespace. + ! grep -R --include='*.cc' --include='*.h' 'namespace onnx' . + ! grep -R --include='*.cc' --include='*.h' 'onnx::' . + + # onnx python api tests + if [ "$(python.version)" == "2.7" ]; then + pip install --quiet pytest nbval + else + # pytest 6.0 made deprecation warnings fail by default, pinning pytest to 5.4.3. + # TODO replace deprecated function with the suggested one. 
https://docs.pytest.org/en/stable/deprecations.html#id5 + pip install --quiet pytest==5.4.3 nbval + fi + + pytest onnxoptimizer + if [ $? -ne 0 ]; then + echo "pytest failed" + exit 1 + fi + + displayName: 'Run ONNX Optimizer Tests' diff --git a/.azure-pipelines/Windows-CI.yml b/.azure-pipelines/Windows-CI.yml new file mode 100644 index 000000000..49b4b40be --- /dev/null +++ b/.azure-pipelines/Windows-CI.yml @@ -0,0 +1,66 @@ +trigger: +- master + +jobs: + +- job: 'Test' + pool: + vmImage: 'vs2017-win2016' + strategy: + matrix: + py37: + python.version: '3.7' + onnx_ml: 0 + onnx_verify_proto: 0 + py36: + python.version: '3.6' + onnx_ml: 0 + onnx_verify_proto: 0 + py37_onnx_ml: + python.version: '3.7' + onnx_ml: 1 + onnx_verify_proto: 0 + py36_verify_proto: + python.version: '3.6' + onnx_ml: 0 + onnx_verify_proto: 1 + maxParallel: 4 + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + architecture: 'x64' + + - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" + displayName: Add conda to PATH + + - script: conda create --yes --quiet --name py$(python.version) -c conda-forge python=$(python.version) numpy libprotobuf=3.11.3 protobuf + displayName: Create Anaconda environment + + - script: | + call activate py$(python.version) + python -m pip install --upgrade pip + # pytest 6.0 made deprecation warnings fail by default, pinning pytest to 5.4.3. + # TODO replace deprecated function with the suggested one. 
https://docs.pytest.org/en/stable/deprecations.html#id5 + python -m pip install --quiet pytest==5.4.3 nbval numpy + + git submodule update --init --recursive + set ONNX_BUILD_TESTS=1 + set ONNX_ML=$(onnx_ml) + set ONNX_VERIFY_PROTO_3=$(onnx_verify_proto) + set USE_MSVC_STATIC_RUNTIME=0 + set CMAKE_ARGS=-DONNX_USE_PROTOBUF_SHARED_LIBS=ON -DProtobuf_USE_STATIC_LIBS=OFF -DONNX_USE_LITE_PROTO=ON + + python setup.py --quiet install + pytest + IF NOT %ERRORLEVEL% EQU 0 ( + @echo "pytest failed" + EXIT 1 + ) + + rm -rf .setuptools-cmake-build + pip install --quiet -e .[mypy] + python setup.py typecheck + + displayName: Install and test ONNX Optimizer diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..3f74a5bf6 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,73 @@ +[aliases] +test=pytest + +[tool:pytest] +addopts = --nbval --current-env +testpaths = onnxoptimizer/test/ + +[metadata] +license-file = LICENSE + +[flake8] +select = B,C,E,F,P,T4,W,B9 +max-line-length = 80 +### DEFAULT IGNORES FOR 4-space INDENTED PROJECTS ### +# E127, E128 are hard to silence in certain nested formatting situations. +# E265, E266 talk about comment formatting which is too opinionated. +# E402 warns on imports coming after statements. There are important use cases +# like demandimport (https://fburl.com/demandimport) that require statements +# before imports. +# E501 is not flexible enough, we're using B950 instead. +# E722 is a duplicate of B001. +# F405 is hard to silence since we indeed do star import +# P207 is a duplicate of B003. +# P208 is a duplicate of C403. +# W503 talks about operator formatting which is too opinionated. +# F401 clashes with PEP484 requiring us to import types that are only used in +# type comments. 
+ignore = E127, E128, E265, E266, E402, E501, E722, F405, P207, P208, W503, F401 +exclude = + .git, + __pycache__, + build/*, + third_party/* + *_pb2.py, + .cache/* + .eggs + .setuptools-cmake-build/* + +[mypy] +# follow-imports = silent # TODO remove this +mypy_path = stubs:third_party/onnx/third_party/pybind11 +strict_optional = True +warn_return_any = True +warn_no_return = True +# TODO warn_unused_ignores = True +warn_redundant_casts = True +warn_incomplete_stub = True +# TODO disallow_untyped_calls = True +check_untyped_defs = True +disallow_any_generics = True +no_implicit_optional = True +# TODO disallow_incomplete_defs = True +# TODO disallow_subclassing_any = True +disallow_untyped_decorators = True +warn_unused_configs = True + +[mypy-onnxoptimizer.*] +disallow_untyped_defs = True + +[mypy-onnxoptimizer.onnx_opt_cpp2py_export] +ignore_missing_imports = True + +[mypy-onnx.*] +disallow_untyped_defs = True +ignore_missing_imports = True + +[mypy-tools.*] +disallow_untyped_defs = True + +# Ignore errors in setup.py +[mypy-setup] +ignore_errors = True + diff --git a/tools/mypy-onnx.py b/tools/mypy-onnx.py new file mode 100644 index 000000000..ead74291f --- /dev/null +++ b/tools/mypy-onnx.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +import subprocess +import os + + +def main(): # type: () -> None + try: + root_folder = os.path.realpath(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + os.chdir(root_folder) + + subprocess.check_call(["mypy", "."]) + subprocess.check_call(["mypy", "--py2", "."]) + + exit(0) + except subprocess.CalledProcessError: + # Catch this exception because we don't want it to output a backtrace that would clutter the mypy output + exit(1) + + +if __name__ == '__main__': + main() From 3311ebf6d937b85ddc148b54341db31f73404c3a Mon Sep 17 00:00:00 2001 From: daquexian Date: Sat, 29 Aug 2020 21:09:26 +0800 Subject: [PATCH 11/14] fix flake8 error --- onnxoptimizer/__init__.py | 7 +- onnxoptimizer/test/optimizer_test.py | 369 
+++++++++++++++++---------- setup.py | 13 +- 3 files changed, 246 insertions(+), 143 deletions(-) diff --git a/onnxoptimizer/__init__.py b/onnxoptimizer/__init__.py index ec8429605..458b0d725 100644 --- a/onnxoptimizer/__init__.py +++ b/onnxoptimizer/__init__.py @@ -39,14 +39,16 @@ get_available_passes = C.get_available_passes -def optimize(model, passes=None, fixed_point=False): # type: (ModelProto, Optional[Sequence[Text]], bool) -> ModelProto +# type: (ModelProto, Optional[Sequence[Text]], bool) -> ModelProto +def optimize(model, passes=None, fixed_point=False): if passes is None: passes = ['eliminate_nop_transpose', 'eliminate_nop_pad', 'fuse_consecutive_transposes', 'fuse_transpose_into_gemm'] if not isinstance(model, ModelProto): - raise ValueError('Optimizer only accepts ModelProto, incorrect type: {}'.format(type(model))) + raise ValueError( + 'Optimizer only accepts ModelProto, incorrect type: {}'.format(type(model))) model_str = model.SerializeToString() if fixed_point: @@ -56,4 +58,5 @@ def optimize(model, passes=None, fixed_point=False): # type: (ModelProto, Optio return onnx.load_from_string(optimized_model_str) + __all__ = ['optimize', 'get_available_passes'] diff --git a/onnxoptimizer/test/optimizer_test.py b/onnxoptimizer/test/optimizer_test.py index 3dabd5423..e77c4751d 100644 --- a/onnxoptimizer/test/optimizer_test.py +++ b/onnxoptimizer/test/optimizer_test.py @@ -16,8 +16,10 @@ class TestOptimizer(unittest.TestCase): - def _optimized(self, graph, opts, fixed_point=False, **kwargs): # type: (GraphProto, Sequence[Text], bool, **Any) -> ModelProto - orig_model = helper.make_model(graph, producer_name='onnx-test', **kwargs) + # type: (GraphProto, Sequence[Text], bool, **Any) -> ModelProto + def _optimized(self, graph, opts, fixed_point=False, **kwargs): + orig_model = helper.make_model( + graph, producer_name='onnx-test', **kwargs) optimized_model = onnxoptimizer.optimize(orig_model, opts, fixed_point) checker.check_model(optimized_model) return 
optimized_model @@ -25,8 +27,10 @@ def _optimized(self, graph, opts, fixed_point=False, **kwargs): # type: (GraphP # input_types and output_types are lists of triples of (name, type, shape) def _make_fake_loop_op(self, body_nodes, # type: Sequence[NodeProto] - input_types, # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] - output_types # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + input_types, + # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + output_types ): # type: (...) -> List[NodeProto] zero = helper.make_tensor( "trip_count_value", TensorProto.INT64, (), [10]) @@ -62,7 +66,8 @@ def _make_fake_loop_op(self, def _make_fake_if_op(self, true_nodes, # type: Sequence[NodeProto] false_nodes, # type: Sequence[NodeProto] - output_types # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]] + output_types ): # type: (...) 
-> List[NodeProto] true = helper.make_tensor("condition", TensorProto.BOOL, (), [True]) true_graph = helper.make_graph(true_nodes, "true_graph", [], []) @@ -77,7 +82,8 @@ def _make_fake_if_op(self, return retval_nodes # fn is a function that takes a single node as argument - def _visit_all_nodes_recursive(self, graph, fn): # type: (GraphProto, Callable[[NodeProto], None]) -> None + # type: (GraphProto, Callable[[NodeProto], None]) -> None + def _visit_all_nodes_recursive(self, graph, fn): for node in graph.node: fn(node) for attr in node.attribute: @@ -217,7 +223,8 @@ def test_nop_pad_opset10(self): # type: () -> None [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3))], [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3))]) assert len(graph.node) == 1 - optimized_model = self._optimized(graph, ["eliminate_nop_pad"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["eliminate_nop_pad"], False, opset_imports=[helper.make_opsetid("", 10)]) def check_pad(node): # type: (NodeProto) -> None assert node.op_type != "Pad" @@ -237,9 +244,10 @@ def test_nop_pad_graph_output(self): # type: () -> None helper.make_tensor_value_info("Pads", TensorProto.INT64, (2,))], [helper.make_tensor_value_info("B", TensorProto.FLOAT, (5,))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(2,), - vals=np.array([0, 0]).astype(np.int64).tobytes(), - raw=True)]) + dims=(2,), + vals=np.array([0, 0]).astype( + np.int64).tobytes(), + raw=True)]) # The existence of shape infos of graoh outputs is checked in _optimized optimized_model = self._optimized(graph, ["eliminate_nop_pad"]) @@ -257,9 +265,10 @@ def test_nop_pad(self): # type: () -> None helper.make_tensor_value_info("Pads", TensorProto.INT64, (4,))], [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(4,), - vals=np.array([0, 0, 0, 0]).astype(np.int64).tobytes(), - raw=True)]) + dims=(4,), + 
vals=np.array([0, 0, 0, 0]).astype( + np.int64).tobytes(), + raw=True)]) assert len(graph.node) == 1 optimized_model = self._optimized(graph, ["eliminate_nop_pad"]) @@ -277,7 +286,8 @@ def test_nop_pad_default_opset10(self): # type: () -> None "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3))], [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 4))]) - optimized_model = self._optimized(graph, ["eliminate_nop_pad"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["eliminate_nop_pad"], False, opset_imports=[helper.make_opsetid("", 10)]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Pad" @@ -291,9 +301,10 @@ def test_nop_pad_default(self): # type: () -> None helper.make_tensor_value_info("Pads", TensorProto.INT64, (4,))], [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 4))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(4,), - vals=np.array([0, 1, 0, 0]).astype(np.int64).tobytes(), - raw=True)]) + dims=(4,), + vals=np.array([0, 1, 0, 0]).astype( + np.int64).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["eliminate_nop_pad"]) assert len(list(optimized_model.graph.node)) == 1 @@ -337,7 +348,8 @@ def test_eliminate_unused_initializer_input(self): # type: () -> None assert len(list(optimized_model.graph.initializer)) == 0 assert len(optimized_model.graph.input) == 2 - def test_eliminate_unused_initializer_no_eliminate_used_default(self): # type: () -> None + # type: () -> None + def test_eliminate_unused_initializer_no_eliminate_used_default(self): add = helper.make_node("Add", ["X", "A"], ["Z"]) graph = helper.make_graph( [add], @@ -355,7 +367,8 @@ def test_eliminate_unused_initializer_no_eliminate_used_default(self): # type: assert len(list(optimized_model.graph.initializer)) == 1 - def test_eliminate_unused_initializer_no_eliminate_used(self): # type: () -> None + # type: () -> None + def 
test_eliminate_unused_initializer_no_eliminate_used(self): nodes = [helper.make_node("Add", ["X", "A"], ["Z"])] nodes.extend(self._make_fake_loop_op( [helper.make_node("Add", ["_X", "_A"], ["_Z2"])], @@ -387,7 +400,8 @@ def test_eliminate_unused_initializer_no_eliminate_used(self): # type: () -> No assert len(list(optimized_model.graph.initializer)) == 1 - def test_eliminate_unused_initializer_no_eliminate_output(self): # type: () -> None + # type: () -> None + def test_eliminate_unused_initializer_no_eliminate_output(self): add = helper.make_node("Add", ["X", "Y"], ["Z"]) graph = helper.make_graph( [add], @@ -448,12 +462,12 @@ def test_fuse_concats(self): # type: () -> None nodes, "test", [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3, 4)), - helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 3, 4)), - helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3, 4)), - helper.make_tensor_value_info("D", TensorProto.FLOAT, (4, 3, 4)), - helper.make_tensor_value_info("E", TensorProto.FLOAT, (2, 3, 4)), - helper.make_tensor_value_info("F", TensorProto.FLOAT, (4, 3, 4)), - helper.make_tensor_value_info("G", TensorProto.FLOAT, (4, 3, 4))], + helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("D", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("E", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("F", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("G", TensorProto.FLOAT, (4, 3, 4))], [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (18, 3, 4))]) optimized_model = self._optimized( graph, ["fuse_consecutive_concats"], True) # two passes are needed to simplify the graph to its simplest state. 
@@ -472,11 +486,11 @@ def test_fuse_concats_different_axis(self): # type: () -> None nodes, "test", [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3, 4)), - helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 3, 4)), - helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3, 4)), - helper.make_tensor_value_info("D", TensorProto.FLOAT, (4, 3, 4)), - helper.make_tensor_value_info("E", TensorProto.FLOAT, (4, 3, 4)), - helper.make_tensor_value_info("F", TensorProto.FLOAT, (4, 3, 4))], + helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3, 4)), + helper.make_tensor_value_info("D", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("E", TensorProto.FLOAT, (4, 3, 4)), + helper.make_tensor_value_info("F", TensorProto.FLOAT, (4, 3, 4))], [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (18, 3, 4))]) optimized_model = self._optimized( graph, ["fuse_consecutive_concats"], True) # two passes are needed to simplify the graph to its simplest state. 
@@ -521,7 +535,8 @@ def test_fuse_transpose_default_graph_output(self): # type: () -> None helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3))], [helper.make_tensor_value_info("C", TensorProto.FLOAT, (2, 3))]) # The existence of shape infos of graoh outputs is checked in _optimized - optimized_model = self._optimized(graph, ["fuse_consecutive_transposes"]) + optimized_model = self._optimized( + graph, ["fuse_consecutive_transposes"]) def check_transpose(node): # type: (NodeProto) -> None assert node.op_type != "Transpose" @@ -618,7 +633,8 @@ def test_fuse_add_bias_into_conv_use_weight_shape(self): # type: () -> None # Output 1 since 0 is 'cond' assert optimized_model.graph.node[4].attribute[0].g.output[1].name == '_Z' - def test_fuse_add_bias_into_conv_use_weight_shape_with_tile(self): # type: () -> None + # type: () -> None + def test_fuse_add_bias_into_conv_use_weight_shape_with_tile(self): conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "A"], ["B"]) graph = helper.make_graph( @@ -674,7 +690,8 @@ def test_fuse_add_bias_into_conv_use_conv_shape(self): # type: () -> None assert len( optimized_model.graph.output[0].type.tensor_type.shape.dim) == 4 - def test_fuse_add_bias_into_conv_use_move_constant(self): # type: () -> None + # type: () -> None + def test_fuse_add_bias_into_conv_use_move_constant(self): conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) constant = helper.make_node("Constant", [], ["A"], value=helper.make_tensor( @@ -706,7 +723,8 @@ def test_fuse_add_bias_into_conv_use_move_constant(self): # type: () -> None assert len( optimized_model.graph.output[0].type.tensor_type.shape.dim) == 4 - def test_fuse_add_bias_into_conv_squeeze_1d_bias_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_add_bias_into_conv_squeeze_1d_bias_no_fuse(self): conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "A"], ["B"]) graph = helper.make_graph( @@ -729,7 +747,8 @@ def 
test_fuse_add_bias_into_conv_squeeze_1d_bias_no_fuse(self): # type: () -> N assert optimized_model.graph.node[0].op_type == 'Conv' assert optimized_model.graph.node[1].op_type == 'Add' - def test_fuse_add_bias_into_conv_squeeze_3d_bias_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_add_bias_into_conv_squeeze_3d_bias_no_fuse(self): conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "A"], ["B"]) graph = helper.make_graph( @@ -752,7 +771,8 @@ def test_fuse_add_bias_into_conv_squeeze_3d_bias_no_fuse(self): # type: () -> N assert optimized_model.graph.node[0].op_type == 'Conv' assert optimized_model.graph.node[1].op_type == 'Add' - def test_fuse_add_bias_into_conv_squeeze_4d_bias_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_add_bias_into_conv_squeeze_4d_bias_no_fuse(self): conv = helper.make_node("Conv", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "A"], ["B"]) graph = helper.make_graph( @@ -782,7 +802,8 @@ def test_fuse_matmul_add_bias_into_gemm(self): # type: () -> None helper.make_tensor_value_info("B", TensorProto.FLOAT, (16,))], [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] ) - optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + optimized_model = self._optimized( + graph, ["fuse_matmul_add_bias_into_gemm"]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Gemm" @@ -798,12 +819,14 @@ def test_fuse_matmul_add_bias_into_gemm_2d_bias(self): # type: () -> None helper.make_tensor_value_info("B", TensorProto.FLOAT, (1, 16))], [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] ) - optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + optimized_model = self._optimized( + graph, ["fuse_matmul_add_bias_into_gemm"]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Gemm" - def 
test_fuse_matmul_add_bias_into_gemm_2d_bias_same_shape(self): # type: () -> None + # type: () -> None + def test_fuse_matmul_add_bias_into_gemm_2d_bias_same_shape(self): matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "B"], ["A"]) graph = helper.make_graph( @@ -814,12 +837,14 @@ def test_fuse_matmul_add_bias_into_gemm_2d_bias_same_shape(self): # type: () -> helper.make_tensor_value_info("B", TensorProto.FLOAT, (32, 16))], [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] ) - optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + optimized_model = self._optimized( + graph, ["fuse_matmul_add_bias_into_gemm"]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Gemm" - def test_fuse_matmul_add_bias_into_gemm_2d_bias_bcast_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_matmul_add_bias_into_gemm_2d_bias_bcast_no_fuse(self): matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "B"], ["A"]) graph = helper.make_graph( @@ -830,11 +855,13 @@ def test_fuse_matmul_add_bias_into_gemm_2d_bias_bcast_no_fuse(self): # type: () helper.make_tensor_value_info("B", TensorProto.FLOAT, (16, 16))], [helper.make_tensor_value_info("A", TensorProto.FLOAT, (16, 16))] ) - optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + optimized_model = self._optimized( + graph, ["fuse_matmul_add_bias_into_gemm"]) assert optimized_model.graph == graph - def test_fuse_matmul_add_bias_into_gemm_3d_matmul_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_matmul_add_bias_into_gemm_3d_matmul_no_fuse(self): matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "B"], ["A"]) graph = helper.make_graph( @@ -845,11 +872,13 @@ def test_fuse_matmul_add_bias_into_gemm_3d_matmul_no_fuse(self): # type: () -> helper.make_tensor_value_info("B", 
TensorProto.FLOAT, (3, 3))], [helper.make_tensor_value_info("A", TensorProto.FLOAT, (2, 3, 3))] ) - optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + optimized_model = self._optimized( + graph, ["fuse_matmul_add_bias_into_gemm"]) assert optimized_model.graph == graph - def test_fuse_matmul_add_bias_into_gemm_3d_bias_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_matmul_add_bias_into_gemm_3d_bias_no_fuse(self): matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) add = helper.make_node("Add", ["Z", "B"], ["A"]) graph = helper.make_graph( @@ -860,11 +889,13 @@ def test_fuse_matmul_add_bias_into_gemm_3d_bias_no_fuse(self): # type: () -> No helper.make_tensor_value_info("B", TensorProto.FLOAT, (4, 1, 16))], [helper.make_tensor_value_info("A", TensorProto.FLOAT, (32, 16))] ) - optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + optimized_model = self._optimized( + graph, ["fuse_matmul_add_bias_into_gemm"]) assert optimized_model.graph == graph - def test_fuse_matmul_add_bias_into_gemm_multiple_use_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_matmul_add_bias_into_gemm_multiple_use_no_fuse(self): matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"]) identity = helper.make_node("Identity", ["Z"], ["A1"]) add = helper.make_node("Add", ["Z", "B"], ["A2"]) @@ -877,11 +908,13 @@ def test_fuse_matmul_add_bias_into_gemm_multiple_use_no_fuse(self): # type: () [helper.make_tensor_value_info("A1", TensorProto.FLOAT, (32, 16)), helper.make_tensor_value_info("A2", TensorProto.FLOAT, (32, 16))] ) - optimized_model = self._optimized(graph, ["fuse_matmul_add_bias_into_gemm"]) + optimized_model = self._optimized( + graph, ["fuse_matmul_add_bias_into_gemm"]) assert optimized_model.graph == graph - def test_fuse_pad_into_conv_no_optional_value_opset10(self): # type: () -> None + # type: () -> None + def test_fuse_pad_into_conv_no_optional_value_opset10(self): pad = helper.make_node( 
"Pad", ["X"], @@ -895,14 +928,17 @@ def test_fuse_pad_into_conv_no_optional_value_opset10(self): # type: () -> None "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))] ) - optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Conv" assert optimized_model.graph.node[0].attribute[0].name == "pads" - assert list(optimized_model.graph.node[0].attribute[0].ints) == [0, 0, 1, 1] + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 0, 0, 1, 1] def test_fuse_pad_into_conv_no_optional_value(self): # type: () -> None pad = helper.make_node( @@ -918,17 +954,20 @@ def test_fuse_pad_into_conv_no_optional_value(self): # type: () -> None [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(8,), - vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), - raw=True)]) + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype( + np.int64).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Conv" assert 
optimized_model.graph.node[0].attribute[0].name == "pads" - assert list(optimized_model.graph.node[0].attribute[0].ints) == [0, 0, 1, 1] + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 0, 0, 1, 1] def test_fuse_pad_into_conv_with_optional_value(self): # type: () -> None pad = helper.make_node( @@ -943,25 +982,30 @@ def test_fuse_pad_into_conv_with_optional_value(self): # type: () -> None "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), - helper.make_tensor_value_info("Constant_value", TensorProto.FLOAT, ()), + helper.make_tensor_value_info( + "Constant_value", TensorProto.FLOAT, ()), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(8,), - vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), - raw=True), + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype( + np.int64).tobytes(), + raw=True), helper.make_tensor("Constant_value", TensorProto.FLOAT, - dims=(), - vals=np.array([0]).astype(np.float32).tobytes(), - raw=True)]) + dims=(), + vals=np.array([0]).astype(np.float32).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Conv" assert optimized_model.graph.node[0].attribute[0].name == "pads" - assert list(optimized_model.graph.node[0].attribute[0].ints) == [0, 0, 1, 1] + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 0, 0, 1, 1] - def test_fuse_pad_into_conv_with_nonzero_optional_value(self): # type: () -> None + # type: () -> None + def test_fuse_pad_into_conv_with_nonzero_optional_value(self): pad = helper.make_node( "Pad", ["X", "Pads", 
"Constant_value"], @@ -974,17 +1018,22 @@ def test_fuse_pad_into_conv_with_nonzero_optional_value(self): # type: () -> No "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), - helper.make_tensor_value_info("Constant_value", TensorProto.FLOAT, ()), + helper.make_tensor_value_info( + "Constant_value", TensorProto.FLOAT, ()), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(8,), - vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), - raw=True), + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype( + np.int64).tobytes(), + raw=True), helper.make_tensor("Constant_value", TensorProto.FLOAT, - dims=(), - vals=np.array([25]).astype(np.float32).tobytes(), # non-zero Constant_value -> so no pad - raw=True)]) + dims=(), + # non-zero Constant_value -> so no pad + vals=np.array([25]).astype( + np.float32).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert optimized_model.graph == graph @@ -1005,7 +1054,8 @@ def test_fuse_pad_into_conv_1d_opset10(self): # type: () -> None helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 32))], [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1))] ) - optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Conv" @@ -1028,9 +1078,10 @@ def test_fuse_pad_into_conv_1d(self): # type: () -> None helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 32))], 
[helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(6,), - vals=np.array([0, 0, 1, 0, 0, 1]).astype(np.int64).tobytes(), - raw=True)]) + dims=(6,), + vals=np.array([0, 0, 1, 0, 0, 1]).astype( + np.int64).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert len(list(optimized_model.graph.node)) == 1 @@ -1038,7 +1089,8 @@ def test_fuse_pad_into_conv_1d(self): # type: () -> None assert optimized_model.graph.node[0].attribute[0].name == "pads" assert list(optimized_model.graph.node[0].attribute[0].ints) == [1, 1] - def test_fuse_pad_into_conv_existing_conv_pad_opset10(self): # type: () -> None + # type: () -> None + def test_fuse_pad_into_conv_existing_conv_pad_opset10(self): pad = helper.make_node( "Pad", ["X"], @@ -1057,14 +1109,17 @@ def test_fuse_pad_into_conv_existing_conv_pad_opset10(self): # type: () -> None "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 4, 4))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))] ) - optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Conv" assert optimized_model.graph.node[0].attribute[0].name == "pads" - assert list(optimized_model.graph.node[0].attribute[0].ints) == [1, 1, 1, 1] + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 1, 1, 1, 1] def test_fuse_pad_into_conv_existing_conv_pad(self): # type: () -> None pad = helper.make_node( @@ -1085,19 +1140,23 @@ def test_fuse_pad_into_conv_existing_conv_pad(self): # type: () -> 
None [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 4, 4))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(8,), - vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), - raw=True)]) + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype( + np.int64).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert len(list(optimized_model.graph.node)) == 1 assert optimized_model.graph.node[0].op_type == "Conv" assert optimized_model.graph.node[0].attribute[0].name == "pads" - assert list(optimized_model.graph.node[0].attribute[0].ints) == [1, 1, 1, 1] + assert list(optimized_model.graph.node[0].attribute[0].ints) == [ + 1, 1, 1, 1] - def test_fuse_pad_into_conv_pad_feature_no_fuse_opset10(self): # type: () -> None + # type: () -> None + def test_fuse_pad_into_conv_pad_feature_no_fuse_opset10(self): pad = helper.make_node( "Pad", ["X"], @@ -1111,9 +1170,11 @@ def test_fuse_pad_into_conv_pad_feature_no_fuse_opset10(self): # type: () -> No "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 4, 3, 3)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))] ) - optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) assert optimized_model.graph == graph @@ -1131,16 +1192,19 @@ def test_fuse_pad_into_conv_pad_feature_no_fuse(self): # type: () -> None 
[helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 4, 3, 3)), helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(8,), - vals=np.array([0, 1, 0, 0, 0, 0, 0, 0]).astype(np.int64).tobytes(), - raw=True)]) + dims=(8,), + vals=np.array([0, 1, 0, 0, 0, 0, 0, 0]).astype( + np.int64).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert optimized_model.graph == graph - def test_fuse_pad_into_conv_negative_pad_no_fuse_opset10(self): # type: () -> None + # type: () -> None + def test_fuse_pad_into_conv_negative_pad_no_fuse_opset10(self): pad = helper.make_node( "Pad", ["X"], @@ -1154,9 +1218,11 @@ def test_fuse_pad_into_conv_negative_pad_no_fuse_opset10(self): # type: () -> N "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 4, 4)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))] ) - optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) assert optimized_model.graph == graph @@ -1174,16 +1240,19 @@ def test_fuse_pad_into_conv_negative_pad_no_fuse(self): # type: () -> None [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 4, 4)), helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + 
[helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))], [helper.make_tensor("Pads", TensorProto.INT64, - dims=(8,), - vals=np.array([0, 0, 0, 0, 0, 0, -1, -1]).astype(np.int64).tobytes(), - raw=True)]) + dims=(8,), + vals=np.array( + [0, 0, 0, 0, 0, 0, -1, -1]).astype(np.int64).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert optimized_model.graph == graph - def test_fuse_pad_into_conv_reflection_pad_no_fuse_opset10(self): # type: () -> None + # type: () -> None + def test_fuse_pad_into_conv_reflection_pad_no_fuse_opset10(self): pad = helper.make_node( "Pad", ["X"], @@ -1197,13 +1266,16 @@ def test_fuse_pad_into_conv_reflection_pad_no_fuse_opset10(self): # type: () -> "test", [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))] + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))] ) - optimized_model = self._optimized(graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) + optimized_model = self._optimized( + graph, ["fuse_pad_into_conv"], False, opset_imports=[helper.make_opsetid("", 10)]) assert optimized_model.graph == graph - def test_fuse_pad_into_conv_reflection_pad_no_fuse(self): # type: () -> None + # type: () -> None + def test_fuse_pad_into_conv_reflection_pad_no_fuse(self): pad = helper.make_node( "Pad", ["X", "Pads"], @@ -1217,11 +1289,13 @@ def test_fuse_pad_into_conv_reflection_pad_no_fuse(self): # type: () -> None [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 2, 2)), helper.make_tensor_value_info("Pads", TensorProto.INT64, (8,)), helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))], - [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 16, 1, 1))], + [helper.make_tensor_value_info( + "Z", TensorProto.FLOAT, (1, 16, 1, 1))], 
[helper.make_tensor("Pads", TensorProto.INT64, - dims=(8,), - vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype(np.int64).tobytes(), - raw=True)]) + dims=(8,), + vals=np.array([0, 0, 0, 0, 0, 0, 1, 1]).astype( + np.int64).tobytes(), + raw=True)]) optimized_model = self._optimized(graph, ["fuse_pad_into_conv"]) assert optimized_model.graph == graph @@ -1348,7 +1422,8 @@ def test_fuse_consecutive_softmax_log_side_effect(self): # type: () -> None assert graph == optimized_model.graph - def test_fuse_consecutive_softmax_log_multiple_out(self): # type: () -> None + # type: () -> None + def test_fuse_consecutive_softmax_log_multiple_out(self): softmax = helper.make_node("Softmax", ["X"], ["Y"], axis=2) log = helper.make_node("Log", ["Y"], ["Z"]) exp = helper.make_node("Exp", ["Z"], ["Z1"]) @@ -1539,7 +1614,8 @@ def test_deadend_elimination_simple(self): # type: () -> None def test_deadend_elimination_simple_fixed(self): # type: () -> None self._internal_test_deadend_elimination(True) - def test_eliminate_nop_monotone_argmax_basic_no_node_axis(self): # type: () -> None + # type: () -> None + def test_eliminate_nop_monotone_argmax_basic_no_node_axis(self): for node_name in ["Log", "Exp", "Sqrt"]: for axis in range(3): node = helper.make_node(node_name, ["X"], ["Y"]) @@ -1559,12 +1635,15 @@ def test_eliminate_nop_monotone_argmax_basic_no_node_axis(self): # type: () -> assert optimized_model.graph.node[0].attribute[0].name == "axis" assert optimized_model.graph.node[0].attribute[0].i == axis - def test_eliminate_nop_monotone_argmax_basic_with_node_axis(self): # type: () -> None + # type: () -> None + def test_eliminate_nop_monotone_argmax_basic_with_node_axis(self): for node_name in ["Softmax", "LogSoftmax"]: for axis_n in range(3): for axis_max in range(3): - node = helper.make_node(node_name, ["X"], ["Y"], axis=axis_n) - argmax = helper.make_node("ArgMax", ["Y"], ["Z"], axis=axis_max) + node = helper.make_node( + node_name, ["X"], ["Y"], axis=axis_n) + argmax = 
helper.make_node( + "ArgMax", ["Y"], ["Z"], axis=axis_max) graph = helper.make_graph( [node, argmax], "test", @@ -1583,7 +1662,8 @@ def test_eliminate_nop_monotone_argmax_basic_with_node_axis(self): # type: () - else: assert optimized_model.graph == graph - def test_eliminate_nop_monotone_argmax_multiple_out(self): # type: () -> None + # type: () -> None + def test_eliminate_nop_monotone_argmax_multiple_out(self): for node_name in ["Log", "Exp", "Sqrt"]: for axis in range(3): node = helper.make_node(node_name, ["X"], ["Y"]) @@ -1600,8 +1680,10 @@ def test_eliminate_nop_monotone_argmax_multiple_out(self): # type: () -> None graph, ["eliminate_nop_monotone_argmax"]) assert optimized_model.graph == graph - def test_eliminate_nop_monotone_argmax_consecutive(self): # type: () -> None - def _assertion(graph, optimized_model, axis_aligned, true_axis): # type: (GraphProto, ModelProto, bool, int) -> None + # type: () -> None + def test_eliminate_nop_monotone_argmax_consecutive(self): + # type: (GraphProto, ModelProto, bool, int) -> None + def _assertion(graph, optimized_model, axis_aligned, true_axis): if axis_aligned: assert len(optimized_model.graph.output) == 1 assert len(optimized_model.graph.node) == 1 @@ -1617,7 +1699,8 @@ def _assertion(graph, optimized_model, axis_aligned, true_axis): # type: (Graph for axis in range(3): node = helper.make_node(node_name_0, ["X"], ["Y"]) node2 = helper.make_node(node_name_1, ["Y"], ["Y1"]) - argmax = helper.make_node("ArgMax", ["Y1"], ["Z"], axis=axis) + argmax = helper.make_node( + "ArgMax", ["Y1"], ["Z"], axis=axis) graph = helper.make_graph( [node, node2, argmax], "test", @@ -1633,8 +1716,10 @@ def _assertion(graph, optimized_model, axis_aligned, true_axis): # type: (Graph for axis_0 in range(3): for axis_1 in range(3): node = helper.make_node(node_name_0, ["X"], ["Y"]) - node2 = helper.make_node(node_name_1, ["Y"], ["Y1"], axis=axis_0) - argmax = helper.make_node("ArgMax", ["Y1"], ["Z"], axis=axis_1) + node2 = helper.make_node( 
+ node_name_1, ["Y"], ["Y1"], axis=axis_0) + argmax = helper.make_node( + "ArgMax", ["Y1"], ["Z"], axis=axis_1) graph = helper.make_graph( [node, node2, argmax], "test", @@ -1643,16 +1728,20 @@ def _assertion(graph, optimized_model, axis_aligned, true_axis): # type: (Graph [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (5, 7, 11))]) optimized_model = self._optimized( graph, ["eliminate_nop_monotone_argmax"], True) - _assertion(graph, optimized_model, axis_0 == axis_1, axis_1) + _assertion(graph, optimized_model, + axis_0 == axis_1, axis_1) # axis X axis test for node_name_0 in ["Softmax", "LogSoftmax"]: for node_name_1 in ["Softmax", "LogSoftmax"]: for axis_0 in range(3): for axis_1 in range(3): for axis_2 in range(3): - node = helper.make_node(node_name_0, ["X"], ["Y"], axis=axis_0) - node2 = helper.make_node(node_name_1, ["Y"], ["Y1"], axis=axis_1) - argmax = helper.make_node("ArgMax", ["Y1"], ["Z"], axis=axis_2) + node = helper.make_node( + node_name_0, ["X"], ["Y"], axis=axis_0) + node2 = helper.make_node( + node_name_1, ["Y"], ["Y1"], axis=axis_1) + argmax = helper.make_node( + "ArgMax", ["Y1"], ["Z"], axis=axis_2) graph = helper.make_graph( [node, node2, argmax], "test", @@ -1662,7 +1751,8 @@ def _assertion(graph, optimized_model, axis_aligned, true_axis): # type: (Graph optimized_model = self._optimized( graph, ["eliminate_nop_monotone_argmax"], True) if axis_0 == axis_1: # we can reduce both of the monotonic ops - _assertion(graph, optimized_model, axis_1 == axis_2, axis_2) + _assertion(graph, optimized_model, + axis_1 == axis_2, axis_2) elif axis_1 == axis_2: # we can reduce one of the monotonic ops assert len(optimized_model.graph.output) == 1 assert len(optimized_model.graph.node) == 2 @@ -1689,7 +1779,8 @@ def test_eliminate_nop_dropout(self): # type: () -> None # even when it';s an optional parameter (defaults to 0) assert optimized_model.graph == graph - def test_eliminate_nop_dropout_opset11_graph_output(self): # type: () -> None + # type: () 
-> None + def test_eliminate_nop_dropout_opset11_graph_output(self): node = helper.make_node("Log", ["X"], ["Y"]) node1 = helper.make_node("Dropout", ["Y"], ["Z"], ratio=0.0) graph = helper.make_graph( @@ -1726,12 +1817,15 @@ def test_eliminate_nop_dropout_opset11(self): # type: () -> None assert optimized_model.graph.node[0].op_type == "Log" def test_fuse_reduction_unsqueeze(self): # type: () -> None - def _calculate_post_transform_shape(input_shape, reduction_axes, unsqueeze_axes, keepdim): # type: (Tuple[int, ...], List[int], List[int], bool) -> Tuple[int, ...] + # type: (Tuple[int, ...], List[int], List[int], bool) -> Tuple[int, ...] + def _calculate_post_transform_shape(input_shape, reduction_axes, unsqueeze_axes, keepdim): post_reduce_shape = None if keepdim: - post_reduce_shape = tuple([(x if i not in reduction_axes else 1) for i, x in enumerate(input_shape)]) + post_reduce_shape = tuple( + [(x if i not in reduction_axes else 1) for i, x in enumerate(input_shape)]) else: - post_reduce_shape = tuple([x for i, x in enumerate(input_shape) if i not in reduction_axes]) + post_reduce_shape = tuple( + [x for i, x in enumerate(input_shape) if i not in reduction_axes]) post_unsqueeze_shape = list(post_reduce_shape) for ax in unsqueeze_axes: post_unsqueeze_shape.insert(ax, 1) @@ -1744,9 +1838,12 @@ def _calculate_post_transform_shape(input_shape, reduction_axes, unsqueeze_axes, for axes2 in [[1], [1, 2], [2]]: for keepdim in [False, True]: input_shape = (5, 7, 9) - output_shape = _calculate_post_transform_shape(input_shape, axes1, axes2, keepdim) # type: Tuple[int, ...] - node = helper.make_node(reduction, ["X"], ["Y"], axes=axes1, keepdims=keepdim) - node1 = helper.make_node("Unsqueeze", ["Y"], ["Z"], axes=axes2) + output_shape = _calculate_post_transform_shape( + input_shape, axes1, axes2, keepdim) # type: Tuple[int, ...] 
+ node = helper.make_node( + reduction, ["X"], ["Y"], axes=axes1, keepdims=keepdim) + node1 = helper.make_node( + "Unsqueeze", ["Y"], ["Z"], axes=axes2) graph = helper.make_graph( [node, node1], "test", @@ -1765,10 +1862,10 @@ def _calculate_post_transform_shape(input_shape, reduction_axes, unsqueeze_axes, assert optimized_model.graph.node[-1].op_type == reduction assert optimized_model.graph.node[-1].attribute[0].name == "axes" assert optimized_model.graph.node[-1].attribute[0].ints == axes1 - optimized_output_shape = tuple(x.dim_value for x in optimized_model.graph.output[0].type.tensor_type.shape.dim) + optimized_output_shape = tuple( + x.dim_value for x in optimized_model.graph.output[0].type.tensor_type.shape.dim) assert optimized_output_shape == output_shape if __name__ == '__main__': unittest.main() - diff --git a/setup.py b/setup.py index 4bd207d90..fbacf560d 100644 --- a/setup.py +++ b/setup.py @@ -164,7 +164,8 @@ def run(self): '-DBUILD_ONNX_PYTHON=ON', '-DCMAKE_EXPORT_COMPILE_COMMANDS=ON', '-DONNX_NAMESPACE={}'.format(ONNX_NAMESPACE), - '-DPY_EXT_SUFFIX={}'.format(sysconfig.get_config_var('EXT_SUFFIX') or ''), + '-DPY_EXT_SUFFIX={}'.format( + sysconfig.get_config_var('EXT_SUFFIX') or ''), ] if COVERAGE: cmake_args.append('-DONNX_COVERAGE=ON') @@ -178,7 +179,8 @@ def run(self): # we need to link with libpython on windows, so # passing python version to window in order to # find python in cmake - '-DPY_VERSION={}'.format('{0}.{1}'.format(*sys.version_info[:2])), + '-DPY_VERSION={}'.format('{0}.{1}'.format(* \ + sys.version_info[:2])), ]) if USE_MSVC_STATIC_RUNTIME: cmake_args.append('-DONNX_USE_MSVC_STATIC_RUNTIME=ON') @@ -252,7 +254,8 @@ def build_extensions(self): elif os.path.exists(release_lib_dir): lib_path = release_lib_dir src = os.path.join(lib_path, filename) - dst = os.path.join(os.path.realpath(self.build_lib), "onnxoptimizer", filename) + dst = os.path.join(os.path.realpath( + self.build_lib), "onnxoptimizer", filename) self.copy_file(src, dst) 
@@ -261,7 +264,8 @@ class mypy_type_check(ONNXCommand): def run(self): """Run command.""" - onnx_script = os.path.realpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "tools/mypy-onnx.py")) + onnx_script = os.path.realpath(os.path.join( + os.path.dirname(os.path.abspath(__file__)), "tools/mypy-onnx.py")) returncode = subprocess.call([sys.executable, onnx_script]) sys.exit(returncode) @@ -330,4 +334,3 @@ def run(self): author_email='onnx-technical-discuss@lists.lfai.foundation', url='https://github.com/onnx/optimizer', ) - From 755c49d42d4dee6b1776e1ada14f9b3599ee5469 Mon Sep 17 00:00:00 2001 From: daquexian Date: Sat, 29 Aug 2020 21:11:54 +0800 Subject: [PATCH 12/14] update ci yaml --- .azure-pipelines/Linux-CI.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.azure-pipelines/Linux-CI.yml b/.azure-pipelines/Linux-CI.yml index 3fe5610ea..acfb56034 100644 --- a/.azure-pipelines/Linux-CI.yml +++ b/.azure-pipelines/Linux-CI.yml @@ -64,8 +64,6 @@ jobs: # check line endings to be UNIX find . 
-type f -regextype posix-extended -regex '.*\.(py|cpp|md|h|cc|proto|proto3|in)' | xargs dos2unix --quiet - git status - git diff --exit-code # Do not hardcode onnx's namespace in the c++ source code, so that # other libraries who statically link with onnx can hide onnx symbols From e527581167bfb7d69bb43f6e2a4cb562c928a243 Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 6 Sep 2020 19:12:05 +0800 Subject: [PATCH 13/14] fix misplaced type annotations --- onnxoptimizer/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxoptimizer/__init__.py b/onnxoptimizer/__init__.py index 458b0d725..c3213be4a 100644 --- a/onnxoptimizer/__init__.py +++ b/onnxoptimizer/__init__.py @@ -39,8 +39,7 @@ get_available_passes = C.get_available_passes -# type: (ModelProto, Optional[Sequence[Text]], bool) -> ModelProto -def optimize(model, passes=None, fixed_point=False): +def optimize(model, passes=None, fixed_point=False): # type: (ModelProto, Optional[Sequence[Text]], bool) -> ModelProto if passes is None: passes = ['eliminate_nop_transpose', 'eliminate_nop_pad', From 0613c2047c1c03c9cc060c1cd4ac9536974f3517 Mon Sep 17 00:00:00 2001 From: daquexian Date: Sun, 6 Sep 2020 20:07:52 +0800 Subject: [PATCH 14/14] set c++ standard to c++11 --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ef6fcc930..2a3038964 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,11 @@ cmake_minimum_required(VERSION 3.1) project(onnx_optimizer C CXX) +# Set C++11 as standard for the whole project +if(NOT MSVC) + set(CMAKE_CXX_STANDARD 11) +endif(NOT MSVC) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(ONNX_ROOT ${PROJECT_SOURCE_DIR}/third_party/onnx)