Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[runtime] AOTExecutor implementation and c target code-generator #10283

Merged
merged 21 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" O
tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON)
tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF)
tvm_option(USE_AOT_EXECUTOR "Build with AOT executor" ON)
tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON)
tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
Expand Down Expand Up @@ -395,6 +396,13 @@ if(USE_PROFILER)
list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
endif(USE_PROFILER)

# When enabled, compile the C++ AOT executor runtime sources
# (src/runtime/aot_executor/*.cc) into the TVM runtime library.
if(USE_AOT_EXECUTOR)
message(STATUS "Build with AOT Executor support...")
file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_AOT_EXECUTOR_SRCS})

endif(USE_AOT_EXECUTOR)

# Enable ctest if gtest is available
if(USE_GTEST)
# Check env var for backward compatibility. A better way to specify package
Expand Down
6 changes: 6 additions & 0 deletions include/tvm/relay/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ class AttrRegistry;

namespace relay {

/*! \brief Value used with Runtime::name to indicate the C++ runtime. */
static constexpr const char* kTvmRuntimeCpp = "cpp";

/*! \brief Value used with Runtime::name to indicate the C runtime. */
static constexpr const char* kTvmRuntimeCrt = "crt";

/*!
* \brief Runtime information.
*
Expand Down
13 changes: 11 additions & 2 deletions include/tvm/runtime/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@
#include <tvm/runtime/c_runtime_api.h>
#ifdef __cplusplus
#include <tvm/runtime/metadata_base.h>
#endif
#include <tvm/support/span.h>
#endif

// Version number recorded in emitted artifacts for runtime checking.
#define TVM_METADATA_VERSION 1

#ifdef __cplusplus
namespace tvm {
namespace runtime {
namespace metadata {
Expand All @@ -51,7 +52,6 @@ static const constexpr int64_t kMetadataVersion = TVM_METADATA_VERSION;
} // namespace runtime
} // namespace tvm

#ifdef __cplusplus
extern "C" {
#endif

Expand All @@ -75,6 +75,13 @@ struct TVMMetadata {
const struct TVMTensorInfo* outputs;
/*! \brief Number of elements in `outputs` array. */
int64_t num_outputs;
/*! \brief Memory Pools needed by the AOT main function.
* The order of the elements is the same as in the arguments to run_model. That is to say,
* this array specifies the last `num_pools` arguments to run_model.
*/
const struct TVMTensorInfo* pools;
/*! \brief Number of elements in `pools` array. */
int64_t num_pools;
/*! \brief Name of the model, as passed to tvm.relay.build. */
const char* mod_name;
};
Expand Down Expand Up @@ -114,6 +121,8 @@ class MetadataNode : public MetadataBaseNode {
ArrayAccessor<struct TVMTensorInfo, TensorInfo> inputs();
inline int64_t num_outputs() const { return data_->num_outputs; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> outputs();
inline int64_t num_pools() const { return data_->num_pools; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> pools();
inline ::tvm::runtime::String mod_name() const { return ::tvm::runtime::String(data_->mod_name); }
const struct ::TVMMetadata* data() const { return data_; }
TVM_DECLARE_FINAL_OBJECT_INFO(MetadataNode, MetadataBaseNode);
Expand Down
2 changes: 2 additions & 0 deletions include/tvm/runtime/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ TVM_DLL bool RuntimeEnabled(const std::string& target);

/*! \brief namespace for constant symbols */
namespace symbol {
/*! \brief A PackedFunc that retrieves exported metadata. */
constexpr const char* tvm_get_c_metadata = "get_c_metadata";
/*! \brief Global variable to store module context. */
constexpr const char* tvm_module_ctx = "__tvm_module_ctx";
/*! \brief Global variable to store device module blob */
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/contrib/graph_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def set_input(self, key=None, value=None, **params):
keys.sort(key=lambda x: -np.prod(params[x].shape))
for k in keys:
# TODO(zhiics) Skip the weights for submodule in a better way.
# We should use MetadataModule for initialization and remove
# We should use ConstLoaderModule for initialization and remove
# params from set_input
val = self._get_input(k)
if val:
Expand Down
9 changes: 9 additions & 0 deletions python/tvm/micro/model_library_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def generate_c_interface_header(
return metadata_header


# List of type_key for modules which are ephemeral and do not need to be exported.
EPHEMERAL_MODULE_TYPE_KEYS = ("metadata_module",)


def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""Populate the codegen sub-directory as part of a Model Library Format export.

Expand All @@ -79,6 +83,11 @@ def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""
dso_modules = mod._collect_dso_modules()
non_dso_modules = mod._collect_from_import_tree(lambda m: m not in dso_modules)

# Filter ephemeral modules which cannot be exported.
dso_modules = [m for m in dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]
non_dso_modules = [m for m in non_dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]

if non_dso_modules:
raise UnsupportedInModelLibraryFormatError(
f"Don't know how to export non-c or non-llvm modules; found: {non_dso_modules!r}"
Expand Down
10 changes: 10 additions & 0 deletions python/tvm/relay/backend/executor_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ def __init__(
executor_codegen_metadata,
devices,
):
fcreate = get_global_func("tvm.aot_executor_factory.create")
args = []
for k, v in params.items():
args.append(k)
args.append(ndarray.array(v))

self.module = fcreate(libmod, libmod_name, *args)
self.ir_mod = ir_mod
self.lowered_ir_mods = lowered_ir_mods
self.target = target
Expand All @@ -134,6 +141,9 @@ def get_executor_config(self):
def get_lib(self):
return self.lib

def export_library(self, file_name, fcompile=None, addons=None, **kwargs):
    """Export the underlying runtime module to a shared library.

    Delegates directly to the wrapped module's ``export_library``.

    Parameters
    ----------
    file_name : str
        Path of the output library file.
    fcompile : callable, optional
        Compilation function forwarded to the module's exporter.
    addons : optional
        Extra sources/objects forwarded to the module's exporter.
    **kwargs
        Additional options forwarded to the module's exporter.
    """
    return self.module.export_library(file_name, fcompile, addons, **kwargs)


class GraphExecutorFactoryModule(ExecutorFactoryModule):
"""Graph executor factory module.
Expand Down
82 changes: 77 additions & 5 deletions python/tvm/relay/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@
from tvm.tir import expr as tvm_expr
from tvm.target import Target
from .. import nd as _nd, autotvm, register_func
from ..runtime import load_module
from ..runtime.executor import aot_executor as _aot_executor
from ..target import Target
from ..contrib import graph_executor as _graph_rt
from ..contrib import graph_executor as _graph_executor
from ..contrib import utils as contrib_utils
from . import _build_module
from . import ty as _ty
from . import expr as _expr
Expand Down Expand Up @@ -612,7 +615,7 @@ def _make_executor(self, expr=None):
"Graph Executor only supports static graphs, got output type", ret_type
)
mod = build(self.mod, target=self.target)
gmodule = _graph_rt.GraphModule(mod["default"](self.device))
gmodule = _graph_executor.GraphModule(mod["default"](self.device))

def _unflatten(flat_iter, cur_type):
if isinstance(cur_type, _ty.TensorType):
Expand Down Expand Up @@ -641,6 +644,74 @@ def _graph_wrapper(*args, **kwargs):
return _graph_wrapper


class AotExecutor(_interpreter.Executor):
    """Implements the Executor interface for AOT.

    Parameters
    ----------
    mod : :py:class:`~tvm.IRModule`
        The module to support the execution.

    device : :py:class:`Device`
        The runtime device to run the code on.

    target : :py:class:`Target`
        The target option to build the function.
    """

    def __init__(self, mod, device, target):
        # A module is required; there is no implicit default module.
        assert mod is not None
        self.mod = mod
        self.device = device
        self.target = target
        # Only targets configured with the "aot" executor are supported;
        # reject any other executor configuration up front.
        assert target.attrs.get("executor", "graph") == "aot"

    def _make_executor(self, expr=None):
        """Compile ``self.mod`` ahead-of-time and return a Python callable.

        Parameters
        ----------
        expr : optional
            If given, installed as the "main" function of ``self.mod``
            before type inference and compilation.

        Returns
        -------
        callable
            A wrapper that sets the inputs on the compiled AOT module,
            runs it, and returns the outputs (unflattened to match the
            main function's return type).

        Raises
        ------
        ValueError
            If the main function's return type is dynamic — the AOT
            executor supports only static graphs.
        """
        if expr:
            self.mod["main"] = expr
        self.mod = InferType()(self.mod)
        ret_type = self.mod["main"].checked_type.ret_type
        if _ty.is_dynamic(ret_type):
            raise ValueError("AOT Executor only supports static graphs, got output type", ret_type)
        mod = build(self.mod, target=self.target)

        # NOTE: Given AOT requires use of the "c" backend, must export/import to compile the
        # generated code.
        temp_so_dir = contrib_utils.TempDirectory()
        temp_so = temp_so_dir / "temp.so"
        mod.export_library(temp_so)

        # Reload the compiled shared library and instantiate the AOT
        # runtime module on the requested device.
        mod = load_module(temp_so)
        aot_mod = mod["default"](self.device)
        gmodule = _aot_executor.AotModule(aot_mod)

        def _unflatten(flat_iter, cur_type):
            # Rebuild the (possibly nested) tuple structure described by
            # `cur_type` from the flat iterator of output tensors.
            if isinstance(cur_type, _ty.TensorType):
                return next(flat_iter)
            if isinstance(cur_type, _ty.TupleType):
                fields = []
                for field_type in cur_type.fields:
                    field = _unflatten(flat_iter, field_type)
                    fields.append(field)
                return fields
            raise ValueError("Return type", ret_type, "contains unsupported type", cur_type)

        def _aot_wrapper(*args, **kwargs):
            # Normalize positional/keyword arguments against main's signature.
            args = self._convert_args(self.mod["main"], args, kwargs)
            # Create map of inputs.
            for i, arg in enumerate(args):
                gmodule.set_input(i, arg)
            # Run the module, and fetch the output.
            gmodule.run()
            flattened = []
            for i in range(gmodule.get_num_outputs()):
                flattened.append(gmodule.get_output(i).copyto(_nd.cpu(0)))
            unflattened = _unflatten(iter(flattened), ret_type)
            return unflattened

        return _aot_wrapper


# TODO(mbs): Collapse the create_executor/evaluate phases together since a) most callers don't
# reuse the executor for multiple expressions and b) any preparation necessary for the expression
# evaluation needs to (currently) be done along with preparation for the module.
Expand All @@ -664,9 +735,8 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm", params=N
Parameters
----------
kind : str
The type of executor. Avaliable options are `debug` for the
interpreter, `graph` for the graph executor, and `vm` for the virtual
machine.
The type of executor. Available options are `debug` for the interpreter, `graph` for the
graph executor, `aot` for the AOT executor, and `vm` for the virtual machine.

mod : :py:class:`~tvm.IRModule`
The Relay module containing collection of functions
Expand Down Expand Up @@ -703,4 +773,6 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm", params=N
return GraphExecutor(mod, device, target)
if kind == "vm":
return VMExecutor(mod, device, target)
if kind == "aot":
return AotExecutor(mod, device, target)
raise RuntimeError("unknown execution strategy: {0}".format(kind))
2 changes: 2 additions & 0 deletions python/tvm/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@
from .module import load_module, enabled, system_lib
from .container import String, ShapeTuple
from .params import save_param_dict, load_param_dict

from . import executor
26 changes: 26 additions & 0 deletions python/tvm/runtime/executor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""This module contains Python wrappers for the TVM C++ Executor implementations.

NOTE: at present, only AOT Executor is contained here. The others are:
- GraphExecutor, in python/tvm/contrib/graph_executor.py
- VM Executor, in python/tvm/runtime/vm.py

TODO(areusch): Consolidate these into this module.
Copy link
Member

@masahi masahi Mar 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized that, we have two notions of executor. One is the runtime one above, the other is

def create_executor(kind="debug", mod=None, device=None, target="llvm", params=None):

which is used a lot in the test cases.

Do we intend to support create_executor(kind="aot", ...), given that we can now run things via the cpp runtime?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that is a good point. added support here.

"""
from .aot_executor import AotModule
Loading