diff --git a/cmake/modules/StandaloneCrt.cmake b/cmake/modules/StandaloneCrt.cmake
index c6e6dc77b442..e7c132651ca4 100644
--- a/cmake/modules/StandaloneCrt.cmake
+++ b/cmake/modules/StandaloneCrt.cmake
@@ -28,9 +28,12 @@ if(USE_MICRO)
          "3rdparty/dlpack/include *.h -> include"
          "3rdparty/dmlc-core/include *.h -> include"
          "include/tvm/runtime c_*_api.h -> include/tvm/runtime"
+         "include/tvm/runtime metadata_types.h -> include/tvm/runtime"
          "include/tvm/runtime/crt *.h -> include/tvm/runtime/crt"
          "src/runtime/crt Makefile -> ."
          "src/runtime/crt/include *.h -> include"
+         "src/runtime/crt/aot_executor *.c -> src/runtime/crt/aot_executor"
+         "src/runtime/crt/aot_executor_module *.c -> src/runtime/crt/aot_executor_module"
          "src/runtime/crt/common *.c -> src/runtime/crt/common"
          "src/runtime/crt/graph_executor *.c -> src/runtime/crt/graph_executor"
          "src/runtime/crt/graph_executor_module *.c -> src/runtime/crt/graph_executor_module"
diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index 161d2a2beeae..085935101cd2 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -298,7 +298,7 @@ TVM_DLL int TVMCbArgToReturn(TVMValue* value, int* code);
  * \param type_codes The type codes of the arguments
  * \param num_args Number of arguments.
  * \param ret The return value handle.
- * \param resource_handle The handle additional resouce handle from fron-end.
+ * \param resource_handle The additional resource handle from the front-end.
  * \return 0 if success, -1 if failure happens, set error via TVMAPISetLastError.
  * \sa TVMCFuncSetReturn
  */
@@ -307,7 +307,7 @@ typedef int (*TVMPackedCFunc)(TVMValue* args, int* type_codes, int num_args, TVM
 
 /*!
  * \brief C callback to free the resource handle in C packed function.
- * \param resource_handle The handle additional resouce handle from fron-end.
+ * \param resource_handle The additional resource handle from the front-end.
  */
 typedef void (*TVMPackedCFuncFinalizer)(void* resource_handle);
 
diff --git a/include/tvm/runtime/crt/aot_executor.h b/include/tvm/runtime/crt/aot_executor.h
new file mode 100644
index 000000000000..c6a9f022d25e
--- /dev/null
+++ b/include/tvm/runtime/crt/aot_executor.h
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file aot_executor.h
+ * \brief AoT Executor
+ */
+#ifndef TVM_RUNTIME_CRT_AOT_EXECUTOR_H_
+#define TVM_RUNTIME_CRT_AOT_EXECUTOR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <dlpack/dlpack.h>
+#include <tvm/runtime/crt/internal/common/ndarray.h>
+#include <tvm/runtime/metadata_types.h>
+
+typedef struct TVMMetadata TVMMetadata;
+
+typedef struct TVMAotExecutor {
+  /*! \brief The top-level metadata structure supplied by the generated code */
+  const TVMMetadata* metadata;
+  /*! \brief The code module that contains the compiled model */
+  TVMModuleHandle module_handle;
+  /*! \brief The device on which the executor arguments are allocated */
+  DLDevice device;
+  /*! \brief List of allocated arguments: input(s), then output(s), then pool(s) */
+  TVMNDArray* args;
+  int64_t num_args;
+} TVMAotExecutor;
+
+/*!
+ * \brief Allocate a new AotExecutor with TVMPlatformMemoryAllocate and initialize it.
+ *
+ * \param module_handle TVM Module that exposes the functions to call.
+ * \param device Runtime execution device, only supports device type kDLCPU, index 0.
+ * \param executor Pointer which receives a pointer to the newly-created instance.
+ * \param module_name TVM Module name prefix, typically "default".
+ * \return 0 if successful.
+ */
+int TVMAotExecutor_Create(TVMModuleHandle module_handle, const DLDevice device,
+                          TVMAotExecutor** executor, const char* module_name);
+
+/*!
+ * \brief Release the AoT executor created by TVMAotExecutor_Create().
+ *
+ * \param executor Pointer to executor instance, created by TVMAotExecutor_Create().
+ * \param device Runtime execution device, only supports device type kDLCPU, index 0.
+ * \return 0 if successful.
+ */
+int TVMAotExecutor_Release(TVMAotExecutor* executor, const DLDevice device);
+
+/*!
+ * \brief Return the number of inputs.
+ *
+ * \param executor Pointer to executor instance, created by TVMAotExecutor_Create().
+ * \return Number of inputs.
+ */
+int TVMAotExecutor_GetNumInputs(TVMAotExecutor* executor);
+
+/*!
+ * \brief Return the number of outputs.
+ *
+ * \param executor Pointer to executor instance, created by TVMAotExecutor_Create().
+ * \return Number of outputs.
+ */
+int TVMAotExecutor_GetNumOutputs(TVMAotExecutor* executor);
+
+/*!
+ * \brief Return the input index of the specified input name
+ *
+ * \param executor Pointer to executor instance, created by TVMAotExecutor_Create().
+ * \param name Input name for retrieving index.
+ * \return Input index.
+ */
+int TVMAotExecutor_GetInputIndex(TVMAotExecutor* executor, const char* name);
+
+/*!
+ * \brief Run the generated program.
+ *
+ * \param executor Pointer to executor instance, created by TVMAotExecutor_Create().
+ * \return 0 if successful.
+ */
+int TVMAotExecutor_Run(TVMAotExecutor* executor);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TVM_RUNTIME_CRT_AOT_EXECUTOR_H_
diff --git a/include/tvm/runtime/crt/aot_executor_module.h b/include/tvm/runtime/crt/aot_executor_module.h
new file mode 100644
index 000000000000..bd539c9b08c9
--- /dev/null
+++ b/include/tvm/runtime/crt/aot_executor_module.h
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file aot_executor_module.h
+ * \brief Registers the AoT executor as a TVM module ("tvm.aot_executor.create").
+ */
+#ifndef TVM_RUNTIME_CRT_AOT_EXECUTOR_MODULE_H_
+#define TVM_RUNTIME_CRT_AOT_EXECUTOR_MODULE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <tvm/runtime/crt/error_codes.h>
+
+/*!
+ * \brief Register the "tvm.aot_executor.create" constructor PackedFunc.
+ */
+tvm_crt_error_t TVMAotExecutorModule_Register();
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TVM_RUNTIME_CRT_AOT_EXECUTOR_MODULE_H_
diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h
index 776691c4c7fc..2495cad50b48 100644
--- a/include/tvm/runtime/crt/error_codes.h
+++ b/include/tvm/runtime/crt/error_codes.h
@@ -42,7 +42,7 @@ typedef enum {
   kTvmErrorCategorySession = 4,
   kTvmErrorCategoryPlatform = 5,
   kTvmErrorCategoryGenerated = 6,
-  kTvmErrorCategoryGraphExecutor = 7,
+  kTvmErrorCategoryExecutor = 7,
   kTvmErrorCategoryFunctionCall = 8,
   kTvmErrorCategoryTimeEvaluator = 9,
 } tvm_crt_error_category_t;
@@ -84,10 +84,10 @@ typedef enum {
   // Common error codes returned from generated functions.
   kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0),
 
-  // Graph executor
-  kTvmErrorGraphModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 0),
-  kTvmErrorGraphModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 1),
-  kTvmErrorGraphModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 2),
+  // Graph or AoT executor
+  kTvmErrorExecutorModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryExecutor, 0),
+  kTvmErrorExecutorModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryExecutor, 1),
+  kTvmErrorExecutorModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryExecutor, 2),
 
   // Function Calls - common problems encountered calling functions.
   kTvmErrorFunctionCallNumArguments = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 0),
@@ -100,7 +100,7 @@ typedef enum {
 
   // System errors are always negative integers; this mask indicates presence of a system error.
   // Cast tvm_crt_error_t to a signed integer to interpret the negative error code.
-  kTvmErrorSystemErrorMask = (1 << (sizeof(int) * 4 - 1)),
+  kTvmErrorSystemErrorMask = (1 << (sizeof(int) * 8 - 1)),
 } tvm_crt_error_t;
 
 #ifdef __cplusplus
diff --git a/include/tvm/runtime/crt/graph_executor_module.h b/include/tvm/runtime/crt/graph_executor_module.h
index 10a879e9ba30..5eb3994835a8 100644
--- a/include/tvm/runtime/crt/graph_executor_module.h
+++ b/include/tvm/runtime/crt/graph_executor_module.h
@@ -18,7 +18,7 @@
  */
 
 /*!
- * \file graph_executor.h
+ * \file graph_executor_module.h
  * \brief Tiny graph executor that can run graph containing only tvm PackedFunc.
  */
 #ifndef TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_
diff --git a/include/tvm/runtime/metadata.h b/include/tvm/runtime/metadata.h
index b7f7c6c0a458..640d52ff80e7 100644
--- a/include/tvm/runtime/metadata.h
+++ b/include/tvm/runtime/metadata.h
@@ -24,22 +24,19 @@
 #ifndef TVM_RUNTIME_METADATA_H_
 #define TVM_RUNTIME_METADATA_H_
 
-#include <inttypes.h>
-#ifdef __cplusplus
-#include <memory>
-#include <string>
-#include <vector>
-#endif
 #include <tvm/runtime/c_runtime_api.h>
-#ifdef __cplusplus
 #include <tvm/runtime/metadata_base.h>
+#include <tvm/runtime/metadata_types.h>
+#include <tvm/runtime/object.h>
 #include <tvm/support/span.h>
-#endif
+
+#include <memory>
+#include <string>
+#include <vector>
 
 // Version number recorded in emitted artifacts for runtime checking.
 #define TVM_METADATA_VERSION 1
 
-#ifdef __cplusplus
 namespace tvm {
 namespace runtime {
 namespace metadata {
@@ -52,59 +49,6 @@ static const constexpr int64_t kMetadataVersion = TVM_METADATA_VERSION;
 }  // namespace runtime
 }  // namespace tvm
 
-extern "C" {
-#endif
-
-/*!
- * \brief Top-level metadata structure. Holds all other metadata types.
- */
-struct TVMMetadata {
-  /*! \brief Version identifier for this metadata. */
-  int64_t version;
-  /*! \brief Inputs to the AOT run_model function.
-   * The order of the elements is the same as in the arguments to run_model. That is to say,
-   * this array specifies the first `num_inputs` arguments to run_model.
-   */
-  const struct TVMTensorInfo* inputs;
-  /*! \brief Number of elements in `inputs` array. */
-  int64_t num_inputs;
-  /*! \brief Outputs of the AOT run_model function.
-   * The order of the elements is the same as in the arguments to run_model. That is to say,
-   * this array specifies the last `num_outputs` arguments to run_model.
-   */
-  const struct TVMTensorInfo* outputs;
-  /*! \brief Number of elements in `outputs` array. */
-  int64_t num_outputs;
-  /*! \brief Memory Pools needed by the AOT main function.
-   * The order of the elements is the same as in the arguments to run_model. That is to say,
-   * this array specifies the last `num_pools` arguments to run_model.
-   */
-  const struct TVMTensorInfo* pools;
-  /*! \brief Number of elements in `pools` array. */
-  int64_t num_pools;
-  /*! \brief Name of the model, as passed to tvm.relay.build. */
-  const char* mod_name;
-};
-
-/*!
- * \brief Describes one tensor argument to `run_model`.
- * NOTE: while TIR allows for other types of arguments, such as scalars, the AOT run_model
- * function does not currently accept these. Therefore it's not possible to express those
- * in this metadata. A future patch may modify this.
- */
-struct TVMTensorInfo {
-  /*! \brief Name of the tensor, as specified in the Relay program. */
-  const char* name;
-  /*! \brief Shape of the tensor. */
-  const int64_t* shape;
-  /*! \brief Rank of this tensor. */
-  int64_t num_shape;
-  /*! \brief Data type of one element of this tensor. */
-  DLDataType dtype;
-};
-#ifdef __cplusplus
-}  // extern "C"
-#include <tvm/runtime/object.h>
 namespace tvm {
 namespace runtime {
 namespace metadata {
@@ -166,6 +110,5 @@ class TensorInfo : public MetadataBase {
 }  // namespace metadata
 }  // namespace runtime
 }  // namespace tvm
-#endif  // defined(__cplusplus)
 
 #endif  // TVM_RUNTIME_METADATA_H_
diff --git a/include/tvm/runtime/metadata_types.h b/include/tvm/runtime/metadata_types.h
new file mode 100644
index 000000000000..36d690cf34bc
--- /dev/null
+++ b/include/tvm/runtime/metadata_types.h
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// LINT_C_FILE
+
+/*!
+ * \file tvm/runtime/metadata_types.h
+ * \brief Defines types which can be used in metadata here which
+ * are also shared between C and C++ code bases.
+ */
+#ifndef TVM_RUNTIME_METADATA_TYPES_H_
+#define TVM_RUNTIME_METADATA_TYPES_H_
+
+#include <inttypes.h>
+#include <tvm/runtime/c_runtime_api.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*!
+ * \brief Top-level metadata structure. Holds all other metadata types.
+ */
+struct TVMMetadata {
+  /*! \brief Version identifier for this metadata. */
+  int64_t version;
+  /*! \brief Inputs to the AOT run_model function.
+   * The order of the elements is the same as in the arguments to run_model. That is to say,
+   * this array specifies the first `num_inputs` arguments to run_model.
+   */
+  const struct TVMTensorInfo* inputs;
+  /*! \brief Number of elements in `inputs` array. */
+  int64_t num_inputs;
+  /*! \brief Outputs of the AOT run_model function.
+   * The order of the elements is the same as in the arguments to run_model. That is to say,
+   * this array specifies the last `num_outputs` arguments to run_model.
+   */
+  const struct TVMTensorInfo* outputs;
+  /*! \brief Number of elements in `outputs` array. */
+  int64_t num_outputs;
+  /*! \brief Memory Pools needed by the AOT main function.
+   * The order of the elements is the same as in the arguments to run_model. That is to say,
+   * this array specifies the last `num_pools` arguments to run_model.
+   */
+  const struct TVMTensorInfo* pools;
+  /*! \brief Number of elements in `pools` array. */
+  int64_t num_pools;
+  /*! \brief Name of the model, as passed to tvm.relay.build. */
+  const char* mod_name;
+};
+
+/*!
+ * \brief Describes one tensor argument to `run_model`.
+ * NOTE: while TIR allows for other types of arguments, such as scalars, the AOT run_model
+ * function does not currently accept these. Therefore it's not possible to express those
+ * in this metadata. A future patch may modify this.
+ */
+struct TVMTensorInfo {
+  /*! \brief Name of the tensor, as specified in the Relay program. */
+  const char* name;
+  /*! \brief Shape of the tensor. */
+  const int64_t* shape;
+  /*! \brief Rank of this tensor. */
+  int64_t num_shape;
+  /*! \brief Data type of one element of this tensor. */
+  DLDataType dtype;
+};
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TVM_RUNTIME_METADATA_TYPES_H_
diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc
index 9ddddeb389f3..8c1d83d39b09 100644
--- a/src/relay/backend/build_module.cc
+++ b/src/relay/backend/build_module.cc
@@ -450,9 +450,9 @@ class RelayBuildModule : public runtime::ModuleNode {
     }
 
     auto ext_mods = executor_codegen_->GetExternalModules();
-    ret_.mod =
-        tvm::codegen::CreateMetadataModule(ret_.params, ret_.mod, ext_mods, host_target, runtime_,
-                                           executor_codegen_->GetExecutorCodegenMetadata());
+    ret_.mod = tvm::codegen::CreateMetadataModule(ret_.params, ret_.mod, ext_mods, host_target,
+                                                  runtime_, executor_,
+                                                  executor_codegen_->GetExecutorCodegenMetadata());
     // Remove external params which were stored in metadata module.
     for (tvm::runtime::Module mod : ext_mods) {
       auto pf_var = mod.GetFunction("get_const_vars");
diff --git a/src/relay/backend/vm/compiler.cc b/src/relay/backend/vm/compiler.cc
index e6aeb0bc4a0f..5a62ac66f736 100644
--- a/src/relay/backend/vm/compiler.cc
+++ b/src/relay/backend/vm/compiler.cc
@@ -1170,9 +1170,10 @@ void VMCompiler::Codegen() {
     lib = tvm::TIRToRuntime(per_tvm_target_modules, config_->host_target);
   }
 
-  lib = codegen::CreateMetadataModule(params_, lib, ext_mods, config_->host_target,
-                                      Runtime::Create("cpp"),
-                                      relay::backend::ExecutorCodegenMetadata());
+  lib =
+      codegen::CreateMetadataModule(params_, lib, ext_mods, config_->host_target,
+                                    Runtime::Create("cpp"), Executor::Create("graph"),  // DNS HACK
+                                    relay::backend::ExecutorCodegenMetadata());
   exec_->SetLib(lib);
 }
 
diff --git a/src/runtime/crt/Makefile b/src/runtime/crt/Makefile
index 99efdda62ee9..a9987b8b7cb1 100644
--- a/src/runtime/crt/Makefile
+++ b/src/runtime/crt/Makefile
@@ -66,6 +66,8 @@ $(notdir $(1)): $${BUILD_DIR}/lib$(notdir $(1)).a
 endef
 
 LIBS = \
+	src/runtime/crt/aot_executor \
+	src/runtime/crt/aot_executor_module \
 	src/runtime/crt/common \
 	src/runtime/crt/graph_executor \
 	src/runtime/crt/graph_executor_module \
diff --git a/src/runtime/crt/aot_executor/aot_executor.c b/src/runtime/crt/aot_executor/aot_executor.c
new file mode 100644
index 000000000000..1360c40b0fa4
--- /dev/null
+++ b/src/runtime/crt/aot_executor/aot_executor.c
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// LINT_C_FILE
+
+/*!
+ * \file aot_executor.c
+ * \brief implement AoT executor in C
+ */
+
+#include <inttypes.h>
+#include <string.h>
+#include <tvm/runtime/c_runtime_api.h>
+#include <tvm/runtime/crt/aot_executor.h>
+#include <tvm/runtime/crt/logging.h>
+#include <tvm/runtime/crt/module.h>
+#include <tvm/runtime/crt/packed_func.h>
+#include <tvm/runtime/crt/page_allocator.h>
+
+static void DumpMetadata(const TVMMetadata* md) {
+  LOG_DEBUG("%s:\n", __FUNCTION__);
+  LOG_DEBUG("\tmod_name=%s\n", md->mod_name);
+  LOG_DEBUG("\tversion=%" PRId64 "\n", md->version);
+  LOG_DEBUG("\tnum_inputs=%" PRId64 "\n", md->num_inputs);
+  LOG_DEBUG("\tnum_outputs=%" PRId64 "\n", md->num_outputs);
+  LOG_DEBUG("\tnum_pools=%" PRId64 "\n", md->num_pools);
+
+  int i;
+
+  for (i = 0; i < md->num_inputs; ++i) {
+    LOG_DEBUG("\tinput[%d]: %s\n", i, md->inputs[i].name);
+  }
+
+  for (i = 0; i < md->num_outputs; ++i) {
+    LOG_DEBUG("\toutput[%d]: %s\n", i, md->outputs[i].name);
+  }
+
+  for (i = 0; i < md->num_pools; ++i) {
+    LOG_DEBUG("\tpools[%d]: %s\n", i, md->pools[i].name);
+  }
+}
+
+int TVMAotExecutor_GetNumInputs(TVMAotExecutor* executor) { return executor->metadata->num_inputs; }
+
+int TVMAotExecutor_GetNumOutputs(TVMAotExecutor* executor) {
+  return executor->metadata->num_outputs;
+}
+
+int TVMAotExecutor_GetInputIndex(TVMAotExecutor* executor, const char* name) {
+  int i;
+  int rv = -1;
+
+  const TVMMetadata* md = executor->metadata;
+  for (i = 0; i < md->num_inputs; ++i) {
+    if (!strcmp(md->inputs[i].name, name)) {
+      rv = i;
+      break;
+    }
+  }
+  CHECK_GE(rv, 0, "cannot find '%s' among input.", name);
+  return rv;
+}
+
+int TVMAotExecutor_Run(TVMAotExecutor* executor) {
+  const char* tvm_main_suffix = "___tvm_main__";
+  char tvm_main_name[TVM_CRT_MAX_STRLEN_FUNCTION_NAME];
+
+  {
+    const size_t max_strlen = TVM_CRT_MAX_STRLEN_FUNCTION_NAME;
+    size_t len = strnlen(executor->metadata->mod_name, max_strlen);
+    len += strnlen(tvm_main_suffix, max_strlen);
+
+    CHECK_LT(len, max_strlen, "tvm_main name too long %zu\n", len);
+  }
+
+  // create main function name string, e.g. "tvmgen_default___tvm_main__"
+  snprintf(tvm_main_name, sizeof(tvm_main_name), "%s%s", executor->metadata->mod_name,
+           tvm_main_suffix);
+
+  TVMPackedFunc tvm_main;
+  TVMArgs temp_args;
+
+  CHECK_LE(executor->num_args, TVM_CRT_MAX_ARGS, "too many args %" PRId64 "\n", executor->num_args);
+
+  int i;
+  for (i = 0; i < executor->num_args; ++i) {
+    temp_args.values[i].v_handle = &executor->args[i].dl_tensor;
+    temp_args.tcodes[i] = kTVMDLTensorHandle;
+  }
+  temp_args.values_count = executor->num_args;
+
+  int status =
+      TVMPackedFunc_InitModuleFunc(&tvm_main, executor->module_handle, tvm_main_name, &temp_args);
+
+  if (status != 0) {
+    return status;
+  }
+
+  CHECK_EQ(tvm_main.Call(&tvm_main), 0, "call to %s failed", tvm_main_name);
+
+  return 0;
+}
+
+int TVMAotExecutor_Init(TVMAotExecutor* executor, TVMModuleHandle module_handle,
+                        const DLDevice device, const char* module_name) {
+  executor->module_handle = module_handle;
+  executor->device = device;
+
+  // get a pointer to the PackedFunc get_c_metadata() which gives us access to the top-level
+  // metadata structure
+  TVMPackedFunc get_c_metadata;
+  TVMArgs temp_args;
+  temp_args.values_count = 0;
+
+  const char* tvmgen_prefix = "tvmgen_";
+  const char* get_c_metdata_suffix = "_get_c_metadata";
+  char get_c_metdata_name[TVM_CRT_MAX_STRLEN_FUNCTION_NAME];
+
+  {
+    size_t max_strlen = TVM_CRT_MAX_STRLEN_FUNCTION_NAME;
+    size_t len = strnlen(tvmgen_prefix, max_strlen);
+    len += strnlen(module_name, max_strlen);
+    len += strnlen(get_c_metdata_suffix, max_strlen);
+
+    CHECK_LT(len, max_strlen, "get_c_metadata name too long %zu\n", len);
+  }
+
+  // create get_c_metadata() function name string, e.g. "tvmgen_default_get_c_metadata"
+  snprintf(get_c_metdata_name, sizeof(get_c_metdata_name), "%s%s%s", tvmgen_prefix, module_name,
+           get_c_metdata_suffix);
+
+  int status = TVMPackedFunc_InitModuleFunc(&get_c_metadata, executor->module_handle,
+                                            get_c_metdata_name, &temp_args);
+  if (status != 0) {
+    return status;
+  }
+
+  CHECK_EQ(get_c_metadata.Call(&get_c_metadata), 0, "get_c_metadata");
+
+  // save the returned pointer to the top-level metadata
+  executor->metadata = (TVMMetadata*)get_c_metadata.ret_value.values[0].v_handle;
+
+  const TVMMetadata* md = executor->metadata;
+
+  DumpMetadata(md);
+
+  executor->num_args = md->num_inputs + md->num_outputs + md->num_pools;
+
+  tvm_crt_error_t err = TVMPlatformMemoryAllocate(executor->num_args * sizeof(*executor->args),
+                                                  executor->device, (void**)(&executor->args));
+  if (err != kTvmErrorNoError) {
+    return -1;
+  }
+
+  int i;
+  int arg_idx = 0;
+  for (i = 0; i < md->num_inputs; ++i) {
+    LOG_DEBUG("input allocate[%d]: %s\n", i, md->inputs[i].name);
+
+    status = TVMNDArray_Empty(md->inputs[i].num_shape, md->inputs[i].shape, md->inputs[i].dtype,
+                              executor->device, &executor->args[arg_idx++]);
+    if (status != 0) {
+      return status;
+    }
+  }
+
+  for (i = 0; i < md->num_outputs; ++i) {
+    LOG_DEBUG("output allocate[%d]: %s\n", i, md->outputs[i].name);
+
+    status = TVMNDArray_Empty(md->outputs[i].num_shape, md->outputs[i].shape, md->outputs[i].dtype,
+                              executor->device, &executor->args[arg_idx++]);
+    if (status != 0) {
+      return status;
+    }
+  }
+
+  for (i = 0; i < md->num_pools; ++i) {
+    LOG_DEBUG("pools allocate[%d]: %s\n", i, md->pools[i].name);
+
+    status = TVMNDArray_Empty(md->pools[i].num_shape, md->pools[i].shape, md->pools[i].dtype,
+                              executor->device, &executor->args[arg_idx++]);
+    if (status != 0) {
+      return status;
+    }
+  }
+
+  return status;
+}
+
+int TVMAotExecutor_Create(TVMModuleHandle module_handle, const DLDevice device,
+                          TVMAotExecutor** executor, const char* module_name) {
+  tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(**executor), device, (void**)executor);
+  if (err != kTvmErrorNoError) {
+    return -1;
+  }
+
+  memset(*executor, 0, sizeof(**executor));
+
+  return TVMAotExecutor_Init(*executor, module_handle, device, module_name);
+}
+
+int TVMAotExecutor_Release(TVMAotExecutor* executor, const DLDevice device) {
+  int status;
+
+  if (executor->num_args > 0) {
+    // free TVMNDArray data memory for each argument
+    int i;
+    for (i = 0; i < executor->num_args; ++i) {
+      status = TVMNDArray_Release(&executor->args[i]);
+      if (status != 0) {
+        return status;
+      }
+    }
+
+    // free TVMNDArray argument list
+    status = TVMPlatformMemoryFree(executor->args, executor->device);
+    if (status != 0) {
+      return status;
+    }
+  }
+
+  status = TVMPlatformMemoryFree(executor, device);
+  if (status != 0) {
+    return status;
+  }
+
+  return 0;
+}
diff --git a/src/runtime/crt/aot_executor_module/aot_executor_module.c b/src/runtime/crt/aot_executor_module/aot_executor_module.c
new file mode 100644
index 000000000000..d4b3755c1314
--- /dev/null
+++ b/src/runtime/crt/aot_executor_module/aot_executor_module.c
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// LINT_C_FILE
+
+/*!
+ * \file aot_executor_module.c
+ * \brief wrap aot_executor into a TVMModule for use with RPC.
+ */
+
+#include <stdio.h>
+#include <tvm/runtime/crt/aot_executor.h>
+#include <tvm/runtime/crt/aot_executor_module.h>
+#include <tvm/runtime/crt/func_registry.h>
+#include <tvm/runtime/crt/module.h>
+
+typedef struct {
+  TVMModule mod;
+  TVMAotExecutor* executor;
+} AotExecutorModule;
+
+static AotExecutorModule aot_executor;
+
+/*! \brief Constructor PackedFunc: (module handle, device, module name) -> AoT executor module. */
+int32_t TVMAotExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values,
+                                    int* ret_tcodes, void* resource_handle) {
+  if (aot_executor.executor != NULL) {
+    return kTvmErrorExecutorModuleAlreadyCreated;
+  }
+  if (nargs != 3) {
+    return kTvmErrorFunctionCallNumArguments;
+  }
+  if (tcodes[0] != kTVMModuleHandle || tcodes[1] != kDLDevice || tcodes[2] != kTVMStr) {
+    return kTvmErrorFunctionCallWrongArgType;
+  }
+  DLDevice dev = args[1].v_device;
+  if (dev.device_type != kDLCPU) {
+    return kTvmErrorExecutorModuleBadContext;
+  }
+  int status = TVMAotExecutor_Create(args[0].v_handle, dev, &aot_executor.executor, args[2].v_str);
+  if (status != 0) {
+    // NOTE: not released here; a partially initialized executor may hold unallocated args.
+    aot_executor.executor = NULL;
+    return status;
+  }
+  TVMModuleHandle out_mod;
+  status = TVMModCreateFromCModule(&aot_executor.mod, &out_mod);
+  if (status != 0) {
+    ret_tcodes[0] = kTVMNullptr;
+    TVMAotExecutor_Release(aot_executor.executor, dev);
+    aot_executor.executor = NULL;  // do not keep a pointer to the released executor
+    return status;
+  }
+  ret_values[0].v_handle = out_mod;
+  ret_tcodes[0] = kTVMModuleHandle;
+  return kTvmErrorNoError;
+}
+
+int32_t TVMAotExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int nargs,
+                                            TVMValue* ret_values, int* ret_tcodes,
+                                            void* resource_handle) {
+  return kTvmErrorFunctionCallNotImplemented;
+}
+
+int32_t TVMAotExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values,
+                                      int* ret_tcodes, void* resource_handle) {
+  if (nargs != 1) {  // exactly one argument is expected: the input name (read as a string)
+    return kTvmErrorFunctionCallNumArguments;
+  }
+  int index = TVMAotExecutor_GetInputIndex(aot_executor.executor, args[0].v_str);
+  if (index < 0) {
+    return kTvmErrorExecutorModuleNoSuchInput;
+  }
+  ret_values[0].v_handle = (void*)&aot_executor.executor->args[index].dl_tensor;  // named input
+  ret_tcodes[0] = kTVMNDArrayHandle;
+  return 0;
+}
+
+/*! \brief get_output(i): return the i-th output tensor; outputs follow inputs in args. */
+int32_t TVMAotExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values,
+                                       int* ret_tcodes, void* resource_handle) {
+  if (nargs != 1) {
+    return kTvmErrorFunctionCallNumArguments;
+  }
+  // valid output indices are [0, num_outputs); reject anything that would index past the outputs
+  const int64_t out_idx = args[0].v_int64;
+  if (out_idx < 0 || out_idx >= TVMAotExecutor_GetNumOutputs(aot_executor.executor)) {
+    return kTvmErrorFunctionCallInvalidArg;
+  }
+
+  // index past the input entries
+  int64_t idx = out_idx + TVMAotExecutor_GetNumInputs(aot_executor.executor);
+  ret_values[0].v_handle = (void*)&aot_executor.executor->args[idx].dl_tensor;
+  ret_tcodes[0] = kTVMNDArrayHandle;
+  return 0;
+}
+
+int32_t TVMAotExecutorModule_GetInputIndex(TVMValue* args, int* tcodes, int nargs,
+                                           TVMValue* ret_values, int* ret_tcodes,
+                                           void* resource_handle) {
+  if (nargs != 1) {
+    return kTvmErrorFunctionCallNumArguments;
+  }
+
+  int index = TVMAotExecutor_GetInputIndex(aot_executor.executor, args[0].v_str);
+
+  if (index < 0) {
+    return kTvmErrorExecutorModuleNoSuchInput;
+  }
+
+  ret_values[0].v_int64 = index;
+  ret_tcodes[0] = kTVMArgInt;
+  return 0;
+}
+
+int32_t TVMAotExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs,
+                                          TVMValue* ret_values, int* ret_tcodes,
+                                          void* resource_handle) {
+  if (nargs != 0) {
+    return kTvmErrorFunctionCallNumArguments;
+  }
+
+  ret_values[0].v_int64 = TVMAotExecutor_GetNumInputs(aot_executor.executor);
+  ret_tcodes[0] = kTVMArgInt;
+  return 0;
+}
+
+int32_t TVMAotExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs,
+                                           TVMValue* ret_values, int* ret_tcodes,
+                                           void* resource_handle) {
+  if (nargs != 0) {
+    return kTvmErrorFunctionCallNumArguments;
+  }
+
+  ret_values[0].v_int64 = TVMAotExecutor_GetNumOutputs(aot_executor.executor);
+  ret_tcodes[0] = kTVMArgInt;
+  return 0;
+}
+
+int32_t TVMAotExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values,
+                                 int* ret_tcodes, void* resource_handle) {
+  if (nargs != 0) {
+    return kTvmErrorFunctionCallNumArguments;
+  }
+
+  return TVMAotExecutor_Run(aot_executor.executor);
+}
+
+static const TVMBackendPackedCFunc aot_executor_registry_funcs[] = {
+    &TVMAotExecutorModule_GetInput,        // get_input
+    &TVMAotExecutorModule_GetInputIndex,   // get_input_index
+    &TVMAotExecutorModule_NotImplemented,  // get_input_info (do not implement)
+    &TVMAotExecutorModule_GetNumInputs,    // get_num_inputs
+    &TVMAotExecutorModule_GetNumOutputs,   // get_num_outputs
+    &TVMAotExecutorModule_GetOutput,       // get_output
+    &TVMAotExecutorModule_NotImplemented,  // load_params (do not implement)
+    &TVMAotExecutorModule_Run,             // run
+    &TVMAotExecutorModule_NotImplemented,  // set_input (implemented via python wrapper)
+    &TVMAotExecutorModule_NotImplemented,  // share_params (do not implement)
+};
+
+static const TVMFuncRegistry aot_executor_registry = {
+    "\x0aget_input\0"
+    "get_input_index\0"
+    "get_input_info\0"
+    "get_num_inputs\0"
+    "get_num_outputs\0"
+    "get_output\0"
+    "load_params\0"
+    "run\0"
+    "set_input\0"
+    "share_params\0",
+    aot_executor_registry_funcs};
+
+tvm_crt_error_t TVMAotExecutorModule_Register() {
+  aot_executor.mod.registry = &aot_executor_registry;
+  aot_executor.executor = NULL;
+
+  return TVMFuncRegisterGlobal("tvm.aot_executor.create", &TVMAotExecutorModule_Create, 0);
+}
diff --git a/src/runtime/crt/graph_executor_module/graph_executor_module.c b/src/runtime/crt/graph_executor_module/graph_executor_module.c
index 7b2a25040d08..280130a99414 100644
--- a/src/runtime/crt/graph_executor_module/graph_executor_module.c
+++ b/src/runtime/crt/graph_executor_module/graph_executor_module.c
@@ -41,7 +41,7 @@ static GraphExecutorModule graph_executor;
 int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values,
                                       int* ret_tcodes, void* resource_handle) {
   if (graph_executor.executor != NULL) {
-    return kTvmErrorGraphModuleAlreadyCreated;
+    return kTvmErrorExecutorModuleAlreadyCreated;
   }
 
   if (nargs != 4) {
@@ -54,7 +54,7 @@ int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TV
   }
 
   if (args[2].v_int64 != kDLCPU || args[3].v_int64 != 0) {
-    return kTvmErrorGraphModuleBadContext;
+    return kTvmErrorExecutorModuleBadContext;
   }
 
   DLDevice dev = {(DLDeviceType)args[2].v_int64, (int)args[3].v_int64};
@@ -90,7 +90,7 @@ int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs,
 
   int index = TVMGraphExecutor_GetInputIndex(graph_executor.executor, args[0].v_str);
   if (index < 0) {
-    return kTvmErrorGraphModuleNoSuchInput;
+    return kTvmErrorExecutorModuleNoSuchInput;
   }
 
   uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.executor,
@@ -100,6 +100,32 @@ int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs,
   return 0;
 }
 
+// Packed function "get_input_index": args[0] is the input name (a string). On success
+// ret_values[0] holds the input's index as an integer.
+// Returns kTvmErrorExecutorModuleNoSuchInput when the executor reports a negative index.
+int32_t TVMGraphExecutorModule_GetInputIndex(TVMValue* args, int* tcodes, int nargs,
+                                             TVMValue* ret_values, int* ret_tcodes,
+                                             void* resource_handle) {
+  // Validate the call shape before reading args[0].
+  if (nargs != 1) {
+    return kTvmErrorFunctionCallNumArguments;
+  }
+
+  if (tcodes[0] != kTVMStr) {
+    return kTvmErrorFunctionCallWrongArgType;
+  }
+
+  int index = TVMGraphExecutor_GetInputIndex(graph_executor.executor, args[0].v_str);
+
+  if (index < 0) {
+    return kTvmErrorExecutorModuleNoSuchInput;
+  }
+
+  ret_values[0].v_int64 = index;
+  ret_tcodes[0] = kTVMArgInt;
+  return 0;
+}
+
 int32_t TVMGraphExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs,
                                             TVMValue* ret_values, int* ret_tcodes,
                                             void* resource_handle) {
@@ -137,7 +151,7 @@ int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs,
 
   int output_index = args[0].v_int64;
   if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_executor.executor)) {
-    return kTvmErrorGraphModuleNoSuchInput;
+    return kTvmErrorExecutorModuleNoSuchInput;
   }
 
   uint32_t nid = graph_executor.executor->outputs[output_index].node_id;
@@ -202,14 +216,22 @@ int32_t TVMGraphExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int n
 }
 
 static const TVMBackendPackedCFunc graph_executor_registry_funcs[] = {
-    &TVMGraphExecutorModule_GetInput,      &TVMGraphExecutorModule_GetNumInputs,
-    &TVMGraphExecutorModule_GetNumOutputs, &TVMGraphExecutorModule_GetOutput,
-    &TVMGraphExecutorModule_LoadParams,    &TVMGraphExecutorModule_Run,
-    &TVMGraphExecutorModule_SetInput,      &TVMGraphExecutorModule_NotImplemented,
+    &TVMGraphExecutorModule_GetInput,        // get_input
+    &TVMGraphExecutorModule_GetInputIndex,   // get_input_index
+    &TVMGraphExecutorModule_NotImplemented,  // get_input_info
+    &TVMGraphExecutorModule_GetNumInputs,    // get_num_inputs
+    &TVMGraphExecutorModule_GetNumOutputs,   // get_num_outputs
+    &TVMGraphExecutorModule_GetOutput,       // get_output
+    &TVMGraphExecutorModule_LoadParams,      // load_params
+    &TVMGraphExecutorModule_Run,             // run
+    &TVMGraphExecutorModule_SetInput,        // set_input
+    &TVMGraphExecutorModule_NotImplemented,  // share_params
 };
 
 static const TVMFuncRegistry graph_executor_registry = {
-    "\x08get_input\0"
+    "\x0aget_input\0"  // leading byte = number of registered functions (now 10)
+    "get_input_index\0"
+    "get_input_info\0"
     "get_num_inputs\0"
     "get_num_outputs\0"
     "get_output\0"
diff --git a/src/runtime/crt/host/Makefile b/src/runtime/crt/host/Makefile
index 98a810e0d1b1..f5f9ef8a2af2 100644
--- a/src/runtime/crt/host/Makefile
+++ b/src/runtime/crt/host/Makefile
@@ -17,11 +17,11 @@
 
 INCLUDES ?= -isystem crt/include -Icrt_config
 CFLAGS ?= -Werror -Wall
-CXXFLAGS ?= -Werror -Wall -std=c++11
+CXXFLAGS ?= -Werror -Wall -std=c++11 -DTVM_HOST_USE_GRAPH_EXECUTOR_MODULE
 LDFLAGS ?= -Werror -Wall
 
 # Codegen produces spurious lines like: int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)];
-MODEL_CFLAGS ?= -Wno-error=unused-variable
+MODEL_CFLAGS ?= -Wno-error=unused-variable -Wno-error=missing-braces
 
 AR ?= ${PREFIX}ar
 CC ?= ${PREFIX}gcc
@@ -36,7 +36,13 @@ endif
 
 PWD = $(shell pwd)
 BUILD_DIR = build
-CRT_LIB_NAMES = microtvm_rpc_server microtvm_rpc_common graph_executor graph_executor_module common memory
+
+CRT_LIB_NAMES = \
+	microtvm_rpc_server microtvm_rpc_common \
+	aot_executor_module aot_executor \
+	graph_executor_module graph_executor \
+	common memory
+
 CRT_LIBS = $(patsubst %, $(BUILD_DIR)/crt/lib%.a, $(CRT_LIB_NAMES))
 
 CRT_INCLUDES = $(glob crt/include/**)
diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc
index 65027dd67e8c..bf4a98569e33 100644
--- a/src/runtime/crt/host/main.cc
+++ b/src/runtime/crt/host/main.cc
@@ -38,6 +38,8 @@
 #include <tvm/runtime/crt/graph_executor_module.h>
 #endif
 
+#include <tvm/runtime/crt/aot_executor_module.h>
+
 using namespace std::chrono;
 
 extern "C" {
@@ -137,6 +139,9 @@ int main(int argc, char** argv) {
            "failed to register GraphExecutor TVMModule");
 #endif
 
+  CHECK_EQ(TVMAotExecutorModule_Register(), kTvmErrorNoError,
+           "failed to register AoT Executor TVMModule");
+
   int error = TVMFuncRegisterGlobal("tvm.testing.reset_server",
                                     (TVMFunctionHandle)&testonly_reset_server, 0);
   if (error) {
diff --git a/src/target/metadata_module.cc b/src/target/metadata_module.cc
index 5457946322c3..840ba5cab210 100644
--- a/src/target/metadata_module.cc
+++ b/src/target/metadata_module.cc
@@ -36,8 +36,11 @@
 namespace tvm {
 namespace codegen {
 
+static runtime::metadata::Metadata ConvertMetaData(
+    relay::backend::ExecutorCodegenMetadata metadata);
+
 static runtime::Module CreateCrtMetadataModule(
-    runtime::Module target_module, Target target, relay::Runtime runtime,
+    runtime::Module target_module, Target target, relay::Runtime runtime, relay::Executor executor,
     relay::backend::ExecutorCodegenMetadata metadata,
     Array<runtime::Module> non_crt_exportable_modules,
     Array<runtime::Module> crt_exportable_modules,
@@ -62,9 +65,14 @@ static runtime::Module CreateCrtMetadataModule(
   }
 
   if (target->kind->name == "c") {
+    runtime::metadata::Metadata aot_metadata;
+    if (executor->GetAttr<String>("interface-api", tvm::String("packed")) == "packed") {
+      aot_metadata = ConvertMetaData(metadata);
+    }
+
     crt_exportable_modules.push_back(target_module);
-    target_module =
-        CreateCSourceCrtMetadataModule(crt_exportable_modules, target, runtime, metadata);
+    target_module = CreateCSourceCrtMetadataModule(crt_exportable_modules, target, runtime,
+                                                   metadata, aot_metadata);
   } else if (target->kind->name == "llvm") {
 #ifdef TVM_LLVM_VERSION
     crt_exportable_modules.push_back(target_module);
@@ -173,7 +181,8 @@ static runtime::Module CreateCppMetadataModule(
 runtime::Module CreateMetadataModule(
     const std::unordered_map<std::string, runtime::NDArray>& const_var_ndarray,
     tvm::runtime::Module target_module, const Array<runtime::Module>& ext_modules, Target target,
-    tvm::relay::Runtime runtime, relay::backend::ExecutorCodegenMetadata metadata) {
+    tvm::relay::Runtime runtime, tvm::relay::Executor executor,
+    relay::backend::ExecutorCodegenMetadata metadata) {
   // Here we split modules into two groups:
   //  1. Those modules which can be exported to C-runtime. These are DSO-exportable
   //     (i.e. llvm or c) modules which return nothing from get_const_vars().
@@ -219,7 +228,7 @@ runtime::Module CreateMetadataModule(
   }
 
   if (is_targeting_crt) {
-    return CreateCrtMetadataModule(target_module, target, runtime, metadata,
+    return CreateCrtMetadataModule(target_module, target, runtime, executor, metadata,
                                    non_crt_exportable_modules, crt_exportable_modules,
                                    const_var_ndarray);
   } else {
diff --git a/src/target/metadata_module.h b/src/target/metadata_module.h
index 2afcf3497ab8..daeaf212c992 100644
--- a/src/target/metadata_module.h
+++ b/src/target/metadata_module.h
@@ -25,6 +25,7 @@
 #ifndef TVM_TARGET_METADATA_MODULE_H_
 #define TVM_TARGET_METADATA_MODULE_H_
 
+#include <tvm/relay/executor.h>
 #include <tvm/relay/runtime.h>
 #include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
@@ -54,7 +55,7 @@ namespace codegen {
 runtime::Module CreateMetadataModule(
     const std::unordered_map<std::string, runtime::NDArray>& params, runtime::Module target_module,
     const Array<runtime::Module>& ext_modules, Target target, tvm::relay::Runtime runtime,
-    relay::backend::ExecutorCodegenMetadata metadata);
+    tvm::relay::Executor executor, relay::backend::ExecutorCodegenMetadata metadata);
 
 }  // namespace codegen
 }  // namespace tvm
diff --git a/src/target/source/source_module.cc b/src/target/source/source_module.cc
index 11ff409e1da4..8f581f4cbbb2 100644
--- a/src/target/source/source_module.cc
+++ b/src/target/source/source_module.cc
@@ -203,8 +203,8 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
       code_ << "extern \"C\"\n";
       code_ << "#endif\n";
       code_ << "TVM_DLL int32_t " << fname.data();
-      code_ << "(TVMValue* args, int* type_code, int num_args, TVMValue* out_value, int* "
-               "out_type_code);\n";
+      code_ << "(TVMValue* args, int* type_code, int num_args, TVMValue* out_value, "
+               "int* out_type_code, void* resource_handle);\n";
     }
     code_ << "static TVMBackendPackedCFunc _tvm_func_array[] = {\n";
     for (auto f : func_names_) {
@@ -379,11 +379,11 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
   void GenerateEntrypointForPackedAPI(const std::string& entrypoint_name,
                                       const std::string& run_func) {
     code_ << "TVM_DLL int32_t " << run_func;
-    code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
+    code_ << "(TVMValue* args, int* type_code, int num_args, TVMValue* out_value, int* "
              "out_type_code, void* resource_handle);\n\n";
 
     code_ << "int32_t " << entrypoint_name;
-    code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
+    code_ << "(TVMValue* args, int* type_code, int num_args, TVMValue* out_value, int* "
              "out_type_code, void* resource_handle) {\n";
 
     // We are creating a copy of the set of pointers
@@ -747,7 +747,7 @@ class MetadataSerializer : public AttrVisitor {
  public:
   void CodegenMetadata(::tvm::runtime::metadata::Metadata metadata) {
     decl_ << "#include <inttypes.h>" << std::endl
-          << "#include <tvm/runtime/metadata.h>" << std::endl
+          << "#include <tvm/runtime/metadata_types.h>" << std::endl
           << "#include <tvm/runtime/c_runtime_api.h>" << std::endl;
     std::vector<metadata::DiscoverArraysVisitor::DiscoveredArray> queue;
     metadata::DiscoverArraysVisitor array_discover{&queue};
@@ -760,6 +760,7 @@ class MetadataSerializer : public AttrVisitor {
       auto arr = std::get<1>(item);
 
       // Prepend const with everything except C-string, which needs appending.
+      code_ << "static ";
       if (arr->kind != MetadataKind::kString) {
         code_ << "const ";
       }
@@ -777,7 +778,7 @@ class MetadataSerializer : public AttrVisitor {
 
     // Finally, emit overall struct.
     address_.push_back(metadata::kMetadataGlobalSymbol);
-    code_ << "const struct TVMMetadata " << metadata::AddressFromParts(address_) << " = {"
+    code_ << "static const struct TVMMetadata " << metadata::AddressFromParts(address_) << " = {"
           << std::endl;
     Visit(nullptr, &metadata);
     code_ << "};" << std::endl;
@@ -795,11 +796,68 @@ class MetadataSerializer : public AttrVisitor {
   std::vector<bool> is_defining_struct_;
 };
 
+namespace {
+/*!
+ * \brief Build a C source module that embeds the AOT metadata and a function to look it up.
+ *
+ * The generated source is the output of MetadataSerializer followed by a packed function
+ * that returns the address of the serialized metadata struct as an opaque handle.
+ *
+ * \param aot_metadata Metadata to serialize into the generated source.
+ * \param is_c_runtime If true, the lookup function's name is mangled with the metadata's
+ *  module name; otherwise the plain tvm_get_c_metadata symbol is used.
+ * \return A "c" source module that lists the lookup function as its only function name.
+ */
+runtime::Module CreateAotMetadataModule(runtime::metadata::Metadata aot_metadata,
+                                        bool is_c_runtime) {
+  MetadataSerializer serializer;
+  serializer.CodegenMetadata(aot_metadata);
+  std::stringstream lookup_func;
+  std::string get_c_metadata_func_name;
+
+  // NOTE: mangling is not needed in the c++ runtime because the function
+  //       name is looked-up via LibraryModule.
+  // TODO(alanmacd): unify these two approaches
+
+  if (is_c_runtime) {
+    get_c_metadata_func_name = runtime::get_name_mangled(
+        aot_metadata->mod_name(), ::tvm::runtime::symbol::tvm_get_c_metadata);
+  } else {
+    get_c_metadata_func_name = ::tvm::runtime::symbol::tvm_get_c_metadata;
+  }
+
+  lookup_func << "#ifdef __cplusplus\n"
+              << "extern \"C\"\n"
+              << "#endif\n";
+
+  lookup_func << "TVM_DLL int32_t " << get_c_metadata_func_name
+              << "(TVMValue* arg_values, int* arg_tcodes, int "
+                 "num_args, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) {"
+              << std::endl;
+  lookup_func << "    ret_values[0].v_handle = (void*) &" << MetadataSerializer::kGlobalSymbol
+              << ";" << std::endl;
+  lookup_func << "    ret_tcodes[0] = kTVMOpaqueHandle;" << std::endl;
+  lookup_func << "    return 0;" << std::endl;
+  // Close the generated function with a bare "}": a ';' after a function body is a stray
+  // file-scope semicolon that strict ISO C compilers diagnose (e.g. -Wpedantic).
+  lookup_func << "}" << std::endl;
+  std::vector<String> func_names{get_c_metadata_func_name};
+  return CSourceModuleCreate(serializer.GetOutput() + lookup_func.str(), "c", func_names,
+                             Array<String>());
+}
+}  // namespace
+
 runtime::Module CreateCSourceCrtMetadataModule(const Array<runtime::Module>& modules, Target target,
                                                relay::Runtime runtime,
-                                               relay::backend::ExecutorCodegenMetadata metadata) {
+                                               relay::backend::ExecutorCodegenMetadata metadata,
+                                               runtime::metadata::Metadata aot_metadata) {
+  Array<runtime::Module> final_modules(modules);
+  if (aot_metadata.defined()) {
+    final_modules.push_back(CreateAotMetadataModule(aot_metadata, true));
+  }
+
   Array<String> func_names;
-  for (runtime::Module mod : modules) {
+  for (runtime::Module mod : final_modules) {
     auto pf_funcs = mod.GetFunction("get_func_names");
     if (pf_funcs != nullptr) {
       Array<String> func_names_ = pf_funcs();
@@ -808,11 +853,13 @@ runtime::Module CreateCSourceCrtMetadataModule(const Array<runtime::Module>& mod
       }
     }
   }
+
   auto n = make_object<CSourceCrtMetadataModuleNode>(func_names, "c", target, runtime, metadata);
   auto csrc_metadata_module = runtime::Module(n);
-  for (const auto& mod : modules) {
+  for (const auto& mod : final_modules) {
     csrc_metadata_module.Import(mod);
   }
+
   return std::move(csrc_metadata_module);
 }
 
@@ -835,10 +882,7 @@ runtime::Module CreateCSourceCppMetadataModule(runtime::metadata::Metadata metad
   lookup_func << "};" << std::endl;
 
   auto mod = MetadataModuleCreate(metadata);
-  std::vector<String> func_names{::tvm::runtime::symbol::tvm_get_c_metadata};
-  auto c = CSourceModuleCreate(serializer.GetOutput() + lookup_func.str(), "c", func_names,
-                               Array<String>());
-  mod->Import(c);
+  mod->Import(CreateAotMetadataModule(metadata, false));
   return mod;
 }
 
@@ -908,7 +952,8 @@ TVM_REGISTER_GLOBAL("runtime.CreateCSourceCrtMetadataModule")
                        relay::Runtime runtime) {
       // Note that we don't need metadata when we compile a single operator
       return CreateCSourceCrtMetadataModule(modules, target, runtime,
-                                            relay::backend::ExecutorCodegenMetadata());
+                                            relay::backend::ExecutorCodegenMetadata(),
+                                            runtime::metadata::Metadata());
     });
 
 }  // namespace codegen
diff --git a/src/target/source/source_module.h b/src/target/source/source_module.h
index 2a63a8eeb814..e01445ce2ca5 100644
--- a/src/target/source/source_module.h
+++ b/src/target/source/source_module.h
@@ -43,11 +43,13 @@ namespace codegen {
  * \param target the target the modules are compiled for.
  * \param runtime the runtime to code generate against
  * \param metadata Compiler-generated metadata exported to runtime.
+ * \param aot_metadata If supplied, metadata for the AOTExecutor module.
  * \return The wrapped module.
  */
 runtime::Module CreateCSourceCrtMetadataModule(const Array<runtime::Module>& modules, Target target,
                                                relay::Runtime runtime,
-                                               relay::backend::ExecutorCodegenMetadata metadata);
+                                               relay::backend::ExecutorCodegenMetadata metadata,
+                                               runtime::metadata::Metadata aot_metadata);
 
 /*!
  * \brief Create C++-runtime targeted metadata module for "c" backend.
diff --git a/tests/micro/arduino/test_arduino_workflow.py b/tests/micro/arduino/test_arduino_workflow.py
index feccafa727d3..d566a44c0756 100644
--- a/tests/micro/arduino/test_arduino_workflow.py
+++ b/tests/micro/arduino/test_arduino_workflow.py
@@ -71,7 +71,7 @@ def test_project_folder_structure(project_dir, project):
 def test_project_model_integrity(project_dir, project):
     model_dir = project_dir / "src" / "model"
     assert _get_directory_elements(model_dir) == set(
-        ["default_lib0.c", "default_lib1.c", "model.tar"]
+        ["default_lib0.c", "default_lib1.c", "default_lib2.c", "model.tar"]
     )
 
 
diff --git a/tests/python/driver/tvmc/test_compiler.py b/tests/python/driver/tvmc/test_compiler.py
index 365dbdb6bf23..bd783b00fa51 100644
--- a/tests/python/driver/tvmc/test_compiler.py
+++ b/tests/python/driver/tvmc/test_compiler.py
@@ -427,7 +427,7 @@ def test_compile_tflite_module_with_external_codegen_cmsisnn(
             for name in mlf_package.getnames()
             if re.match(r"\./codegen/host/src/\D+\d+\.c", name)
         ]
-        assert len(c_source_files) == 3
+        assert len(c_source_files) == 4
 
 
 @pytest.mark.skipif(
@@ -510,8 +510,8 @@ def test_compile_tflite_module_with_external_codegen_ethosu(
             # The number of c_source_files depends on the number of fused subgraphs that
             # get offloaded to the NPU, e.g. conv2d->depthwise_conv2d->conv2d gets offloaded
             # as a single subgraph if both of these operators are supported by the NPU.
-            # Currently there are two source files for CPU execution and one offload graph
-            assert len(c_source_files) == 3
+            # Currently there are three source files for CPU execution and one offload graph
+            assert len(c_source_files) == 4
 
 
 @mock.patch("tvm.relay.build")
diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py
index bb535fb2441c..3a93dbc89b1f 100644
--- a/tests/python/unittest/test_crt.py
+++ b/tests/python/unittest/test_crt.py
@@ -32,6 +32,7 @@
 import tvm.testing
 from tvm.target import Target
 from tvm.relay.backend import Runtime
+from tvm.relay.backend import Executor
 
 from tvm.topi.utils import get_const_tuple
 from tvm.topi.testing import conv2d_nchw_python
@@ -149,20 +150,89 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) {
     with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
         factory = tvm.relay.build(relay_mod, target=TARGET, runtime=runtime)
 
-    with _make_session(temp_dir, factory) as sess:
-        graph_mod = tvm.micro.create_local_graph_executor(
-            factory.get_graph_json(), sess.get_system_lib(), sess.device
-        )
+    def do_test(graph_mod):
+
         A_data = tvm.nd.array(np.array([2, 3], dtype="uint8"), device=sess.device)
         assert (A_data.numpy() == np.array([2, 3])).all()
         B_data = tvm.nd.array(np.array([4, 7], dtype="uint8"), device=sess.device)
         assert (B_data.numpy() == np.array([4, 7])).all()
 
+        assert graph_mod.get_input_index("a") == 0
+        assert graph_mod.get_input_index("b") == 1
+
         graph_mod.run(a=A_data, b=B_data)
 
         out = graph_mod.get_output(0)
         assert (out.numpy() == np.array([6, 10])).all()
 
+    with _make_session(temp_dir, factory) as sess:
+
+        graph_mod_local = tvm.micro.create_local_graph_executor(
+            factory.get_graph_json(), sess.get_system_lib(), sess.device
+        )
+
+        do_test(graph_mod_local)
+
+        graph_mod = tvm.contrib.graph_executor.create(
+            factory.get_graph_json(), sess.get_system_lib(), sess.device
+        )
+
+        do_test(graph_mod)
+
+
+@tvm.testing.requires_micro
+def test_aot_executor():
+    """Test use of the AOT executor with microTVM."""
+
+    ws_root = pathlib.Path(os.path.dirname(__file__) + "/micro-workspace")
+    if ws_root.exists():
+        shutil.rmtree(ws_root)
+    temp_dir = tvm.contrib.utils.tempdir(ws_root.resolve())
+    relay_mod = tvm.parser.fromtext(
+        """
+      #[version = "0.0.5"]
+      def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) {
+          %0 = %a + %b;
+          %0
+      }"""
+    )
+
+    runtime = Runtime("crt", {"system-lib": True})
+    executor = Executor("aot")
+    with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
+        factory = tvm.relay.build(relay_mod, target=TARGET, runtime=runtime, executor=executor)
+
+    def do_test():
+        aot_executor = tvm.runtime.executor.aot_executor.AotModule(
+            sess._rpc.get_function("tvm.aot_executor.create")(
+                sess.get_system_lib(), sess.device, "default"
+            )
+        )
+
+        assert aot_executor.get_input_index("a") == 0
+        assert aot_executor.get_input_index("b") == 1
+
+        assert aot_executor.get_num_inputs() == 2
+        assert aot_executor.get_num_outputs() == 1
+
+        A_np = np.array([[2, 3]], dtype="uint8")
+        B_np = np.array([[4, 7]], dtype="uint8")
+
+        A_data = aot_executor.get_input("a").copyfrom(A_np)
+        B_data = aot_executor.get_input("b").copyfrom(B_np)
+
+        aot_executor.run()
+
+        out = aot_executor.get_output(0)
+        assert (out.numpy() == np.array([6, 10])).all()
+
+        B_np_new = np.array([[5, 8]], dtype="uint8")  # match the uint8 dtype of input "b"
+        aot_executor.set_input("b", B_np_new)
+        assert (B_data.numpy() == B_np_new).all()
+
+    with _make_session(temp_dir, factory) as sess:
+        do_test()
+
 
 @tvm.testing.requires_micro
 def test_std_math_functions():