From 4344ca1d90dc806e9cdebcd8d6ec74b46ea4b47f Mon Sep 17 00:00:00 2001
From: Andrew Tulloch <andrew@tullo.ch>
Date: Wed, 17 Jul 2019 16:27:40 -0700
Subject: [PATCH] [RFC] [Contrib] Minimal runtime (~12kb .text on ARMv7/x86)
 for subset of TVM models

This is an alternative implementation of a subset of the TVM runtime API (and
graph runtime) that focuses entirely on reducing code size, at the expense of
functionality (no tvm.extern(..) calls via PackedFunc, CPU only, etc). It might
be worth incrementally expanding the surface area if there's interest.

The motivation for this work was to see what the minimal useful subset of the
TVM runtime is. This is relevant for applications with tight code-size
constraints, e.g. embedded/mobile. The current runtime is on the order of
100KiB, so this might be compelling for some users.

The smaller surface area for auditing might make this relevant for
https://github.com/dmlc/tvm/issues/3159, or for the use cases I was thinking
about in https://github.com/dmlc/tvm/issues/2523#issuecomment-459165815 re: the
Rust runtime.

The symbols in the tvm::minimalruntime namespace (i.e. excluding std:: and
picojson::) are about 5KiB, so I think there's a bunch of room here (e.g. we
could replace picojson:: with [`jsmn`](https://zserge.com/jsmn.html) or
something similar, and we could replace more of the `std::unordered_map` usage,
etc. with custom primitives as well, similar to the `DynArray`).
---
 3rdparty/picojson/README.md                   |   20 +
 3rdparty/picojson/picojson.h                  | 1204 +++++++++++++++++
 CMakeLists.txt                                |    4 +
 cmake/config.cmake                            |    3 +
 .../contrib/MicroStandaloneRuntime.cmake      |   23 +
 .../runtime/micro/standalone/utvm_runtime.h   |   44 +
 src/runtime/micro/standalone/README.md        |   18 +
 src/runtime/micro/standalone/minimal_vector.h |  107 ++
 .../micro/standalone/utvm_graph_runtime.cc    |  395 ++++++
 .../micro/standalone/utvm_graph_runtime.h     |  167 +++
 src/runtime/micro/standalone/utvm_runtime.cc  |   53 +
 .../micro/standalone/utvm_runtime_api.cc      |   54 +
 .../micro/standalone/utvm_runtime_api.h       |   53 +
 tests/cpp/utvm_runtime_standalone_test.cc     |  137 ++
 14 files changed, 2282 insertions(+)
 create mode 100644 3rdparty/picojson/README.md
 create mode 100644 3rdparty/picojson/picojson.h
 create mode 100644 cmake/modules/contrib/MicroStandaloneRuntime.cmake
 create mode 100644 include/tvm/runtime/micro/standalone/utvm_runtime.h
 create mode 100644 src/runtime/micro/standalone/README.md
 create mode 100644 src/runtime/micro/standalone/minimal_vector.h
 create mode 100644 src/runtime/micro/standalone/utvm_graph_runtime.cc
 create mode 100644 src/runtime/micro/standalone/utvm_graph_runtime.h
 create mode 100644 src/runtime/micro/standalone/utvm_runtime.cc
 create mode 100644 src/runtime/micro/standalone/utvm_runtime_api.cc
 create mode 100644 src/runtime/micro/standalone/utvm_runtime_api.h
 create mode 100644 tests/cpp/utvm_runtime_standalone_test.cc

diff --git a/3rdparty/picojson/README.md b/3rdparty/picojson/README.md
new file mode 100644
index 000000000000..0170437089e0
--- /dev/null
+++ b/3rdparty/picojson/README.md
@@ -0,0 +1,20 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements.  See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership.  The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License.  You may obtain a copy of the License at -->
+
+<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied.  See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+## Notes
+
+`picojson.h` is derived from https://github.com/kazuho/picojson.
diff --git a/3rdparty/picojson/picojson.h b/3rdparty/picojson/picojson.h
new file mode 100644
index 000000000000..90093cb534b4
--- /dev/null
+++ b/3rdparty/picojson/picojson.h
@@ -0,0 +1,1204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright 2009-2010 Cybozu Labs, Inc.
+ * Copyright 2011-2014 Kazuho Oku
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+// for isnan/isinf
+#if __cplusplus >= 201103L
+#include <cmath>
+#else
+extern "C" {
+#ifdef _MSC_VER
+#include <float.h>
+#elif defined(__INTEL_COMPILER)
+#include <mathimf.h>
+#else
+#include <math.h>
+#endif
+}
+#endif
+
+#ifndef PICOJSON_USE_RVALUE_REFERENCE
+#if (defined(__cpp_rvalue_references) && __cpp_rvalue_references >= 200610) || \
+    (defined(_MSC_VER) && _MSC_VER >= 1600)
+#define PICOJSON_USE_RVALUE_REFERENCE 1
+#else
+#define PICOJSON_USE_RVALUE_REFERENCE 0
+#endif
+#endif  // PICOJSON_USE_RVALUE_REFERENCE
+
+#ifndef PICOJSON_NOEXCEPT
+#if PICOJSON_USE_RVALUE_REFERENCE
+#define PICOJSON_NOEXCEPT noexcept
+#else
+#define PICOJSON_NOEXCEPT throw()
+#endif
+#endif
+
+// experimental support for int64_t (see README.mkdn for detail)
+#ifdef PICOJSON_USE_INT64
+#define __STDC_FORMAT_MACROS
+#include <errno.h>
+#include <inttypes.h>
+#endif
+
+// to disable the use of localeconv(3), set PICOJSON_USE_LOCALE to 0
+#ifndef PICOJSON_USE_LOCALE
+#define PICOJSON_USE_LOCALE 1
+#endif
+#if PICOJSON_USE_LOCALE
+extern "C" {
+#include <locale.h>
+}
+#endif
+
+#ifndef PICOJSON_ASSERT
+#ifndef PICOJSON_DISABLE_EXCEPTION
+#define PICOJSON_ASSERT(e)                  \
+  do {                                      \
+    if (!(e)) throw std::runtime_error(#e); \
+  } while (0)
+#else
+#define PICOJSON_ASSERT(e)  \
+  do {                      \
+    if (!(e)) std::abort(); \
+  } while (0)
+#endif  // PICOJSON_DISABLE_EXCEPTION
+#endif
+
+#ifdef _MSC_VER
+#define SNPRINTF _snprintf_s
+#pragma warning(push)
+#pragma warning(disable : 4244)  // conversion from int to char
+#pragma warning(disable : 4127)  // conditional expression is constant
+#pragma warning(disable : 4702)  // unreachable code
+#else
+#define SNPRINTF snprintf
+#endif
+
+namespace picojson {
+
+enum {
+  null_type,
+  boolean_type,
+  number_type,
+  string_type,
+  array_type,
+  object_type
+#ifdef PICOJSON_USE_INT64
+  ,
+  int64_type
+#endif
+};
+
+enum { INDENT_WIDTH = 2 };  // spaces emitted per nesting level by value::_indent
+
+struct null {};
+
+class value {  // a single JSON value: tagged union over null/bool/number/string/array/object
+ public:
+  typedef std::vector<value> array;
+  typedef std::unordered_map<std::string, value> object;  // unordered: key order is not kept
+  union _storage {  // which member is meaningful is recorded in type_
+    bool boolean_;
+    double number_;
+#ifdef PICOJSON_USE_INT64
+    int64_t int64_;
+#endif
+    std::string* string_;  // string/array/object payloads are heap-allocated; see clear()
+    array* array_;
+    object* object_;
+  };
+
+ protected:
+  int type_;  // one of the *_type enumerators
+  _storage u_;
+
+ public:
+  value();  // constructs a null value
+  value(int type, bool);  // default payload for `type`; the bool argument is never read
+  explicit value(bool b);
+#ifdef PICOJSON_USE_INT64
+  explicit value(int64_t i);
+#endif
+  explicit value(double n);  // throws (or aborts) on NaN / infinity
+  explicit value(const std::string& s);
+  explicit value(const array& a);
+  explicit value(const object& o);
+#if PICOJSON_USE_RVALUE_REFERENCE
+  explicit value(std::string&& s);
+  explicit value(array&& a);
+  explicit value(object&& o);
+#endif
+  explicit value(const char* s);
+  value(const char* s, size_t len);
+  ~value();
+  value(const value& x);  // deep copy
+  value& operator=(const value& x);
+#if PICOJSON_USE_RVALUE_REFERENCE
+  value(value&& x) PICOJSON_NOEXCEPT;
+  value& operator=(value&& x) PICOJSON_NOEXCEPT;
+#endif
+  void swap(value& x) PICOJSON_NOEXCEPT;
+  template <typename T>
+  bool is() const;  // true when the stored type corresponds to T
+  template <typename T>
+  const T& get() const;  // asserts is<T>() first
+  template <typename T>
+  T& get();
+  template <typename T>
+  void set(const T&);  // replaces the current contents with a copy of the argument
+#if PICOJSON_USE_RVALUE_REFERENCE
+  template <typename T>
+  void set(T&&);
+#endif
+  bool evaluate_as_boolean() const;
+  const value& get(const size_t idx) const;  // array element, or a null value if out of range
+  const value& get(const std::string& key) const;  // object member, or a null value if absent
+  value& get(const size_t idx);
+  value& get(const std::string& key);
+
+  bool contains(const size_t idx) const;
+  bool contains(const std::string& key) const;
+  std::string to_str() const;  // scalars as text; "array" / "object" placeholders otherwise
+  template <typename Iter>
+  void serialize(Iter os, bool prettify = false) const;  // writes JSON text to os
+  std::string serialize(bool prettify = false) const;  // returns JSON text
+
+ private:
+  template <typename T>
+  // NOLINTNEXTLINE(runtime/explicit)
+  value(const T*);  // intentionally defined to block implicit conversion of
+                    // pointer to bool
+  template <typename Iter>
+  static void _indent(Iter os, int indent);
+  template <typename Iter>
+  void _serialize(Iter os, int indent) const;  // indent == -1 selects compact output
+  std::string _serialize(int indent) const;
+  void clear();  // deletes the heap payload, if any, without resetting type_
+};
+
+typedef value::array array;
+typedef value::object object;
+
+inline value::value() : type_(null_type), u_() {}
+
+inline value::value(int type, bool) : type_(type), u_() {  // the unnamed bool is never read
+  switch (type) {
+#define INIT(p, v) \
+  case p##type:    \
+    u_.p = v;      \
+    break
+    INIT(boolean_, false);
+    INIT(number_, 0.0);
+#ifdef PICOJSON_USE_INT64
+    INIT(int64_, 0);
+#endif
+    INIT(string_, new std::string());  // string/array/object start out empty
+    INIT(array_, new array());
+    INIT(object_, new object());
+#undef INIT
+    default:  // null_type or an unlisted tag: u_ stays value-initialised
+      break;
+  }
+}
+
+inline value::value(bool b) : type_(boolean_type), u_() { u_.boolean_ = b; }
+
+#ifdef PICOJSON_USE_INT64
+inline value::value(int64_t i) : type_(int64_type), u_() { u_.int64_ = i; }
+#endif
+
+inline value::value(double n) : type_(number_type), u_() {  // rejects NaN and +/-infinity
+  if (
+#ifdef _MSC_VER
+      !_finite(n)
+#elif __cplusplus >= 201103L
+      std::isnan(n) || std::isinf(n)
+#else
+      isnan(n) || isinf(n)
+#endif
+  ) {
+#ifndef PICOJSON_DISABLE_EXCEPTION
+    throw std::overflow_error("");
+#else
+    std::abort();  // exceptions disabled: a non-finite number terminates the process
+#endif
+  }
+  u_.number_ = n;
+}
+
+inline value::value(const std::string& s) : type_(string_type), u_() {
+  u_.string_ = new std::string(s);
+}
+
+inline value::value(const array& a) : type_(array_type), u_() { u_.array_ = new array(a); }
+
+inline value::value(const object& o) : type_(object_type), u_() { u_.object_ = new object(o); }
+
+#if PICOJSON_USE_RVALUE_REFERENCE
+inline value::value(std::string&& s) : type_(string_type), u_() {
+  u_.string_ = new std::string(std::move(s));
+}
+
+inline value::value(array&& a) : type_(array_type), u_() { u_.array_ = new array(std::move(a)); }
+
+inline value::value(object&& o) : type_(object_type), u_() {
+  u_.object_ = new object(std::move(o));
+}
+#endif
+
+inline value::value(const char* s) : type_(string_type), u_() { u_.string_ = new std::string(s); }
+
+inline value::value(const char* s, size_t len) : type_(string_type), u_() {
+  u_.string_ = new std::string(s, len);
+}
+
+inline void value::clear() {  // deletes the heap payload, if any; type_ and u_ are not reset
+  switch (type_) {
+#define DEINIT(p) \
+  case p##type:   \
+    delete u_.p;  \
+    break
+    DEINIT(string_);
+    DEINIT(array_);
+    DEINIT(object_);
+#undef DEINIT
+    default:  // every other type stores its data inline in the union
+      break;
+  }
+}
+
+inline value::~value() { clear(); }
+
+inline value::value(const value& x) : type_(x.type_), u_() {  // deep copy of x
+  switch (type_) {
+#define INIT(p, v) \
+  case p##type:    \
+    u_.p = v;      \
+    break
+    INIT(string_, new std::string(*x.u_.string_));  // heap payloads are duplicated
+    INIT(array_, new array(*x.u_.array_));
+    INIT(object_, new object(*x.u_.object_));
+#undef INIT
+    default:  // inline payloads: copying the union itself is enough
+      u_ = x.u_;
+      break;
+  }
+}
+
+inline value& value::operator=(const value& x) {  // copy-and-swap
+  if (this != &x) {
+    value t(x);  // the copy is made first; *this is not modified until the swap
+    swap(t);     // t now holds the previous contents and releases them at scope exit
+  }
+  return *this;
+}
+
+#if PICOJSON_USE_RVALUE_REFERENCE
+inline value::value(value&& x) PICOJSON_NOEXCEPT : type_(null_type), u_() { swap(x); }
+inline value& value::operator=(value&& x) PICOJSON_NOEXCEPT {
+  swap(x);  // x is left holding the previous contents of *this
+  return *this;
+}
+#endif
+inline void value::swap(value& x) PICOJSON_NOEXCEPT {  // exchanges type tag and payload
+  std::swap(type_, x.type_);
+  std::swap(u_, x.u_);  // swaps the raw union, so any heap payload pointer moves with it
+}
+
+#define IS(ctype, jtype)                 \
+  template <>                            \
+  inline bool value::is<ctype>() const { \
+    return type_ == jtype##_type;        \
+  }
+IS(null, null)
+IS(bool, boolean)
+#ifdef PICOJSON_USE_INT64
+IS(int64_t, int64)
+#endif
+IS(std::string, string)
+IS(array, array)
+IS(object, object)
+#undef IS
+template <>
+inline bool value::is<double>() const {  // with PICOJSON_USE_INT64, int64 values match too
+  return type_ == number_type
+#ifdef PICOJSON_USE_INT64
+         || type_ == int64_type
+#endif
+      // NOLINTNEXTLINE(whitespace/semicolon)
+      ;
+}
+
+#define GET(ctype, var)                                                                  \
+  template <>                                                                            \
+  inline const ctype& value::get<ctype>() const {                                        \
+    PICOJSON_ASSERT("type mismatch! call is<type>() before get<type>()" && is<ctype>()); \
+    return var;                                                                          \
+  }                                                                                      \
+  template <>                                                                            \
+  inline ctype& value::get<ctype>() {                                                    \
+    PICOJSON_ASSERT("type mismatch! call is<type>() before get<type>()" && is<ctype>()); \
+    return var;                                                                          \
+  }
+GET(bool, u_.boolean_)
+GET(std::string, *u_.string_)
+GET(array, *u_.array_)
+GET(object, *u_.object_)
+#ifdef PICOJSON_USE_INT64
+GET(double, (type_ == int64_type && (const_cast<value*>(this)->type_ = number_type,
+                                     const_cast<value*>(this)->u_.number_ = u_.int64_),
+             u_.number_))
+GET(int64_t, u_.int64_)
+#else
+GET(double, u_.number_)
+#endif
+#undef GET
+
+#define SET(ctype, jtype, member, init)                                              \
+  template <> inline void value::set<ctype>(const ctype& _val) {                     \
+    value fresh;                /* built off to the side: _val may alias *this */    \
+    fresh.u_.member = init;     /* may throw; *this is still untouched */            \
+    fresh.type_ = jtype##_type; /* tag is set only once the payload exists */        \
+    swap(fresh);                /* fresh's destructor releases the old payload */    \
+  }
+SET(bool, boolean, boolean_, _val)
+SET(std::string, string, string_, new std::string(_val))
+SET(array, array, array_, new array(_val))
+SET(object, object, object_, new object(_val))
+SET(double, number, number_, _val)  // as before, set<double> does not reject NaN / infinity
+#ifdef PICOJSON_USE_INT64
+SET(int64_t, int64, int64_, _val)
+#endif
+#undef SET
+
+#if PICOJSON_USE_RVALUE_REFERENCE
+#define MOVESET(ctype, jtype, member)                                                \
+  template <> inline void value::set<ctype>(ctype && _val) {                         \
+    value fresh;                                                                     \
+    fresh.u_.member = new ctype(std::move(_val)); /* may throw; *this untouched */   \
+    fresh.type_ = jtype##_type;                                                      \
+    swap(fresh); /* the old payload stays alive until the move above has finished */ \
+  }
+MOVESET(std::string, string, string_)
+MOVESET(array, array, array_)
+MOVESET(object, object, object_)
+#undef MOVESET
+#endif
+
+inline bool value::evaluate_as_boolean() const {  // truthiness of the stored value
+  switch (type_) {
+    case null_type:
+      return false;
+    case boolean_type:
+      return u_.boolean_;
+    case number_type:
+      return u_.number_ != 0;
+#ifdef PICOJSON_USE_INT64
+    case int64_type:
+      return u_.int64_ != 0;
+#endif
+    case string_type:
+      return !u_.string_->empty();  // only the empty string is false
+    default:  // arrays and objects are true even when empty
+      return true;
+  }
+}
+
+inline const value& value::get(const size_t idx) const {
+  static value s_null;  // shared null value returned for an out-of-range index
+  PICOJSON_ASSERT(is<array>());
+  return idx < u_.array_->size() ? (*u_.array_)[idx] : s_null;
+}
+
+inline value& value::get(const size_t idx) {
+  static value s_null;  // NOTE: shared and mutable; a write through it persists
+  PICOJSON_ASSERT(is<array>());
+  return idx < u_.array_->size() ? (*u_.array_)[idx] : s_null;  // no element is inserted
+}
+
+inline const value& value::get(const std::string& key) const {
+  static value s_null;  // shared null value returned for a missing key
+  PICOJSON_ASSERT(is<object>());
+  object::const_iterator i = u_.object_->find(key);
+  return i != u_.object_->end() ? i->second : s_null;
+}
+
+inline value& value::get(const std::string& key) {
+  static value s_null;  // NOTE: shared and mutable; a write through it persists
+  PICOJSON_ASSERT(is<object>());
+  object::iterator i = u_.object_->find(key);  // find(), not operator[]: nothing is inserted
+  return i != u_.object_->end() ? i->second : s_null;
+}
+
+inline bool value::contains(const size_t idx) const {  // asserts that *this is an array
+  PICOJSON_ASSERT(is<array>());
+  return idx < u_.array_->size();
+}
+
+inline bool value::contains(const std::string& key) const {  // asserts *this is an object
+  PICOJSON_ASSERT(is<object>());
+  object::const_iterator i = u_.object_->find(key);
+  return i != u_.object_->end();
+}
+
+inline std::string value::to_str() const {  // text form of a scalar; placeholder otherwise
+  switch (type_) {
+    case null_type:
+      return "null";
+    case boolean_type:
+      return u_.boolean_ ? "true" : "false";
+#ifdef PICOJSON_USE_INT64
+    case int64_type: {
+      char buf[sizeof("-9223372036854775808")];  // longest int64 text plus the NUL
+      SNPRINTF(buf, sizeof(buf), "%" PRId64, u_.int64_);
+      return buf;
+    }
+#endif
+    case number_type: {
+      char buf[256];
+      double tmp;  // receives the integral part from modf(); only the zero-fraction test matters
+      SNPRINTF(buf, sizeof(buf),
+               fabs(u_.number_) < (1ULL << 53) && modf(u_.number_, &tmp) == 0 ? "%.f" : "%.17g",
+               u_.number_);
+#if PICOJSON_USE_LOCALE
+      char* decimal_point = localeconv()->decimal_point;  // radix string of the current locale
+      if (strcmp(decimal_point, ".") != 0) {
+        size_t decimal_point_len = strlen(decimal_point);
+        for (char* p = buf; *p != '\0'; ++p) {
+          if (strncmp(p, decimal_point, decimal_point_len) == 0) {
+            return std::string(buf, p) + "." + (p + decimal_point_len);  // first match -> "."
+          }
+        }
+      }
+#endif
+      return buf;
+    }
+    case string_type:
+      return *u_.string_;  // raw contents, without quotes or escaping
+    case array_type:
+      return "array";  // containers are not rendered here; see _serialize()
+    case object_type:
+      return "object";
+    default:
+      PICOJSON_ASSERT(0);
+#ifdef _MSC_VER
+      __assume(0);
+#endif
+  }
+  return std::string();  // only reachable by falling out of the default case above
+}
+
+template <typename Iter>
+void copy(const std::string& s, Iter oi) {  // writes every character of s to oi
+  std::copy(s.begin(), s.end(), oi);  // oi is taken by value; the caller's copy is unchanged
+}
+
+template <typename Iter>
+struct serialize_str_char {  // functor: writes one character of a JSON string body, escaped
+  Iter oi;
+  void operator()(char c) {
+    switch (c) {
+#define MAP(val, sym) \
+  case val:           \
+    copy(sym, oi);    \
+    break
+      MAP('"', "\\\"");
+      MAP('\\', "\\\\");
+      MAP('/', "\\/");
+      MAP('\b', "\\b");
+      MAP('\f', "\\f");
+      MAP('\n', "\\n");
+      MAP('\r', "\\r");
+      MAP('\t', "\\t");
+#undef MAP
+      default:
+        if (static_cast<unsigned char>(c) < 0x20 || c == 0x7f) {  // other control chars, DEL
+          char buf[7];  // backslash, 'u', four hex digits, NUL
+          SNPRINTF(buf, sizeof(buf), "\\u%04x", c & 0xff);
+          std::copy(buf, buf + 6, oi);  // qualified: must not depend on ADL through Iter
+        } else {
+          *oi++ = c;  // everything else, including bytes >= 0x80, is written unchanged
+        }
+        break;
+    }
+  }
+};
+
+template <typename Iter>
+void serialize_str(const std::string& s, Iter oi) {  // writes s as a quoted JSON string
+  *oi++ = '"';
+  serialize_str_char<Iter> process_char = {oi};  // the functor holds its own copy of oi
+  std::for_each(s.begin(), s.end(), process_char);
+  *oi++ = '"';
+}
+
+template <typename Iter>
+void value::serialize(Iter oi, bool prettify) const {
+  return _serialize(oi, prettify ? 0 : -1);  // indent 0 = pretty-print, -1 = compact
+}
+
+inline std::string value::serialize(bool prettify) const { return _serialize(prettify ? 0 : -1); }
+
+template <typename Iter>
+void value::_indent(Iter oi, int indent) {  // newline, then INDENT_WIDTH spaces per level
+  *oi++ = '\n';
+  for (int i = 0; i < indent * INDENT_WIDTH; ++i) {
+    *oi++ = ' ';
+  }
+}
+
+template <typename Iter>
+void value::_serialize(Iter oi, int indent) const {  // indent == -1 means compact output
+  switch (type_) {
+    case string_type:
+      serialize_str(*u_.string_, oi);
+      break;
+    case array_type: {
+      *oi++ = '[';
+      if (indent != -1) {
+        ++indent;  // elements are printed one level deeper
+      }
+      for (array::const_iterator i = u_.array_->begin(); i != u_.array_->end(); ++i) {
+        if (i != u_.array_->begin()) {
+          *oi++ = ',';
+        }
+        if (indent != -1) {
+          _indent(oi, indent);
+        }
+        i->_serialize(oi, indent);
+      }
+      if (indent != -1) {
+        --indent;
+        if (!u_.array_->empty()) {  // an empty array prints as "[]" with no line break
+          _indent(oi, indent);
+        }
+      }
+      *oi++ = ']';
+      break;
+    }
+    case object_type: {
+      *oi++ = '{';
+      if (indent != -1) {
+        ++indent;
+      }
+      for (object::const_iterator i = u_.object_->begin(); i != u_.object_->end(); ++i) {
+        if (i != u_.object_->begin()) {
+          *oi++ = ',';
+        }
+        if (indent != -1) {
+          _indent(oi, indent);
+        }
+        serialize_str(i->first, oi);
+        *oi++ = ':';
+        if (indent != -1) {
+          *oi++ = ' ';  // "key": value when pretty-printing, "key":value otherwise
+        }
+        i->second._serialize(oi, indent);
+      }
+      if (indent != -1) {
+        --indent;
+        if (!u_.object_->empty()) {
+          _indent(oi, indent);
+        }
+      }
+      *oi++ = '}';
+      break;
+    }
+    default:  // null, boolean and numeric values are written via to_str()
+      copy(to_str(), oi);
+      break;
+  }
+  if (indent == 0) {  // pretty-printed top-level value: finish with a newline
+    *oi++ = '\n';
+  }
+}
+
+inline std::string value::_serialize(int indent) const {  // string-returning convenience form
+  std::string s;
+  _serialize(std::back_inserter(s), indent);  // delegates to the iterator overload
+  return s;
+}
+
+template <typename Iter>
+class input {  // character source over [first, last) with one character of push-back
+ protected:
+  Iter cur_, end_;
+  bool consumed_;  // true once *cur_ has been handed out by getc() and not pushed back
+  int line_;  // 1-based; incremented when getc() advances past a '\n'
+
+ public:
+  input(const Iter& first, const Iter& last)
+      : cur_(first), end_(last), consumed_(false), line_(1) {}
+  int getc() {  // returns the next byte as 0..255, or -1 at end of input
+    if (consumed_) {
+      if (*cur_ == '\n') {
+        ++line_;
+      }
+      ++cur_;
+    }
+    if (cur_ == end_) {
+      consumed_ = false;
+      return -1;
+    }
+    consumed_ = true;
+    return *cur_ & 0xff;
+  }
+  void ungetc() { consumed_ = false; }  // the next getc() returns the same character again
+  Iter cur() const {  // iterator to the first character that has not been consumed
+    if (consumed_) {
+      input<Iter>* self = const_cast<input<Iter>*>(this);  // commits the pending advance
+      self->consumed_ = false;
+      ++self->cur_;
+    }
+    return cur_;
+  }
+  int line() const { return line_; }
+  void skip_ws() {  // consumes spaces, tabs, CR and LF
+    while (1) {
+      int ch = getc();
+      if (!(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')) {
+        ungetc();
+        break;
+      }
+    }
+  }
+  bool expect(const int expected) {  // skips whitespace, then consumes `expected` if present
+    skip_ws();
+    if (getc() != expected) {
+      ungetc();
+      return false;
+    }
+    return true;
+  }
+  bool match(const std::string& pattern) {  // consumes pattern; stops at the first mismatch
+    for (std::string::const_iterator pi(pattern.begin()); pi != pattern.end(); ++pi) {
+      if (getc() != (*pi & 0xff)) {  // mask: getc() yields 0..255, plain char may be signed
+        ungetc();
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
+template <typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline int _parse_quadhex(input<Iter>& in) {  // reads 4 hex digits; returns their value or -1
+  int uni_ch = 0, hex;
+  for (int i = 0; i < 4; i++) {
+    if ((hex = in.getc()) == -1) {  // end of input
+      return -1;
+    }
+    if ('0' <= hex && hex <= '9') {
+      hex -= '0';
+    } else if ('A' <= hex && hex <= 'F') {
+      hex -= 'A' - 0xa;
+    } else if ('a' <= hex && hex <= 'f') {
+      hex -= 'a' - 0xa;
+    } else {
+      in.ungetc();  // leave the non-hex character unread
+      return -1;
+    }
+    uni_ch = uni_ch * 16 + hex;
+  }
+  return uni_ch;
+}
+
+template <typename String, typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline bool _parse_codepoint(String& out, input<Iter>& in) {  // appends the result as UTF-8
+  int uni_ch;
+  if ((uni_ch = _parse_quadhex(in)) == -1) {
+    return false;
+  }
+  if (0xd800 <= uni_ch && uni_ch <= 0xdfff) {  // UTF-16 surrogate range
+    if (0xdc00 <= uni_ch) {
+      // a second 16-bit of a surrogate pair appeared
+      return false;
+    }
+    // first 16-bit of surrogate pair, get the next one
+    if (in.getc() != '\\' || in.getc() != 'u') {
+      in.ungetc();
+      return false;
+    }
+    int second = _parse_quadhex(in);
+    if (!(0xdc00 <= second && second <= 0xdfff)) {  // also rejects the -1 error result
+      return false;
+    }
+    uni_ch = ((uni_ch - 0xd800) << 10) | ((second - 0xdc00) & 0x3ff);  // combine the halves
+    uni_ch += 0x10000;
+  }
+  if (uni_ch < 0x80) {  // one byte
+    out.push_back(static_cast<char>(uni_ch));
+  } else {
+    if (uni_ch < 0x800) {  // two bytes: lead byte here, trailing byte below
+      out.push_back(static_cast<char>(0xc0 | (uni_ch >> 6)));
+    } else {
+      if (uni_ch < 0x10000) {  // three bytes
+        out.push_back(static_cast<char>(0xe0 | (uni_ch >> 12)));
+      } else {  // four bytes
+        out.push_back(static_cast<char>(0xf0 | (uni_ch >> 18)));
+        out.push_back(static_cast<char>(0x80 | ((uni_ch >> 12) & 0x3f)));
+      }
+      out.push_back(static_cast<char>(0x80 | ((uni_ch >> 6) & 0x3f)));
+    }
+    out.push_back(static_cast<char>(0x80 | (uni_ch & 0x3f)));  // final continuation byte
+  }
+  return true;
+}
+
+template <typename String, typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline bool _parse_string(String& out, input<Iter>& in) {  // reads up to the closing '"'
+  while (1) {
+    int ch = in.getc();
+    if (ch < ' ') {  // unescaped control character, or -1 for end of input
+      in.ungetc();
+      return false;
+    } else if (ch == '"') {  // closing quote: consumed, not appended
+      return true;
+    } else if (ch == '\\') {
+      if ((ch = in.getc()) == -1) {
+        return false;
+      }
+      switch (ch) {
+#define MAP(sym, val)   \
+  case sym:             \
+    out.push_back(val); \
+    break
+        MAP('"', '\"');
+        MAP('\\', '\\');
+        MAP('/', '/');
+        MAP('b', '\b');
+        MAP('f', '\f');
+        MAP('n', '\n');
+        MAP('r', '\r');
+        MAP('t', '\t');
+#undef MAP
+        case 'u':
+          if (!_parse_codepoint(out, in)) {
+            return false;
+          }
+          break;
+        default:  // any other escape letter is an error
+          return false;
+      }
+    } else {
+      out.push_back(static_cast<char>(ch));  // ordinary byte, copied through unchanged
+    }
+  }
+  return false;  // not reached: the loop above only leaves via return
+}
+
+template <typename Context, typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline bool _parse_array(Context& ctx, input<Iter>& in) {  // _parse has already consumed '['
+  if (!ctx.parse_array_start()) {
+    return false;
+  }
+  size_t idx = 0;  // number of elements parsed so far
+  if (in.expect(']')) {  // empty array
+    return ctx.parse_array_stop(idx);
+  }
+  do {
+    if (!ctx.parse_array_item(in, idx)) {
+      return false;
+    }
+    idx++;
+  } while (in.expect(','));
+  return in.expect(']') && ctx.parse_array_stop(idx);
+}
+
+template <typename Context, typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline bool _parse_object(Context& ctx, input<Iter>& in) {  // _parse has already consumed '{'
+  if (!ctx.parse_object_start()) {
+    return false;
+  }
+  if (in.expect('}')) {  // empty object
+    return true;
+  }
+  do {
+    std::string key;
+    if (!in.expect('"') || !_parse_string(key, in) || !in.expect(':')) {  // "key" then ':'
+      return false;
+    }
+    if (!ctx.parse_object_item(in, key)) {
+      return false;
+    }
+  } while (in.expect(','));
+  return in.expect('}');
+}
+
+template <typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline std::string _parse_number(input<Iter>& in) {  // collects number-like chars, unvalidated
+  std::string num_str;
+  while (1) {
+    int ch = in.getc();
+    if (('0' <= ch && ch <= '9') || ch == '+' || ch == '-' || ch == 'e' || ch == 'E') {
+      num_str.push_back(static_cast<char>(ch));
+    } else if (ch == '.') {
+#if PICOJSON_USE_LOCALE
+      num_str += localeconv()->decimal_point;  // locale radix, for the strtod() call in _parse
+#else
+      num_str.push_back('.');
+#endif
+    } else {
+      in.ungetc();  // first character that cannot belong to a number stays unread
+      break;
+    }
+  }
+  return num_str;
+}
+
+template <typename Context, typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline bool _parse(Context& ctx, input<Iter>& in) {  // parses one JSON value, reporting to ctx
+  in.skip_ws();
+  int ch = in.getc();  // the first character decides which kind of value follows
+  switch (ch) {
+#define IS(ch, text, op)        \
+  case ch:                      \
+    if (in.match(text) && op) { \
+      return true;              \
+    } else {                    \
+      return false;             \
+    }
+    IS('n', "ull", ctx.set_null());
+    IS('f', "alse", ctx.set_bool(false));
+    IS('t', "rue", ctx.set_bool(true));
+#undef IS
+    case '"':
+      return ctx.parse_string(in);
+    case '[':
+      return _parse_array(ctx, in);
+    case '{':
+      return _parse_object(ctx, in);
+    default:
+      if (('0' <= ch && ch <= '9') || ch == '-') {
+        double f;
+        char* endp;
+        in.ungetc();  // hand the leading digit or sign back to _parse_number
+        std::string num_str(_parse_number(in));
+        if (num_str.empty()) {
+          return false;
+        }
+#ifdef PICOJSON_USE_INT64
+        {
+          errno = 0;
+          intmax_t ival = strtoimax(num_str.c_str(), &endp, 10);
+          if (errno == 0 && std::numeric_limits<int64_t>::min() <= ival &&
+              ival <= std::numeric_limits<int64_t>::max() &&
+              endp == num_str.c_str() + num_str.size()) {
+            ctx.set_int64(ival);  // the whole token is an in-range integer
+            return true;
+          }
+        }
+#endif
+        f = strtod(num_str.c_str(), &endp);
+        if (endp == num_str.c_str() + num_str.size()) {  // the whole token must be consumed
+          ctx.set_number(f);
+          return true;
+        }
+        return false;
+      }
+      break;
+  }
+  in.ungetc();  // unrecognised leading character (or end of input): leave it unread
+  return false;
+}
+
+class deny_parse_context {  // every callback returns false, so parsing any value fails
+ public:
+  bool set_null() { return false; }
+  bool set_bool(bool) { return false; }
+#ifdef PICOJSON_USE_INT64
+  bool set_int64(int64_t) { return false; }
+#endif
+  bool set_number(double) { return false; }
+  template <typename Iter>
+  bool parse_string(input<Iter>&) {
+    return false;
+  }
+  bool parse_array_start() { return false; }
+  template <typename Iter>
+  bool parse_array_item(input<Iter>&, size_t) {
+    return false;
+  }
+  bool parse_array_stop(size_t) { return false; }
+  bool parse_object_start() { return false; }
+  template <typename Iter>
+  bool parse_object_item(input<Iter>&, const std::string&) {
+    return false;
+  }
+};
+
+class default_parse_context {  // builds a picojson::value tree from the parser callbacks
+ protected:
+  value* out_;  // destination for the parsed value; never deleted by this class
+
+ public:
+  // NOLINTNEXTLINE(runtime/explicit)
+  default_parse_context(value* out) : out_(out) {}
+  bool set_null() {
+    *out_ = value();
+    return true;
+  }
+  bool set_bool(bool b) {
+    *out_ = value(b);
+    return true;
+  }
+#ifdef PICOJSON_USE_INT64
+  bool set_int64(int64_t i) {
+    *out_ = value(i);
+    return true;
+  }
+#endif
+  bool set_number(double f) {
+    *out_ = value(f);
+    return true;
+  }
+  template <typename Iter>
+  // NOLINTNEXTLINE(runtime/references)
+  bool parse_string(input<Iter>& in) {
+    *out_ = value(string_type, false);  // start from an empty string and append in place
+    return _parse_string(out_->get<std::string>(), in);
+  }
+  bool parse_array_start() {
+    *out_ = value(array_type, false);
+    return true;
+  }
+  template <typename Iter>
+  // NOLINTNEXTLINE(runtime/references)
+  bool parse_array_item(input<Iter>& in, size_t) {
+    array& a = out_->get<array>();
+    a.push_back(value());  // null placeholder that the nested context fills in
+    default_parse_context ctx(&a.back());
+    return _parse(ctx, in);
+  }
+  bool parse_array_stop(size_t) { return true; }
+  bool parse_object_start() {
+    *out_ = value(object_type, false);
+    return true;
+  }
+  template <typename Iter>
+  // NOLINTNEXTLINE(runtime/references)
+  bool parse_object_item(input<Iter>& in, const std::string& key) {
+    object& o = out_->get<object>();
+    default_parse_context ctx(&o[key]);  // operator[]: a repeated key overwrites its value
+    return _parse(ctx, in);
+  }
+
+ private:
+  default_parse_context(const default_parse_context&);  // private: copying is disabled
+  default_parse_context& operator=(const default_parse_context&);
+};
+
+class null_parse_context {  // parse context that walks the input but keeps nothing
+ public:
+  struct dummy_str {
+    void push_back(int) {}  // string sink: every pushed character is dropped
+  };
+
+ public:
+  null_parse_context() {}
+  bool set_null() { return true; }
+  bool set_bool(bool) { return true; }
+#ifdef PICOJSON_USE_INT64
+  bool set_int64(int64_t) { return true; }
+#endif
+  bool set_number(double) { return true; }
+  template <typename Iter>
+  // NOLINTNEXTLINE(runtime/references)
+  bool parse_string(input<Iter>& in) {
+    dummy_str s;
+    return _parse_string(s, in);
+  }
+  bool parse_array_start() { return true; }
+  template <typename Iter>
+  // NOLINTNEXTLINE(runtime/references)
+  bool parse_array_item(input<Iter>& in, size_t) {
+    return _parse(*this, in);  // nested items reuse this same context
+  }
+  bool parse_array_stop(size_t) { return true; }
+  bool parse_object_start() { return true; }
+  template <typename Iter>
+  // NOLINTNEXTLINE(runtime/references)
+  bool parse_object_item(input<Iter>& in, const std::string&) {
+    return _parse(*this, in);
+  }
+
+ private:
+  null_parse_context(const null_parse_context&);  // declared only: copying is disallowed
+  null_parse_context& operator=(const null_parse_context&);
+};
+
+// Obsolete, use the version below. Advances `pos` and returns the error text (empty if none).
+template <typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline std::string parse(value& out, Iter& pos, const Iter& last) {
+  std::string err;
+  pos = parse(out, pos, last, &err);  // delegates to the overload taking std::string* err
+  return err;
+}
+
+template <typename Context, typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline Iter _parse(Context& ctx, const Iter& first, const Iter& last, std::string* err) {
+  input<Iter> in(first, last);  // wrap the range, parse it through ctx, report failure in *err
+  if (!_parse(ctx, in) && err != NULL) {  // the message is built only when a sink was supplied
+    char buf[64];
+    SNPRINTF(buf, sizeof(buf), "syntax error at line %d near: ", in.line());
+    *err = buf;
+    while (1) {  // append the remainder of the current input line to the message
+      int ch = in.getc();
+      if (ch == -1 || ch == '\n') {  // stop at a newline or when getc() yields -1
+        break;
+      } else if (ch >= ' ') {  // characters below ' ' are left out of the message
+        err->push_back(static_cast<char>(ch));
+      }
+    }
+  }
+  return in.cur();  // current position of the input wrapper
+}
+
+template <typename Iter>
+// NOLINTNEXTLINE(runtime/references)
+inline Iter parse(value& out, const Iter& first, const Iter& last, std::string* err) {
+  default_parse_context ctx(&out);  // the parsed document is stored into `out`
+  return _parse(ctx, first, last, err);  // `err` is forwarded unchanged
+}
+
+// NOLINTNEXTLINE(runtime/references)
+inline std::string parse(value& out, const std::string& s) {
+  std::string err;  // stays empty unless the parser writes a message into it
+  parse(out, s.begin(), s.end(), &err);  // the returned end iterator is not needed here
+  return err;
+}
+
+// NOLINTNEXTLINE(runtime/references)
+inline std::string parse(value& out, std::istream& is) {
+  std::string err;  // stays empty unless the parser writes a message into it
+  parse(out, std::istreambuf_iterator<char>(is.rdbuf()), std::istreambuf_iterator<char>(), &err);
+  return err;
+}
+
+template <typename T>
+struct last_error_t {  // holder for the message used by set_last_error()/get_last_error()
+  static std::string s;
+};
+template <typename T>
+// NOLINTNEXTLINE(runtime/string)
+std::string last_error_t<T>::s;  // template static member, so it can be defined in this header
+
+inline void set_last_error(const std::string& s) { last_error_t<bool>::s = s; }
+
+inline const std::string& get_last_error() { return last_error_t<bool>::s; }
+
+inline bool operator==(const value& x, const value& y) {
+  if (x.is<null>()) return y.is<null>();
+#define PICOJSON_CMP(type) \
+  if (x.is<type>()) return y.is<type>() && x.get<type>() == y.get<type>()
+  PICOJSON_CMP(bool);  // each expansion returns as soon as x holds that type
+  PICOJSON_CMP(double);
+  PICOJSON_CMP(std::string);
+  PICOJSON_CMP(array);
+  PICOJSON_CMP(object);
+#undef PICOJSON_CMP
+  PICOJSON_ASSERT(0);  // reached only if x matched none of the types tested above
+#ifdef _MSC_VER
+  __assume(0);
+#endif
+  return false;
+}
+
+inline bool operator!=(const value& x, const value& y) { return !(x == y); }
+}  // namespace picojson
+
+#if !PICOJSON_USE_RVALUE_REFERENCE
+namespace std {
+template <>
+inline void swap(picojson::value& x, picojson::value& y) {  // forwards to value::swap
+  x.swap(y);
+}
+}  // namespace std
+#endif
+
+inline std::istream& operator>>(std::istream& is, picojson::value& x) {
+  picojson::set_last_error(std::string());  // clear the message left by an earlier call
+  const std::string err(picojson::parse(x, is));
+  if (!err.empty()) {  // on failure: record the message and mark the stream as failed
+    picojson::set_last_error(err);
+    is.setstate(std::ios::failbit);
+  }
+  return is;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const picojson::value& x) {
+  x.serialize(std::ostream_iterator<char>(os));  // output goes through a char ostream_iterator
+  return os;
+}
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32898e68f790..754aa6498156 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,6 +46,7 @@ tvm_option(DLPACK_PATH "Path to DLPACK" "3rdparty/dlpack/include")
 tvm_option(DMLC_PATH "Path to DMLC" "3rdparty/dmlc-core/include")
 tvm_option(RANG_PATH "Path to RANG" "3rdparty/rang/include")
 tvm_option(COMPILER_RT_PATH "Path to COMPILER-RT" "3rdparty/compiler-rt")
+tvm_option(PICOJSON_PATH "Path to PicoJSON" "3rdparty/picojson")
 
 # Contrib library options
 tvm_option(USE_BLAS "The blas library to be linked" none)
@@ -57,6 +58,7 @@ tvm_option(USE_ROCBLAS "Build with ROCM:RoCBLAS" OFF)
 tvm_option(USE_SORT "Build with sort support" OFF)
 tvm_option(USE_NNPACK "Build with nnpack support" OFF)
 tvm_option(USE_RANDOM "Build with random support" OFF)
+tvm_option(USE_MICRO_STANDALONE_RUNTIME "Build with micro.standalone_runtime support" OFF)
 tvm_option(USE_ANTLR "Build with ANTLR for Relay parsing" OFF)
 
 # include directories
@@ -66,6 +68,7 @@ include_directories(${DLPACK_PATH})
 include_directories(${DMLC_PATH})
 include_directories(${RANG_PATH})
 include_directories(${COMPILER_RT_PATH})
+include_directories(${PICOJSON_PATH})
 
 # initial variables
 set(TVM_LINKER_LIBS "")
@@ -239,6 +242,7 @@ include(cmake/modules/Micro.cmake)
 include(cmake/modules/ANTLR.cmake)
 include(cmake/modules/contrib/BLAS.cmake)
 include(cmake/modules/contrib/Random.cmake)
+include(cmake/modules/contrib/MicroStandaloneRuntime.cmake)
 include(cmake/modules/contrib/Sort.cmake)
 include(cmake/modules/contrib/NNPack.cmake)
 include(cmake/modules/contrib/HybridDump.cmake)
diff --git a/cmake/config.cmake b/cmake/config.cmake
index 2a61099dbbf6..d92c2151d9c8 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -91,6 +91,9 @@ set(USE_GRAPH_RUNTIME_DEBUG OFF)
 # Whether enable additional vm profiler functions
 set(USE_VM_PROFILER OFF)
 
+# Whether to enable the uTVM standalone runtime
+set(USE_MICRO_STANDALONE_RUNTIME ON)
+
 # Whether build with LLVM support
 # Requires LLVM version >= 4.0
 #
diff --git a/cmake/modules/contrib/MicroStandaloneRuntime.cmake b/cmake/modules/contrib/MicroStandaloneRuntime.cmake
new file mode 100644
index 000000000000..221ab327a97c
--- /dev/null
+++ b/cmake/modules/contrib/MicroStandaloneRuntime.cmake
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if(USE_MICRO_STANDALONE_RUNTIME)  # opt-in: build the standalone micro runtime sources
+  message(STATUS "Build with micro.standalone_runtime")
+  file(GLOB MICRO_STANDALONE_RUNTIME_SRC src/runtime/micro/standalone/*.cc)  # every .cc in the dir
+  list(APPEND RUNTIME_SRCS ${MICRO_STANDALONE_RUNTIME_SRC})  # append them to RUNTIME_SRCS
+  add_definitions(-DUSE_MICRO_STANDALONE_RUNTIME=1)  # expose the switch to the compiler as a macro
+endif(USE_MICRO_STANDALONE_RUNTIME)
diff --git a/include/tvm/runtime/micro/standalone/utvm_runtime.h b/include/tvm/runtime/micro/standalone/utvm_runtime.h
new file mode 100644
index 000000000000..ef6cd4023dba
--- /dev/null
+++ b/include/tvm/runtime/micro/standalone/utvm_runtime.h
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_MICRO_STANDALONE_UTVM_RUNTIME_H_
+#define TVM_RUNTIME_MICRO_STANDALONE_UTVM_RUNTIME_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+// `extern "C"` only exists in C++; C translation units get the visibility attribute alone.
+#ifdef __cplusplus
+#define TVM_MICRO_RUNTIME_API_API extern "C" __attribute__((visibility("default")))
+#else
+#define TVM_MICRO_RUNTIME_API_API __attribute__((visibility("default")))
+#endif
+
+// Opaque-handle C API. `tensor` arguments are DLTensor*; `module` comes from DSOModuleCreate.
+TVM_MICRO_RUNTIME_API_API void* UTVMRuntimeCreate(const char* json, size_t json_len, void* module);
+TVM_MICRO_RUNTIME_API_API void UTVMRuntimeDestroy(void* handle);
+TVM_MICRO_RUNTIME_API_API void UTVMRuntimeSetInput(void* handle, int index, void* tensor);
+TVM_MICRO_RUNTIME_API_API void UTVMRuntimeRun(void* handle);
+TVM_MICRO_RUNTIME_API_API void UTVMRuntimeGetOutput(void* handle, int index, void* tensor);
+TVM_MICRO_RUNTIME_API_API void* UTVMRuntimeDSOModuleCreate(const char* so, size_t so_len);
+TVM_MICRO_RUNTIME_API_API void UTVMRuntimeDSOModuleDestroy(void* module);
+
+#undef TVM_MICRO_RUNTIME_API_API
+
+#endif  // TVM_RUNTIME_MICRO_STANDALONE_UTVM_RUNTIME_H_
diff --git a/src/runtime/micro/standalone/README.md b/src/runtime/micro/standalone/README.md
new file mode 100644
index 000000000000..60614270b008
--- /dev/null
+++ b/src/runtime/micro/standalone/README.md
@@ -0,0 +1,18 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements.  See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership.  The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License.  You may obtain a copy of the License at -->
+
+<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied.  See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+## A replacement implementation of the TVM runtime, focused on a minimal subset of the overall runtime.
diff --git a/src/runtime/micro/standalone/minimal_vector.h b/src/runtime/micro/standalone/minimal_vector.h
new file mode 100644
index 000000000000..4d04e526329f
--- /dev/null
+++ b/src/runtime/micro/standalone/minimal_vector.h
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_MICRO_STANDALONE_MINIMAL_VECTOR_H_
+#define TVM_RUNTIME_MICRO_STANDALONE_MINIMAL_VECTOR_H_
+
+#include <algorithm>
+#include <cassert>
+#include <memory>
+
+namespace tvm {
+namespace micro {
+
+
+// A minimal wrapper, derived from https://github.com/Robbepop/dynarray/, that
+// supports a minimal subset of the std::vector API with a minimized code size.
+// Unlike std::vector, resize() always reallocates and drops the previous elements.
+template <typename T>
+struct DynArray {
+  using value_type = T;
+  using size_type = size_t;
+  using difference_type = std::ptrdiff_t;
+  using reference = value_type&;
+  using const_reference = value_type const&;
+  using pointer = value_type*;
+  using const_pointer = value_type const*;
+  using iterator = pointer;
+  using const_iterator = const_pointer;
+  using reverse_iterator = std::reverse_iterator<iterator>;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+  // Allocates `size` default-initialized elements; no allocation when size == 0.
+  explicit DynArray(size_type size = 0) { resize(size); }
+
+  // Deep copy: allocates a fresh buffer and copies every element of `other`.
+  DynArray(const DynArray& other) {
+    resize(other.size());
+    std::copy(other.begin(), other.end(), begin());
+  }
+
+  // Deep-copy assignment. Self-assignment is a no-op.
+  DynArray& operator=(const DynArray& other) {
+    if (this == &other) return *this;  // resize() below would free the buffer being copied
+    resize(other.size());
+    std::copy(other.begin(), other.end(), begin());
+    return *this;
+  }
+
+  // Replaces the buffer with `size` default-initialized elements (old contents are lost).
+  void resize(size_type size) {
+    if (size > 0) {
+      data_.reset(new T[size]);
+    } else {
+      data_.reset();
+    }
+    size_ = size;
+  }
+
+  size_type size() const { return size_; }
+
+  // Element access is unchecked: `pos` must be < size().
+  reference operator[](size_type pos) { return data_[pos]; }
+  const_reference operator[](size_type pos) const { return data_[pos]; }
+
+  // data() and the iterators are raw pointers into the buffer (nullptr when empty).
+  pointer data() { return data_.get(); }
+  const_pointer data() const { return data_.get(); }
+
+  iterator begin() { return data_.get(); }
+  const_iterator begin() const { return data_.get(); }
+  const_iterator cbegin() const { return data_.get(); }
+
+  iterator end() { return data_.get() + size_; }
+  const_iterator end() const { return data_.get() + size_; }
+  const_iterator cend() const { return data_.get() + size_; }
+
+  // front()/back() require a non-empty array.
+  reference front() { return data_[0]; }
+  const_reference front() const { return data_[0]; }
+  reference back() { return data_[size_ - 1]; }
+  const_reference back() const { return data_[size_ - 1]; }
+
+ private:
+  std::unique_ptr<T[]> data_;
+  size_type size_;
+};
+
+}  // namespace micro
+}  // namespace tvm
+
+#endif  // TVM_RUNTIME_MICRO_STANDALONE_MINIMAL_VECTOR_H_
diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.cc b/src/runtime/micro/standalone/utvm_graph_runtime.cc
new file mode 100644
index 000000000000..edfb1ffd7144
--- /dev/null
+++ b/src/runtime/micro/standalone/utvm_graph_runtime.cc
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "utvm_graph_runtime.h"
+
+#include <dlfcn.h>
+#include <cassert>
+#include <string>
+#include "picojson.h"
+
+namespace tvm {
+namespace micro {
+namespace {
+
+int TVMSToI(const std::string& str) {
+  // std::stoi(...) is missing on some platforms (e.g. older NDK versions), so parse
+  // base-10 with strtol instead; text after the leading number is ignored.
+  return static_cast<int>(std::strtol(str.c_str(), nullptr, 10));
+}
+
+void ParseOutputs(const picojson::array& joutputs, DynArray<NodeEntry>* outputs) {
+  outputs->resize(joutputs.size());
+  // Every output is a JSON array; its first three numbers fill NodeEntry in declaration order.
+  for (size_t k = 0; k < joutputs.size(); ++k) {
+    const auto& head = joutputs[k].get<picojson::array>();
+    const auto field = [&head](size_t f) { return static_cast<uint32_t>(head[f].get<double>()); };
+    (*outputs)[k] = NodeEntry{field(0), field(1), field(2)};
+  }
+}
+
+void ParseAttrs(const picojson::object& jattr, GraphAttr* attr) {
+  // In every attribute array, string elements are skipped and array elements hold the data.
+  for (const auto& entry : jattr.at("dltype").get<picojson::array>()) {
+    if (entry.is<std::string>()) {
+      continue;
+    }
+    const auto& type_names = entry.get<picojson::array>();
+    // dltype: one type name per entry
+    attr->dltype.resize(type_names.size());
+    for (size_t k = 0; k < type_names.size(); ++k) {
+      attr->dltype[k] = type_names[k].get<std::string>();
+    }
+  }
+  for (const auto& entry : jattr.at("storage_id").get<picojson::array>()) {
+    if (entry.is<std::string>()) {
+      continue;
+    }
+    const auto& ids = entry.get<picojson::array>();
+    // storage_id: JSON numbers narrowed to int
+    attr->storage_id.resize(ids.size());
+    for (size_t k = 0; k < ids.size(); ++k) {
+      attr->storage_id[k] = static_cast<int>(ids[k].get<double>());
+    }
+  }
+  for (const auto& entry : jattr.at("shape").get<picojson::array>()) {
+    if (entry.is<std::string>()) {
+      continue;
+    }
+    const auto& shapes = entry.get<picojson::array>();
+    attr->shape.resize(shapes.size());
+    for (size_t k = 0; k < shapes.size(); ++k) {
+      const auto& dims = shapes[k].get<picojson::array>();
+      attr->shape[k].resize(dims.size());
+      for (size_t d = 0; d < dims.size(); ++d) {
+        attr->shape[k][d] = static_cast<int64_t>(dims[d].get<double>());
+      }
+    }
+  }
+}
+
+void ParseNodes(const picojson::array& jnodes, DynArray<Node>* nodes) {
+  nodes->resize(jnodes.size());  // one Node per JSON node object, in the same order
+  for (size_t i = 0; i < nodes->size(); ++i) {
+    auto* n = &(*nodes)[i];
+    const auto& jn = jnodes[i].get<picojson::object>();
+    n->op_type = jn.at("op").get<std::string>();
+    n->name = jn.at("name").get<std::string>();
+    const auto& jinputs = jn.at("inputs").get<picojson::array>();  // reference, not a copy
+    n->inputs.resize(jinputs.size());
+    for (size_t j = 0; j < jinputs.size(); ++j) {  // `j` keeps the node index `i` unshadowed
+      const auto& jinput_j = jinputs[j].get<picojson::array>();  // [node_id, index, version]
+      n->inputs[j] = NodeEntry{static_cast<uint32_t>(jinput_j[0].get<double>()),
+                               static_cast<uint32_t>(jinput_j[1].get<double>()),
+                               static_cast<uint32_t>(jinput_j[2].get<double>())};
+    }
+    const auto& jattrs_ = jn.find("attrs");
+    if (jattrs_ != jn.end()) {  // only nodes carrying "attrs" get their param fields set
+      const auto& jattrs = jattrs_->second.get<picojson::object>();
+      n->param.func_name = jattrs.at("func_name").get<std::string>();
+      n->param.num_inputs = TVMSToI(jattrs.at("num_inputs").get<std::string>());
+      n->param.num_outputs = TVMSToI(jattrs.at("num_outputs").get<std::string>());
+      n->param.flatten_data = TVMSToI(jattrs.at("flatten_data").get<std::string>());
+    }
+  }
+}
+
+void ParseArgNodes(const picojson::array& jinput_nodes, DynArray<uint32_t>* input_nodes) {
+  // JSON numbers are read as doubles; narrow each one to uint32_t, preserving order.
+  input_nodes->resize(jinput_nodes.size());
+  std::transform(jinput_nodes.begin(), jinput_nodes.end(), input_nodes->begin(),
+                 [](const picojson::value& v) { return static_cast<uint32_t>(v.get<double>()); });
+}
+}  // namespace
+
+NDArray::~NDArray() {}  // nothing explicit to do: the members release what they hold
+
+NDArray NDArray::Empty(const DynArray<int64_t>& shape, DLDataType dtype, DLContext ctx) {
+  // Total bytes = bytes per element (bit width rounded up) times every extent of `shape`.
+  int64_t total_bytes = (dtype.bits * dtype.lanes + 7) / 8;
+  for (const int64_t extent : shape) {
+    total_bytes *= extent;
+  }
+
+  const int dev_type = static_cast<int>(ctx.device_type);
+  const int dev_id = static_cast<int>(ctx.device_id);
+  void* raw = TVMBackendAllocWorkspace(dev_type, dev_id, total_bytes, dtype.code, dtype.bits);
+  NDArray result;
+  // The deleter returns the workspace to the backend; a null allocation is never freed.
+  result.storage_ = std::shared_ptr<void>(raw, [=](void* ptr) {
+    if (ptr) TVMBackendFreeWorkspace(ctx.device_type, ctx.device_id, ptr);
+  });
+  result.shape_ = shape;
+  result.dtype_ = dtype;
+  result.ctx_ = ctx;
+  return result;
+}
+
+NDArray NDArray::CreateView(const DynArray<int64_t>& shape, DLDataType dtype) {
+  NDArray r;
+  r.storage_ = storage_;  // share the same allocation (shared_ptr copy); no data is copied
+  r.shape_ = shape;  // the new shape is not checked against the size of the allocation
+  r.dtype_ = dtype;
+  r.ctx_ = ctx_;  // the view inherits this array's context
+  return r;
+}
+
+DLTensor NDArray::ToDLTensor() {
+  DLTensor r;
+  r.data = storage_.get();  // borrowed pointer: the DLTensor does not keep the storage alive
+  assert(r.data != nullptr);
+  r.ctx = ctx_;
+  r.ndim = shape_.size();
+  r.dtype = dtype_;
+  r.shape = shape_.data();  // points into shape_, so it is valid only while shape_ is unchanged
+  r.strides = nullptr;
+  r.byte_offset = 0;
+  return r;
+}
+
+size_t GetDataSize(const DLTensor& arr) {
+  // Element count (product of all extents) times bytes per element, rounded up from bits.
+  const size_t rank = static_cast<size_t>(arr.ndim);
+  size_t total = 1;
+  for (size_t d = 0; d < rank; ++d) total *= static_cast<size_t>(arr.shape[d]);
+  total *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8;
+  return total;
+}
+
+void NDArray::CopyFrom(DLTensor* src) {
+  // Copies GetDataSize(*src) bytes, starting byte_offset into src->data; sizes are not checked.
+  const auto* in = static_cast<const uint8_t*>(src->data) + static_cast<size_t>(src->byte_offset);
+  std::memcpy(storage_.get(), in, GetDataSize(*src));
+}
+
+void NDArray::CopyTo(DLTensor* dst) const {
+  auto* out = static_cast<uint8_t*>(dst->data) + static_cast<size_t>(dst->byte_offset);
+  std::memcpy(out, storage_.get(), GetDataSize(*dst));  // byte count comes from dst's shape/dtype
+}
+
+DSOModule::DSOModule(const std::string& name) {
+  dlerror();  // discard any pending error so the check below reflects this dlopen only
+  lib_handle_ = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL);
+  assert(!dlerror());  // NOTE: assert-only checks, they vanish when NDEBUG is defined
+  assert(lib_handle_ != nullptr);
+
+#define TVM_INIT_CONTEXT_FUNC(FuncName)                                               \
+  if (auto* fp = reinterpret_cast<decltype(&FuncName)*>(GetSymbol("__" #FuncName))) { \
+    *fp = FuncName;                                                                   \
+  }
+  // If the library exports a `__<FuncName>` slot, store this runtime's FuncName in it.
+  TVM_INIT_CONTEXT_FUNC(TVMAPISetLastError);
+  TVM_INIT_CONTEXT_FUNC(TVMBackendAllocWorkspace);
+  TVM_INIT_CONTEXT_FUNC(TVMBackendFreeWorkspace);
+  TVM_INIT_CONTEXT_FUNC(TVMBackendParallelLaunch);
+// TODO(tulloch): implement these functions?
+// TVM_INIT_CONTEXT_FUNC(TVMFuncCall);
+// TVM_INIT_CONTEXT_FUNC(TVMBackendGetFuncFromEnv);
+// TVM_INIT_CONTEXT_FUNC(TVMBackendParallelBarrier);
+#undef TVM_INIT_CONTEXT_FUNC
+}
+
+DSOModule::~DSOModule() {
+  if (lib_handle_) {  // nothing to close when no library handle is held
+    dlclose(lib_handle_);
+  }
+}
+
+BackendPackedCFunc DSOModule::GetFunction(const std::string& name) const {
+  void* sym = GetSymbol(name.c_str());
+  assert(sym);  // checked in debug builds only; otherwise a missing symbol yields nullptr
+  return reinterpret_cast<BackendPackedCFunc>(sym);
+}
+
+void* DSOModule::GetSymbol(const char* name) const {
+  dlerror();  // clear any earlier error before the lookup
+  auto* f = dlsym(lib_handle_, name);
+  assert(!dlerror());  // debug builds abort if dlsym reported an error for `name`
+  return f;  // may be null; callers test the result
+}
+
+MicroGraphRuntime::MicroGraphRuntime(const std::string& graph_json, DSOModule* module) {
+  assert(module);
+  module_ = module;  // stored as a raw pointer; this class never deletes it
+  picojson::value v;
+  picojson::parse(v, graph_json);  // NOTE(review): the returned error string is discarded
+  ParseNodes(v.get<picojson::object>()["nodes"].get<picojson::array>(), &nodes_);
+  ParseArgNodes(v.get<picojson::object>()["arg_nodes"].get<picojson::array>(), &input_nodes_);
+  ParseArgNodes(v.get<picojson::object>()["node_row_ptr"].get<picojson::array>(), &node_row_ptr_);
+  ParseOutputs(v.get<picojson::object>()["heads"].get<picojson::array>(), &outputs_);
+  ParseAttrs(v.get<picojson::object>()["attrs"].get<picojson::object>(), &attrs_);
+  SetupStorage();  // reads the attrs_ and node_row_ptr_ parsed above
+  SetupOpExecs();  // reads data_entry_, which SetupStorage() fills in
+}
+
+MicroGraphRuntime::~MicroGraphRuntime() {}  // module_ is not deleted here
+
+void MicroGraphRuntime::Run() {
+  for (const auto& exec : op_execs_) {  // node order; empty slots are skipped
+    if (exec) exec();
+  }
+}
+
+void MicroGraphRuntime::SetInput(int index, DLTensor* data_in) {
+  assert(static_cast<size_t>(index) < input_nodes_.size());  // debug-only bounds check
+  const uint32_t input_nid = input_nodes_[index];
+  data_entry_[entry_id(input_nid, 0)].CopyFrom(data_in);  // entry 0 of the input node
+}
+
+void MicroGraphRuntime::CopyOutputTo(int index, DLTensor* data_out) {
+  assert(static_cast<size_t>(index) < outputs_.size());  // debug-only bounds check
+  const NodeEntry& head = outputs_[index];
+  const uint32_t slot = entry_id(head);
+  data_entry_[slot].CopyTo(data_out);
+}
+
+void MicroGraphRuntime::SetupStorage() {
+  // Turn the dltype strings into DLDataType values. Only "float32" is handled here.
+  DynArray<DLDataType> vtype(attrs_.dltype.size());
+  for (size_t i = 0; i < attrs_.dltype.size(); ++i) {
+    assert(attrs_.dltype[i] == "float32");  // checked in debug builds only
+    DLDataType ty;
+    ty.bits = 32;
+    ty.lanes = 1;
+    ty.code = kDLFloat;
+    vtype[i] = ty;
+  }
+
+  // Size and device type of each storage pool entry.
+  std::vector<PoolEntry> pool_entry;
+  // For each storage id, keep the largest byte size requested by any entry mapped to it.
+  for (size_t i = 0; i < attrs_.shape.size(); ++i) {
+    int storage_id = attrs_.storage_id[i];
+    // Every entry uses the runtime's single context (ctx_).
+    int device_type = static_cast<int>(ctx_.device_type);
+    size_t size = 1;
+    for (int64_t sz : attrs_.shape[i]) {
+      size *= static_cast<size_t>(sz);
+    }
+    assert(storage_id >= 0);
+    DLDataType t = vtype[i];
+    size_t bits = t.bits * t.lanes;
+    assert(bits % 8U == 0U || bits == 1U);
+    size_t bytes = ((bits + 7U) / 8U) * size;
+
+    uint32_t sid = static_cast<uint32_t>(storage_id);
+    if (sid >= pool_entry.size()) {
+      pool_entry.resize(sid + 1, {0, -1});  // new slots start as size 0, device type -1
+    } else {
+      assert(pool_entry[sid].device_type == -1 || pool_entry[sid].device_type == device_type);
+    }
+    pool_entry[sid].size = std::max(pool_entry[sid].size, bytes);
+    pool_entry[sid].device_type = device_type;
+  }
+
+  // Allocate the space.
+  storage_pool_.resize(pool_entry.size());
+  for (size_t i = 0; i < pool_entry.size(); ++i) {
+    const auto& pit = pool_entry[i];
+    DynArray<int64_t> shape(1);
+    shape[0] = static_cast<int64_t>(pit.size + 3) / 4;  // bytes rounded up to 4-byte floats
+    storage_pool_[i] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx_);
+  }
+
+  // Assign the pooled entries: every node entry becomes a view, with its own shape and
+  // dtype, onto the pool allocation selected by its storage id. Entries that share a
+  // storage id therefore share the same memory.
+  data_entry_.resize(num_node_entries());
+  for (size_t i = 0; i < data_entry_.size(); ++i) {
+    int storage_id = attrs_.storage_id[i];
+    assert(static_cast<size_t>(storage_id) < storage_pool_.size());
+    data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]);
+  }
+}
+
+std::function<void()> CreateTVMOp(const DSOModule& module, const TVMOpParam& param,
+                                  const DynArray<DLTensor>& args, size_t num_inputs) {
+  typedef union {
+    void* v_handle;
+  } TVMValue;
+  /*typedef*/ enum {
+    kArrayHandle = 7U,
+  } /*TVMTypeCode*/;
+  struct OpArgs {
+    DynArray<DLTensor> args;
+    DynArray<TVMValue> arg_values;
+    DynArray<int> arg_tcodes;
+    DynArray<int64_t> shape_data;
+  };
+
+  std::shared_ptr<OpArgs> arg_ptr = std::make_shared<OpArgs>();  // kept alive by the closure
+  arg_ptr->args = args;
+  if (param.flatten_data) {
+    arg_ptr->shape_data.resize(arg_ptr->args.size());
+  }
+  arg_ptr->arg_values.resize(arg_ptr->args.size());
+  arg_ptr->arg_tcodes.resize(arg_ptr->args.size());
+  for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
+    TVMValue v;
+    DLTensor* t = &(arg_ptr->args[i]);
+    v.v_handle = t;  // each packed value points at the DLTensor copy owned by OpArgs
+    arg_ptr->arg_values[i] = v;
+    arg_ptr->arg_tcodes[i] = kArrayHandle;
+    if (param.flatten_data) {
+      // The init value fixes the accumulator type: a plain `1` would multiply in `int`.
+      arg_ptr->shape_data[i] =
+          std::accumulate(t->shape, t->shape + t->ndim, int64_t{1}, std::multiplies<int64_t>());
+      t->ndim = 1;
+      t->shape = &(arg_ptr->shape_data[i]);  // stored in OpArgs, so it outlives this call
+    }
+  }
+
+  if (param.func_name == "__nop") {
+    return []() {};
+  } else if (param.func_name == "__copy") {
+    assert(false);  // unsupported; NOTE: with NDEBUG this falls through to the lookup below
+  }
+
+  BackendPackedCFunc pf = module.GetFunction(param.func_name);
+  assert(pf != nullptr);
+  auto fexec = [arg_ptr, pf]() {  // calls pf with (values, type codes, argument count)
+    assert(pf);
+    (pf)(arg_ptr->arg_values.data(), arg_ptr->arg_tcodes.data(),
+         static_cast<int>(arg_ptr->arg_values.size()));
+  };
+  return fexec;
+}
+
+void MicroGraphRuntime::SetupOpExecs() {
+  op_execs_.resize(nodes_.size());
+  // One closure per operator node; "null" nodes keep their default (empty) slot.
+  for (uint32_t nid = 0; nid < nodes_.size(); ++nid) {
+    const Node& node = nodes_[nid];
+    if (node.op_type == "null") continue;
+    // Arguments are laid out as [inputs..., outputs...].
+    const size_t num_in = node.inputs.size();
+    DynArray<DLTensor> args(num_in + node.param.num_outputs);
+    for (size_t i = 0; i < num_in; ++i) {
+      args[i] = data_entry_[this->entry_id(node.inputs[i])].ToDLTensor();
+    }
+    for (size_t out = 0; out < node.param.num_outputs; ++out) {
+      args[num_in + out] = data_entry_[this->entry_id(nid, out)].ToDLTensor();
+    }
+    assert(node.op_type == "tvm_op");
+    op_execs_[nid] = CreateTVMOp(*module_, node.param, args, num_in);
+  }
+}
+
+}  // namespace micro
+}  // namespace tvm
diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.h b/src/runtime/micro/standalone/utvm_graph_runtime.h
new file mode 100644
index 000000000000..b479193861bb
--- /dev/null
+++ b/src/runtime/micro/standalone/utvm_graph_runtime.h
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_
+#define TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_
+
+#include <dlpack/dlpack.h>
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "minimal_vector.h"
+#include "utvm_runtime_api.h"
+
+namespace tvm {
+namespace micro {
+
+typedef int (*BackendPackedCFunc)(void* args, int* type_codes, int num_args);
+
+// dlopen/dlsym/dlclose abstraction.
+class DSOModule {  // NOTE(review): implicitly copyable, yet the destructor closes the handle
+ public:
+  explicit DSOModule(const std::string& name);  // dlopen()s the library called `name`
+  ~DSOModule();  // dlclose()s the handle when one is held
+  BackendPackedCFunc GetFunction(const std::string& name) const;  // symbol lookup by name
+
+ private:
+  void* GetSymbol(const char* name) const;  // thin dlsym() wrapper
+  void* lib_handle_{nullptr};  // handle returned by dlopen()
+};
+
+// The graph attribute fields.
+struct GraphAttr {
+  DynArray<int> storage_id;  // storage pool slot used by each entry
+  DynArray<std::string> dltype;  // type name of each entry, e.g. "float32"
+  DynArray<DynArray<int64_t>> shape;  // extents of each entry
+};
+
+// Memory pool entry.
+struct PoolEntry {
+  size_t size;  // size in bytes
+  int device_type;  // device type as an int; -1 until one has been assigned
+};
+
+// Node entry
+struct NodeEntry {
+  uint32_t node_id;  // index of the node this entry refers to
+  uint32_t index;  // offset of the entry within that node's entries
+  uint32_t version;  // third element of the JSON triple
+};
+
+// Operator attributes about TVMOp
+struct TVMOpParam {
+  std::string func_name;  // symbol name looked up in the DSO module
+  uint32_t num_inputs{0};  // zero by default: nodes without "attrs" never assign these fields
+  uint32_t num_outputs{0};
+  uint32_t flatten_data{0};  // non-zero: arguments are passed as flattened 1-D tensors
+};
+
+// Node
+struct Node {
+  // operator type in string ("null" nodes get no executor; others must be "tvm_op")
+  std::string op_type;
+  // name of the op
+  std::string name;
+  // parameters (filled in only when the JSON node has "attrs")
+  TVMOpParam param;
+  // inputs
+  DynArray<NodeEntry> inputs;
+};
+
+// Minimal NDArray abstraction
+class NDArray {
+ public:
+  // initialize NDArray with shape/dtype/ctx; storage comes from TVMBackendAllocWorkspace
+  static NDArray Empty(const DynArray<int64_t>& shape, DLDataType dtype, DLContext ctx);
+  // create a view of the NDArray storage, with the given shape/dtype (storage is shared)
+  NDArray CreateView(const DynArray<int64_t>& shape, DLDataType dtype);
+  // Copy into the internal storage; the byte count is derived from `src`.
+  void CopyFrom(DLTensor* src);
+  // Copy out of the internal storage; the byte count is derived from `dst`.
+  void CopyTo(DLTensor* dst) const;
+  // View `this` as a DLTensor that borrows this array's storage and shape buffer
+  DLTensor ToDLTensor();
+  ~NDArray();
+
+ private:
+  // reference-counted storage
+  std::shared_ptr<void> storage_;
+  // tensor shape
+  DynArray<int64_t> shape_;
+  // tensor dtype
+  DLDataType dtype_;
+  // tensor context
+  DLContext ctx_;
+};
+
+// Minimal GraphRuntime implementation
+class MicroGraphRuntime {
+ public:
+  // Construct a GraphRuntime with the given graph and DSOModule (`module` is not owned).
+  MicroGraphRuntime(const std::string& graph_json, DSOModule* module);
+  ~MicroGraphRuntime();
+  // Run the graph
+  void Run();
+  // Set the input at `index` to a copy of the tensor `data_in`
+  void SetInput(int index, DLTensor* data_in);
+  // Copy the output at `index` into `data_out`
+  void CopyOutputTo(int index, DLTensor* data_out);
+
+ private:
+  void SetupStorage();
+  void SetupOpExecs();
+
+  uint32_t num_node_entries() const { return node_row_ptr_.back(); }  // total entries
+  uint32_t entry_id(uint32_t nid, uint32_t index) const { return node_row_ptr_[nid] + index; }
+  uint32_t entry_id(const NodeEntry& e) const { return entry_id(e.node_id, e.index); }
+
+  DSOModule* module_;  // borrowed; never deleted by this class
+
+  // TODO(tulloch): these are essentially unused after construction.
+  // The graph nodes
+  DynArray<Node> nodes_;
+  // The argument nodes
+  DynArray<uint32_t> input_nodes_;
+  // Used for quick entry indexing
+  DynArray<uint32_t> node_row_ptr_;
+  // Output entries
+  DynArray<NodeEntry> outputs_;
+  // Additional graph attributes
+  GraphAttr attrs_;
+  // Execution context
+  DLContext ctx_{kDLCPU, 0};
+
+  // Common storage pool
+  DynArray<NDArray> storage_pool_;
+  // Data entry for each node
+  DynArray<NDArray> data_entry_;
+  // Operator for each node
+  DynArray<std::function<void()>> op_execs_;
+};
+
+}  // namespace micro
+}  // namespace tvm
+
+#endif  // TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_
diff --git a/src/runtime/micro/standalone/utvm_runtime.cc b/src/runtime/micro/standalone/utvm_runtime.cc
new file mode 100644
index 000000000000..418443818bf1
--- /dev/null
+++ b/src/runtime/micro/standalone/utvm_runtime.cc
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <cassert>
+
+#include "tvm/runtime/micro/standalone/utvm_runtime.h"
+#include "utvm_graph_runtime.h"
+
+void* UTVMRuntimeCreate(const char* json, size_t json_len, void* module) {
+  // `module` is a type-erased DSOModule*; the JSON bytes are copied into an owned string.
+  auto* dso_module = static_cast<tvm::micro::DSOModule*>(module);
+  return new tvm::micro::MicroGraphRuntime(std::string(json, json_len), dso_module);
+}
+
+void UTVMRuntimeDestroy(void* handle) {  // counterpart of UTVMRuntimeCreate
+  delete static_cast<tvm::micro::MicroGraphRuntime*>(handle);
+}
+
+void UTVMRuntimeSetInput(void* handle, int index, void* tensor) {
+  auto* runtime = static_cast<tvm::micro::MicroGraphRuntime*>(handle);  // type-erased handle
+  runtime->SetInput(index, static_cast<DLTensor*>(tensor));  // `tensor` is a DLTensor*
+}
+
+void UTVMRuntimeRun(void* handle) {  // `handle` is the pointer returned by UTVMRuntimeCreate
+  static_cast<tvm::micro::MicroGraphRuntime*>(handle)->Run();
+}
+
+void UTVMRuntimeGetOutput(void* handle, int index, void* tensor) {
+  auto* runtime = static_cast<tvm::micro::MicroGraphRuntime*>(handle);  // type-erased handle
+  runtime->CopyOutputTo(index, static_cast<DLTensor*>(tensor));  // `tensor` is a DLTensor*
+}
+void* UTVMRuntimeDSOModuleCreate(const char* so, size_t so_len) {
+  return new tvm::micro::DSOModule(std::string(so, so_len));  // copies exactly so_len bytes
+}
+
+void UTVMRuntimeDSOModuleDestroy(void* module) {  // counterpart of UTVMRuntimeDSOModuleCreate
+  delete static_cast<tvm::micro::DSOModule*>(module);
+}
diff --git a/src/runtime/micro/standalone/utvm_runtime_api.cc b/src/runtime/micro/standalone/utvm_runtime_api.cc
new file mode 100644
index 000000000000..896ff578da9e
--- /dev/null
+++ b/src/runtime/micro/standalone/utvm_runtime_api.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "utvm_runtime_api.h"
+
+#include <stdlib.h>
+#include <cassert>
+#include <string>
+
+void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes, int dtype_code_hint,
+                               int dtype_bits_hint) {  // 64-byte-aligned alloc; hints are unused
+  assert(nbytes > 0);
+  const size_t size = static_cast<size_t>(nbytes);  // size_t may be narrower (32-bit ARMv7)
+  if (size != nbytes) return nullptr;  // request does not fit: fail instead of truncating
+  void* ptr = nullptr;
+#ifdef __ANDROID__
+  ptr = memalign(64, size);
+#else
+  if (posix_memalign(&ptr, 64, size) != 0) ptr = nullptr;  // do not trust ptr after a failure
+#endif
+  return ptr;  // nullptr reports an allocation failure, also in NDEBUG builds
+}
+
+int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) {
+  ::free(ptr);  // matches the memalign/posix_memalign allocation in TVMBackendAllocWorkspace
+  return 0;     // always reports success; the device arguments are not consulted
+}
+
+static thread_local std::string g_last_error;  // per-thread message; empty until first set
+void TVMAPISetLastError(const char* msg) { g_last_error = (msg != nullptr) ? msg : ""; }
+const char* TVMGetLastError(void) { return g_last_error.c_str(); }
+
+int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void* cdata, int num_task) {
+  // No thread pool here: the whole job runs inline as task 0 of 1 and `num_task` is ignored.
+  TVMParallelGroupEnv env{};  // value-initialised so sync_handle is nullptr, not indeterminate
+  env.num_task = 1;
+  return flambda(0, &env, cdata);  // propagate the task's status instead of always returning 0
+}
diff --git a/src/runtime/micro/standalone/utvm_runtime_api.h b/src/runtime/micro/standalone/utvm_runtime_api.h
new file mode 100644
index 000000000000..1b87052840d4
--- /dev/null
+++ b/src/runtime/micro/standalone/utvm_runtime_api.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#ifndef TVM_RUNTIME_MICRO_STANDALONE_UTVM_RUNTIME_API_H_
+#define TVM_RUNTIME_MICRO_STANDALONE_UTVM_RUNTIME_API_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <cassert>
+
+// The subset of the TVM runtime API that is implemented by the minimal runtime API.
+// Every entry point below gets C linkage and a weak, default-visibility symbol from this macro.
+#define TVM_MICRO_RUNTIME_API_BACKEND_API extern "C" __attribute__((weak, visibility("default")))
+// Release a workspace pointer; utvm_runtime_api.cc implements this with free() and returns 0.
+TVM_MICRO_RUNTIME_API_BACKEND_API int TVMBackendFreeWorkspace(int device_type, int device_id,
+                                                              void* ptr);
+// Allocate an `nbytes`-byte workspace; utvm_runtime_api.cc requests 64-byte alignment.
+TVM_MICRO_RUNTIME_API_BACKEND_API void* TVMBackendAllocWorkspace(int device_type, int device_id,
+                                                                 uint64_t nbytes,
+                                                                 int dtype_code_hint,
+                                                                 int dtype_bits_hint);
+// Environment handed to a parallel task body.
+typedef struct {
+  void* sync_handle;  // not assigned by the launcher in utvm_runtime_api.cc
+  int32_t num_task;   // the launcher in utvm_runtime_api.cc always sets this to 1
+} TVMParallelGroupEnv;
+// Task callback: receives its task id, the group environment and the caller's `cdata`.
+typedef int (*FTVMParallelLambda)(int task_id, TVMParallelGroupEnv* penv, void* cdata);
+// Run `flambda`; the implementation in utvm_runtime_api.cc calls it once, inline, as task 0.
+TVM_MICRO_RUNTIME_API_BACKEND_API int TVMBackendParallelLaunch(FTVMParallelLambda flambda,
+                                                               void* cdata, int num_task);
+// Set / read the last error message, which utvm_runtime_api.cc keeps in a thread_local string.
+TVM_MICRO_RUNTIME_API_BACKEND_API void TVMAPISetLastError(const char* msg);
+TVM_MICRO_RUNTIME_API_BACKEND_API const char* TVMGetLastError(void);
+
+#undef TVM_MICRO_RUNTIME_API_BACKEND_API
+
+#endif  // TVM_RUNTIME_MICRO_STANDALONE_UTVM_RUNTIME_API_H_
diff --git a/tests/cpp/utvm_runtime_standalone_test.cc b/tests/cpp/utvm_runtime_standalone_test.cc
new file mode 100644
index 000000000000..a3720cd4d1a2
--- /dev/null
+++ b/tests/cpp/utvm_runtime_standalone_test.cc
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <random>
+
+#include <dlpack/dlpack.h>
+#include <gtest/gtest.h>
+#include <map>
+#include <vector>
+
+#ifdef USE_MICRO_STANDALONE_RUNTIME
+
+// The test shells out via system(..) to `gcc -shared -fPIC`, so it is restricted to OS X for now.
+#if defined(__APPLE__) && defined(__MACH__)
+
+#include <gtest/gtest.h>
+#include <topi/generic/injective.h>
+#include <tvm/build_module.h>
+#include <tvm/operation.h>
+#include <tvm/packed_func_ext.h>
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/transform.h>
+#include <tvm/relay/type.h>
+#include <tvm/runtime/micro/standalone/utvm_runtime.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/packed_func.h>
+#include <tvm/runtime/registry.h>
+
+#include <spawn.h>
+#include <sys/wait.h>
+
+TVM_REGISTER_GLOBAL("test.sch").set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
+  *rv = topi::generic::schedule_injective(args[0], args[1]);  // both arguments forwarded as-is
+});  // looked up by name in the test below and registered for "add" as its FTVMSchedule
+
+TEST(MicroStandaloneRuntime, BuildModule) {
+  // End-to-end: build y = (a + b) + c with Relay/LLVM, link it into a shared library with
+  // gcc, then run it through the standalone runtime's C API and check the result.
+  using namespace tvm;
+  auto tensor_type = relay::TensorTypeNode::make({2, 3}, ::tvm::Float(32));
+  auto a = relay::VarNode::make("a", tensor_type);
+  auto b = relay::VarNode::make("b", tensor_type);
+  auto add_op = relay::Op::Get("add");
+  auto x = relay::CallNode::make(add_op, {a, b}, tvm::Attrs(), {});
+  auto c = relay::VarNode::make("c", tensor_type);
+  auto y = relay::CallNode::make(add_op, {x, c}, tvm::Attrs(), {});
+  auto func = relay::FunctionNode::make(relay::FreeVars(y), y, relay::Type(), {});
+  auto A = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
+  auto B = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
+  auto C = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
+  auto Y = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
+  // Each ToDLPack() call returns a fresh DLManagedTensor that must be released through its
+  // deleter, so export every array once and reuse the view instead of leaking one per call.
+  DLManagedTensor* dl_a = A.ToDLPack();
+  DLManagedTensor* dl_b = B.ToDLPack();
+  DLManagedTensor* dl_c = C.ToDLPack();
+  DLManagedTensor* dl_y = Y.ToDLPack();
+  auto* pA = static_cast<float*>(dl_a->dl_tensor.data);
+  auto* pB = static_cast<float*>(dl_b->dl_tensor.data);
+  auto* pC = static_cast<float*>(dl_c->dl_tensor.data);
+  auto* pY = static_cast<float*>(dl_y->dl_tensor.data);
+  for (int i = 0; i < 6; ++i) {
+    pA[i] = i;
+    pB[i] = i + 1;
+    pC[i] = i + 2;
+  }
+  // get schedule
+  auto reg = tvm::runtime::Registry::Get("relay.op._Register");
+  auto s_i = tvm::runtime::Registry::Get("test.sch");
+  ASSERT_TRUE(reg != nullptr) << "no _Register";
+  ASSERT_TRUE(s_i != nullptr) << "no test_sch";
+  (*reg)("add", "FTVMSchedule", *s_i, 10);
+  // build
+  auto pfb = tvm::runtime::Registry::Get("relay.build_module._BuildModule");
+  ASSERT_TRUE(pfb != nullptr) << "no _BuildModule";
+  tvm::runtime::Module build_mod = (*pfb)();
+  auto build_f = build_mod.GetFunction("build", false);
+  auto json_f = build_mod.GetFunction("get_graph_json", false);
+  auto mod_f = build_mod.GetFunction("get_module", false);
+  Map<tvm::Integer, tvm::Target> targets;
+  Target llvm_tgt = Target::Create("llvm");
+  targets.Set(0, llvm_tgt);
+  build_f(func, targets, llvm_tgt);
+  std::string json = json_f();
+  tvm::runtime::Module mod = mod_f();
+  std::string o_fname = std::tmpnam(nullptr);
+  std::string so_fname = std::tmpnam(nullptr);
+  mod->SaveToFile(o_fname, "o");
+  // Link the object file into a shared library that the DSOModule can load.
+  const std::string link_cmd = "gcc -shared -fPIC -o " + so_fname + " " + o_fname;
+  ASSERT_EQ(system(link_cmd.c_str()), 0);
+  // Now, execute the minimal runtime.
+  auto* dsoModule = UTVMRuntimeDSOModuleCreate(so_fname.c_str(), so_fname.size());
+  ASSERT_NE(dsoModule, nullptr);
+  auto* handle = UTVMRuntimeCreate(json.c_str(), json.size(), dsoModule);
+  ASSERT_NE(handle, nullptr);
+  UTVMRuntimeSetInput(handle, 0, &dl_a->dl_tensor);
+  UTVMRuntimeSetInput(handle, 1, &dl_b->dl_tensor);
+  UTVMRuntimeSetInput(handle, 2, &dl_c->dl_tensor);
+  UTVMRuntimeRun(handle);
+  UTVMRuntimeGetOutput(handle, 0, &dl_y->dl_tensor);
+  for (int i = 0; i < 6; ++i) {
+    EXPECT_NEAR(pY[i], i + (i + 1) + (i + 2), 1e-4);
+  }
+  UTVMRuntimeDestroy(handle);
+  UTVMRuntimeDSOModuleDestroy(dsoModule);
+  // Release the DLPack views now that the runtime no longer reads or writes them.
+  for (DLManagedTensor* view : {dl_a, dl_b, dl_c, dl_y}) {
+    if (view->deleter != nullptr) view->deleter(view);
+  }
+}
+
+#endif
+#endif
+
+int main(int argc, char** argv) {  // built even when the TEST above is compiled out
+  ::testing::InitGoogleTest(&argc, argv);
+  ::testing::GTEST_FLAG(death_test_style) = "threadsafe";
+  return RUN_ALL_TESTS();
+}