Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RUNTIME][uTVM] AutoTVM + uTVM for Cortex-M7 #5417

Merged
merged 11 commits into from
Apr 30, 2020
Merged
2 changes: 1 addition & 1 deletion 3rdparty/dmlc-core
Submodule dmlc-core updated 54 files
+0 −38 .github/workflows/githubci.yml
+1 −0 .gitignore
+82 −0 .travis.yml
+31 −51 CMakeLists.txt
+13 −201 LICENSE
+1 −1 README.md
+6 −19 appveyor.yml
+0 −13 cmake/Modules/FindASan.cmake
+0 −13 cmake/Modules/FindLSan.cmake
+0 −13 cmake/Modules/FindTSan.cmake
+0 −13 cmake/Modules/FindUBSan.cmake
+0 −63 cmake/Sanitizer.cmake
+1 −4 cmake/build_config.h.in
+1 −1 cmake/gtest_cmake.in
+16 −1 doc/Doxyfile
+1 −16 include/dmlc/base.h
+1 −4 include/dmlc/build_config_default.h
+0 −4 include/dmlc/concurrency.h
+18 −18 include/dmlc/concurrentqueue.h
+2 −3 include/dmlc/json.h
+3 −20 include/dmlc/logging.h
+1 −1 include/dmlc/omp.h
+0 −10 include/dmlc/optional.h
+23 −106 include/dmlc/parameter.h
+3 −1 include/dmlc/thread_group.h
+2 −4 include/dmlc/thread_local.h
+46 −74 include/dmlc/threadediter.h
+2 −0 make/dmlc.mk
+2 −2 scripts/lint.py
+19 −12 scripts/packages.mk
+32 −0 scripts/setup_nvcc.sh
+0 −65 scripts/test_script.sh
+0 −0 scripts/travis/s390x/Dockerfile
+0 −0 scripts/travis/s390x/build_via_cmake.sh
+1 −1 scripts/travis/s390x/ci_build.sh
+0 −0 scripts/travis/s390x/entrypoint.sh
+3 −0 scripts/travis/travis_before_cache.sh
+9 −0 scripts/travis/travis_osx_install.sh
+57 −0 scripts/travis/travis_script.sh
+40 −0 scripts/travis/travis_setup_env.sh
+16 −0 src/build_config.cc
+3 −7 src/data/csv_parser.h
+1 −1 test/logging_test.cc
+0 −4 test/unittest/CMakeLists.txt
+1 −2 test/unittest/unittest_env.cc
+0 −30 test/unittest/unittest_param.cc
+56 −80 test/unittest/unittest_parser.cc
+1 −0 test/unittest/unittest_thread_group.cc
+2 −2 test/unittest/unittest_threaditer.cc
+15 −19 test/unittest/unittest_threaditer_exc_handling.cc
+0 −4 tracker/dmlc_tracker/launcher.py
+0 −7 tracker/dmlc_tracker/ssh.py
+0 −13 tracker/dmlc_tracker/util.py
+2 −4 tracker/dmlc_tracker/yarn.py
7 changes: 5 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,15 @@ include(cmake/modules/contrib/TFLite.cmake)
include(cmake/modules/contrib/TF_TVMDSOOP.cmake)
include(cmake/modules/contrib/CoreML.cmake)

include(CheckCXXCompilerFlag)
if(NOT MSVC)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
message(STATUS "Build with c++14")
set(CMAKE_CXX_FLAGS "-std=c++14 ${CMAKE_CXX_FLAGS}")
set(CMAKE_CUDA_STANDARD 14)
else()
check_cxx_compiler_flag("/std:c++14" SUPPORT_CXX14)
set(CMAKE_CXX_FLAGS "/std:c++14 ${CMAKE_CXX_FLAGS}")
set(CMAKE_CUDA_STANDARD 14)
endif()

add_library(tvm SHARED ${COMPILER_SRCS} ${RUNTIME_SRCS})
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ build/libtvm_web_runtime.js: build/libtvm_web_runtime.bc
cpplint:
python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src
python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include;
python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src \
python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp \
include src \
examples/extension/src examples/graph_executor/src

pylint:
Expand Down
2 changes: 2 additions & 0 deletions include/tvm/tir/stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,8 @@ constexpr const char* loop_scope = "loop_scope";
constexpr const char* reduce_scope = "reduce_scope";
/*! \brief Mark region is guarded by the pragma extension */
constexpr const char* pragma_scope_prefix = "pragma_";
/*! \brief Import C source or file into the final code gen module */
constexpr const char* pragma_import_c = "pragma_import_c";
/*! \brief Import llvm source or file into the final code gen module */
constexpr const char* pragma_import_llvm = "pragma_import_llvm";
/*! \brief Try to modify the AST to support Tensor Core */
Expand Down
1 change: 1 addition & 0 deletions python/tvm/autotvm/measure/local_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def submit(self, func, *args, **kwargs):
if not self.do_fork:
return LocalFutureNoFork(func(*args, **kwargs))

# TODO why they choose a queue size of 2? add a comment
tmoreau89 marked this conversation as resolved.
Show resolved Hide resolved
queue = Queue(2)
process = Process(target=call_with_timeout,
args=(queue, self.timeout, func, args, kwargs))
Expand Down
7 changes: 5 additions & 2 deletions python/tvm/autotvm/measure/measure_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,8 @@ def get_build_kwargs(self):

if 'cuda' in self.task.target.keys:
kwargs["cuda_arch"] = "sm_" + "".join(ctx.compute_version.split('.'))
if self.task.target.device_name == 'micro_dev':
kwargs.setdefault('build_option', {})['disable_vectorize'] = True

return kwargs

Expand Down Expand Up @@ -273,8 +275,9 @@ def run(self, measure_inputs, build_results):
if isinstance(res, Exception): # executor error or timeout
results.append(MeasureResult((str(res),), MeasureErrorNo.RUN_TIMEOUT,
self.timeout, time.time()))
else:
results.append(res)
raise Exception(f'encountered exception during measurement: {results}')

results.append(res)

return results

Expand Down
1 change: 1 addition & 0 deletions python/tvm/autotvm/task/relay_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def _lower(mod,
grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
grc.codegen(mod["main"])
return

# default case
# Try graph codegen first to extract autotvm tasks.
# If failed to compile, then fallback to use VM compiler.
Expand Down
6 changes: 3 additions & 3 deletions python/tvm/autotvm/tuner/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ def __del__(self):
def _callback(tuner, inputs, results):
ctx.ct += len(inputs)

flops = 0
flops = float("inf")
tmoreau89 marked this conversation as resolved.
Show resolved Hide resolved
for inp, res in zip(inputs, results):
if res.error_no == 0:
flops = inp.task.flop / np.mean(res.costs)
flops = min(inp.task.flop / np.mean(res.costs), flops)
tmoreau89 marked this conversation as resolved.
Show resolved Hide resolved

if logger.level < logging.DEBUG: # only print progress bar in non-debug mode
if logger.level > logging.DEBUG: # only print progress bar in non-debug mode
ctx.cur_flops = flops
ctx.best_flops = tuner.best_flops

Expand Down
8 changes: 6 additions & 2 deletions python/tvm/autotvm/tuner/ga_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def __init__(self, task, pop_size=100, elite_num=3, mutation_prob=0.1):

# space info
self.space = task.config_space
self.dims = [len(x) for x in self.space.space_map.values()]
self.dim_keys = []
self.dims = []
for k, v in self.space.space_map.items():
self.dim_keys.append(k)
self.dims.append(len(v))

self.visited = set([])

Expand Down Expand Up @@ -123,7 +127,7 @@ def update(self, inputs, results):
if len(self.visited) < len(self.space):
while knob2point(tmp_gene, self.dims) in self.visited:
j = np.random.randint(len(self.dims))
tmp_gene[j] = np.random.randint(self.dims[j])
tmp_gene[j] = np.random.randint(self.dims[j]) # pylint: disable=invalid-sequence-index
next_genes.append(tmp_gene)
self.visited.add(knob2point(tmp_gene, self.dims))
else:
Expand Down
10 changes: 9 additions & 1 deletion python/tvm/autotvm/tuner/tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,15 @@ def tune(self, n_trial, measure_option, early_stopping=None, callbacks=(), si_pr
i + k + 1, si_prefix, format_si_prefix(flops, si_prefix),
format_si_prefix(self.best_flops, si_prefix), res, config)

i += len(results)
num_successes = 0
for result in results:
if isinstance(result.costs[0], float):
num_successes += 1
if num_successes != len(results):
logger.debug('not counting %d failures towards trial count',
len(results) - num_successes)
i += num_successes

tmoreau89 marked this conversation as resolved.
Show resolved Hide resolved
self.ttl = min(early_stopping + self.best_iter, n_trial) - i

self.update(inputs, results)
Expand Down
29 changes: 26 additions & 3 deletions python/tvm/contrib/binutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import tvm._ffi
from . import util

# TODO does this file still belong in `contrib`. is it too µTVM-specific?

# TODO shouldn't need so many `ALIGN` directives
RELOCATION_LD_SCRIPT_TEMPLATE = """
/* linker symbol for use in UTVMInit */
_utvm_stack_pointer_init = 0x{stack_pointer_init:x};
Expand Down Expand Up @@ -118,7 +120,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
size of the section in bytes
"""
if not os.path.isfile(binary_path):
raise RuntimeError("no such file \"{}\"".format(binary_path))
raise RuntimeError('no such file "{}"'.format(binary_path))
# We use the "-A" flag here to get the ".rodata" section's size, which is
# not included by default.
size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path])
Expand Down Expand Up @@ -160,6 +162,10 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
# padding for most cases, but symbols can be arbitrarily large, so this
# isn't bulletproof.
return section_size + 32

# NOTE: in the past, section_size has been wrong on x86. it may be
# inconsistent. TODO: maybe stop relying on `*size` to give us the size and
# instead read the section with `*objcopy` and count the bytes.
return section_size


Expand Down Expand Up @@ -206,11 +212,13 @@ def tvm_callback_relocate_binary(
rel_bin : bytearray
the relocated binary
"""
assert text_start < rodata_start < data_start < bss_start < stack_end
stack_pointer_init = stack_end - word_size
ld_script_contents = ""
# TODO(weberlo): There should be a better way to configure this for different archs.
# TODO is this line even necessary?
if "riscv" in toolchain_prefix:
ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n"
ld_script_contents += 'OUTPUT_ARCH( "riscv" )\n\n'
ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format(
word_size=word_size,
text_start=text_start,
Expand All @@ -221,16 +229,31 @@ def tvm_callback_relocate_binary(

tmp_dir = util.tempdir()
rel_obj_path = tmp_dir.relpath("relocated.obj")
rel_ld_script_path = tmp_dir.relpath("relocated.lds")
rel_ld_script_path = tmp_dir.relpath("relocate.lds")
with open(rel_ld_script_path, "w") as f:
f.write(ld_script_contents)
run_cmd([
"{}ld".format(toolchain_prefix),
binary_path,
"-T", rel_ld_script_path,
"-o", rel_obj_path])

with open(rel_obj_path, "rb") as f:
rel_bin = bytearray(f.read())

gdb_init_dir = os.environ.get("MICRO_GDB_INIT_DIR")
if gdb_init_dir is not None:
gdb_init_path = f"{gdb_init_dir}/.gdbinit"
with open(gdb_init_path, "r") as f:
gdbinit_contents = f.read().split("\n")
new_contents = []
for line in gdbinit_contents:
new_contents.append(line)
if line.startswith("target"):
new_contents.append(f"add-symbol-file {rel_obj_path}")
with open(gdb_init_path, "w") as f:
f.write("\n".join(new_contents))

return rel_bin


Expand Down
42 changes: 25 additions & 17 deletions python/tvm/exec/rpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import argparse
import ast
import json
import multiprocessing
import sys
import logging
Expand All @@ -41,7 +42,7 @@ def main(args):
tracker_addr = (url, port)
if not args.key:
raise RuntimeError(
"Need key to present type of resource when tracker is available")
'Need key to present type of resource when tracker is available')
else:
tracker_addr = None

Expand Down Expand Up @@ -75,8 +76,8 @@ def init_utvm(args):
dev_config = json.load(dev_conf_file)
else:
dev_config_args = ast.literal_eval(args.utvm_dev_config_args)
default_config_func = micro.device.get_device_funcs(args.utvm_dev_id)['default_config']
dev_config = default_config_func(*dev_config_args)
generate_config_func = micro.device.get_device_funcs(args.utvm_dev_id)['generate_config']
dev_config = generate_config_func(*dev_config_args)

if args.utvm_dev_config or args.utvm_dev_id:
# add MicroTVM overrides
Expand All @@ -100,27 +101,34 @@ def server_shutdown():
parser.add_argument('--port-end', type=int, default=9199,
help='The end search port of the RPC')
parser.add_argument('--tracker', type=str,
help="The address of RPC tracker in host:port format. "
"e.g. (10.77.1.234:9190)")
help=('The address of RPC tracker in host:port format. '
'e.g. (10.77.1.234:9190)'))
parser.add_argument('--key', type=str, default="",
help="The key used to identify the device type in tracker.")
help='The key used to identify the device type in tracker.')
parser.add_argument('--silent', action='store_true',
help="Whether run in silent mode.")
help='Whether run in silent mode.')
parser.add_argument('--load-library', type=str,
help="Additional library to load")
help='Additional library to load')
tmoreau89 marked this conversation as resolved.
Show resolved Hide resolved
parser.add_argument('--no-fork', dest='fork', action='store_false',
help="Use spawn mode to avoid fork. This option \
is able to avoid potential fork problems with Metal, OpenCL \
and ROCM compilers.")
help=('Use spawn mode to avoid fork. This option '
'is able to avoid potential fork problems with Metal, OpenCL '
'and ROCM compilers.'))
parser.add_argument('--custom-addr', type=str,
help="Custom IP Address to Report to RPC Tracker")
help='Custom IP Address to Report to RPC Tracker')
parser.add_argument('--utvm-dev-config', type=str,
help='JSON config file for the target device (if using MicroTVM)')
parser.add_argument('--utvm-dev-id', type=str,
help='Unique ID for the target device (if using MicroTVM)')
help=('JSON config file for the target device (if using MicroTVM). '
'This file should contain serialized output similar to that returned '
"from the device module's generate_config. Can't be specified when "
'--utvm-dev-config-args is specified.'))
parser.add_argument('--utvm-dev-config-args', type=str,
help=('Python list of literals required to generate a default'
' MicroTVM config (if --utvm-dev-id is specified)'))
help=("Arguments to the device module's generate_config function. "
'Must be a python literal parseable by literal_eval. If specified, '
"the device configuration is generated using the device module's "
"generate_config. Can't be specified when --utvm-dev-config is "
"specified."))
parser.add_argument('--utvm-dev-id', type=str,
help=('Unique ID for the target device (if using MicroTVM). Should '
'match the name of a module underneath tvm.micro.device).'))

parser.set_defaults(fork=True)
args = parser.parse_args()
Expand Down
5 changes: 3 additions & 2 deletions python/tvm/micro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""MicroTVM module for bare-metal backends"""

from ..contrib import binutil
from .base import Session, create_micro_mod, cross_compiler
from .base import LibType, get_micro_host_driven_dir, get_micro_device_dir
from .base import DEVICE_SECTIONS
from .base import Session, create_micro_mod, cross_compiler, LibType
from .base import get_micro_host_driven_dir, get_micro_device_dir
from . import device
Loading