diff --git a/include/tvm/runtime/vm.h b/include/tvm/runtime/vm.h index 7d2df0b285b1..ee973cb62092 100644 --- a/include/tvm/runtime/vm.h +++ b/include/tvm/runtime/vm.h @@ -747,6 +747,12 @@ class VirtualMachine : public runtime::ModuleNode { /*! \brief The parameter name to data mapping. */ std::unordered_map params_; + + /*! + * \brief The constant pool for runtime. It caches the device dependent + * object to avoid reallocation of constants during inference. + */ + std::vector const_pool_; }; } // namespace vm diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index fd5ff64d5812..ab0e06208de9 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -795,9 +795,18 @@ void VirtualMachine::RunLoop() { } case Opcode::LoadConst: { auto constant_obj = exec->constants[instr.const_index]; - // TODO(wweic) ctx could be obtained from the ctxs list. - auto device_obj = CopyTo(constant_obj, ctxs[0]); - WriteRegister(instr.dst, device_obj); + // We cache the allocated object in the constant pool. To measure, the + // first iteration will set the pool up. The other iterations will + // directly reuse the allocated objects. + if (const_pool_.size() <= static_cast(instr.const_index)) { + const_pool_.resize(instr.const_index + 1); + } + + if (!const_pool_[instr.const_index].defined()) { + // TODO(wweic) ctx could be obtained from the ctxs list. + const_pool_[instr.const_index] = CopyTo(constant_obj, ctxs[0]); + } + WriteRegister(instr.dst, const_pool_[instr.const_index]); pc++; goto main_loop; }