From a6bc7b993a3e41bd611d8fc70967e0399adba41c Mon Sep 17 00:00:00 2001
From: MORITA Kazutaka
Date: Thu, 21 Jun 2018 17:24:01 +0900
Subject: [PATCH] Add support for multiple OpenCL platforms

---
 include/tvm/runtime/c_runtime_api.h         |  1 +
 python/tvm/_ffi/runtime_ctypes.py           |  3 +-
 python/tvm/contrib/sdaccel.py               | 16 +++--
 src/codegen/codegen_opencl.cc               |  2 +-
 src/codegen/codegen_vhls.cc                 | 20 +++++-
 src/codegen/opt/build_opencl_off.cc         |  4 +-
 src/pass/verify_memory.cc                   |  2 +-
 src/runtime/c_runtime_api.cc                |  1 +
 src/runtime/opencl/opencl_common.h          | 16 +++--
 src/runtime/opencl/opencl_device_api.cc     | 74 +++++++++++----------
 src/runtime/opencl/opencl_module.cc         | 31 +++++----
 src/runtime/opencl/opencl_module.h          |  4 +-
 tests/python/integration/test_ewise_fpga.py |  3 -
 13 files changed, 107 insertions(+), 70 deletions(-)

diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index c61a4884a1f5..7f3a11d9ddab 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -60,6 +60,7 @@ typedef int64_t tvm_index_t;
 
 /*! \brief Extension device types in TVM */
 typedef enum {
+  kDLSDAccel = 6,
   kDLVulkan = 7,
   kOpenGL = 11,
   // Extension DRAM type, used for quickly test extension device
diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
index d862a5abd497..9609f867576b 100644
--- a/python/tvm/_ffi/runtime_ctypes.py
+++ b/python/tvm/_ffi/runtime_ctypes.py
@@ -95,6 +95,7 @@ class TVMContext(ctypes.Structure):
         1 : 'cpu',
         2 : 'gpu',
         4 : 'opencl',
+        6 : 'sdaccel',
         7 : 'vulkan',
         8 : 'metal',
         9 : 'vpi',
@@ -111,7 +112,7 @@ class TVMContext(ctypes.Structure):
         'nvptx': 2,
         'cl': 4,
         'opencl': 4,
-        'sdaccel': 4,
+        'sdaccel': 6,
         'vulkan': 7,
         'metal': 8,
         'vpi': 9,
diff --git a/python/tvm/contrib/sdaccel.py b/python/tvm/contrib/sdaccel.py
index 12dba31a4ab5..ab07304e82e0 100644
--- a/python/tvm/contrib/sdaccel.py
+++ b/python/tvm/contrib/sdaccel.py
@@ -27,11 +27,12 @@ def _vhls_to_opencl(code):
     return out
 
 
+@register_func("tvm_callback_sdaccel_fake_compile")
 def _fake_compile_vhls(code):
     """Fake compile Vivado HLS code for SDAccel.
 
-    Compile the Vivado HLS code as an OpenCL code, and generate a program
-    binary for GPU which can be used instead of xclbin.
+    Compile the Vivado HLS code as OpenCL code, and generate a program binary
+    on another available OpenCL platform, for testing in place of an xclbin.
 
     Parameters
     ----------
@@ -46,8 +47,11 @@ def _fake_compile_vhls(code):
     try:
         import pyopencl as cl
     except ImportError:
-        raise ImportError('PyOpenCL is required for testing SDAccel backend.')
-    ctx = cl.Context(dev_type=cl.device_type.GPU)
+        raise RuntimeError('PyOpenCL is required for testing SDAccel backend.')
+    platforms = [pf for pf in cl.get_platforms() if pf.name != "Xilinx"]
+    if not platforms:
+        raise RuntimeError("No OpenCL platform is available.")
+    ctx = cl.Context(properties=[(cl.context_properties.PLATFORM, platforms[0])])
     program = cl.Program(ctx, _vhls_to_opencl(code)).build()
     binary = bytearray(program.binaries[0])
     return binary
@@ -87,9 +91,7 @@ def compile_vhls(code, kernel):
     platform = os.environ.get("XCL_PLATFORM", os.environ.get("AWS_PLATFORM"))
 
     if platform is None:
-        # If we don't have the Xilinx toolchain, create a program binary for
-        # GPU and use it for testing.
-        return _fake_compile_vhls(code)
+        raise RuntimeError("No Xilinx device specified.")
 
     # build xo
     args = [xocc, "-c", "-t", target, "--platform", platform, "-o", tmp_xo, "-k", kernel] + \
diff --git a/src/codegen/codegen_opencl.cc b/src/codegen/codegen_opencl.cc
index 2d5026e827e2..ccf54dfc89d1 100644
--- a/src/codegen/codegen_opencl.cc
+++ b/src/codegen/codegen_opencl.cc
@@ -218,7 +218,7 @@ runtime::Module BuildOpenCL(Array<LoweredFunc> funcs) {
   if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) {
     code = (*f)(code).operator std::string();
   }
-  return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs), code);
+  return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs), code, {"gpu", "cpu"});
 }
 
 TVM_REGISTER_API("codegen.build_opencl")
diff --git a/src/codegen/codegen_vhls.cc b/src/codegen/codegen_vhls.cc
index 3cb8b9a346c2..ffe1f02b17ba 100644
--- a/src/codegen/codegen_vhls.cc
+++ b/src/codegen/codegen_vhls.cc
@@ -85,13 +85,27 @@ runtime::Module BuildSDAccel(Array<LoweredFunc> funcs) {
     code = (*f)(code).operator std::string();
   }
 
+  std::vector<std::string> device_types = {"accelerator"};
+  std::string platform_name = "Xilinx";
   std::string xclbin;
   if (const auto* f = Registry::Get("tvm_callback_sdaccel_compile")) {
-    xclbin = (*f)(code, funcname).operator std::string();
-  } else {
+    try {
+      xclbin = (*f)(code, funcname).operator std::string();
+    } catch (const dmlc::Error& e) {
+      LOG(WARNING) << e.what();
+      LOG(WARNING) << "Failed to set up SDAccel, falling back to other platforms for testing.";
+      if (const auto* f = Registry::Get("tvm_callback_sdaccel_fake_compile")) {
+        xclbin = (*f)(code).operator std::string();
+        device_types = {"gpu", "cpu"};
+        platform_name = "";
+      }
+    }
+  }
+  if (xclbin == "") {
     LOG(FATAL) << "Cannot compile Vivado HLS code.";
   }
-  return OpenCLModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code);
+  return OpenCLModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code, device_types,
+                            platform_name);
 }
 
 TVM_REGISTER_API("codegen.build_sdaccel")
diff --git a/src/codegen/opt/build_opencl_off.cc b/src/codegen/opt/build_opencl_off.cc
index fc962d4840e9..0cfccffaa1bb 100644
--- a/src/codegen/opt/build_opencl_off.cc
+++ b/src/codegen/opt/build_opencl_off.cc
@@ -12,7 +12,9 @@ Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,
     std::unordered_map<std::string, FunctionInfo> fmap,
-    std::string source) {
+    std::string source,
+    std::vector<std::string> device_types,
+    std::string platform_name) {
   LOG(WARNING) << "OpenCL runtime not enabled, return a source module...";
   return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "opencl");
 }
diff --git a/src/pass/verify_memory.cc b/src/pass/verify_memory.cc
index e928bedf266c..24e94eab571d 100644
--- a/src/pass/verify_memory.cc
+++ b/src/pass/verify_memory.cc
@@ -139,7 +139,7 @@ class MemoryAccessVerifier final : protected IRVisitor {
 
   /// Check if a given DLDeviceType/TVMDeviceExtType value denotes GPU device.
   static bool IsGPUDevice(int dev_type) {
-    return kDLGPU == dev_type || kDLOpenCL == dev_type ||
+    return kDLGPU == dev_type || kDLOpenCL == dev_type || kDLSDAccel == dev_type ||
            kDLVulkan == dev_type || kDLMetal == dev_type ||
            kDLROCM == dev_type || kOpenGL == dev_type;
   }
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index df3a19f306e0..5eb39abcc71a 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -31,6 +31,7 @@ inline std::string DeviceName(int type) {
     case kDLCPU: return "cpu";
     case kDLGPU: return "gpu";
     case kDLOpenCL: return "opencl";
+    case kDLSDAccel: return "sdaccel";
    case kDLVulkan: return "vulkan";
     case kDLMetal: return "metal";
     case kDLVPI: return "vpi";
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index e2db24f85769..10fc87d18351 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -104,10 +104,12 @@ class OpenCLWorkspace final : public DeviceAPI {
  public:
   // global platform id
   cl_platform_id platform_id;
+  // global platform name
+  std::string platform_name;
   // global context of this process
   cl_context context{nullptr};
-  // whether the workspace it initialized.
-  bool initialized_{false};
+  // the device type
+  std::string device_type;
   // the devices
   std::vector<cl_device_id> devices;
   // the queues
@@ -128,11 +130,15 @@ class OpenCLWorkspace final : public DeviceAPI {
     }
   }
   // Initialzie the device.
-  void Init();
+  void Init(const std::vector<std::string>& device_types, const std::string& platform_name = "");
+  // Check whether the context is OpenCL or not.
+  bool IsOpenCLDevice(TVMContext ctx) {
+    return ctx.device_type == kDLOpenCL ||
+        ctx.device_type == static_cast<DLDeviceType>(kDLSDAccel);
+  }
   // get the queue of the context
   cl_command_queue GetQueue(TVMContext ctx) {
-    CHECK_EQ(ctx.device_type, kDLOpenCL);
-    this->Init();
+    CHECK(IsOpenCLDevice(ctx));
     CHECK(ctx.device_id >= 0 && static_cast<size_t>(ctx.device_id) < queues.size())
         << "Invalid OpenCL device_id=" << ctx.device_id;
     return queues[ctx.device_id];
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index 8bd86d22aee5..ad7c66ddfc0d 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -24,7 +24,6 @@ void OpenCLWorkspace::SetDevice(TVMContext ctx) {
 
 void OpenCLWorkspace::GetAttr(
     TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) {
-  this->Init();
   size_t index = static_cast<size_t>(ctx.device_id);
   if (kind == kExist) {
     *rv = static_cast<int>(index< devices.size());
@@ -99,7 +98,6 @@ void OpenCLWorkspace::GetAttr(
 
 void* OpenCLWorkspace::AllocDataSpace(
     TVMContext ctx, size_t size, size_t alignment, TVMType type_hint) {
-  this->Init();
   CHECK(context != nullptr) << "No OpenCL device";
   cl_int err_code;
   cl_mem mptr = clCreateBuffer(
@@ -122,15 +120,14 @@ void OpenCLWorkspace::CopyDataFromTo(const void* from,
                                      TVMContext ctx_to,
                                      TVMType type_hint,
                                      TVMStreamHandle stream) {
-  this->Init();
   CHECK(stream == nullptr);
-  if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLOpenCL) {
+  if (IsOpenCLDevice(ctx_from) && IsOpenCLDevice(ctx_to)) {
     OPENCL_CALL(clEnqueueCopyBuffer(
         this->GetQueue(ctx_to),
         static_cast<cl_mem>((void*)from),  // NOLINT(*)
         static_cast<cl_mem>(to),
         from_offset, to_offset, size, 0, nullptr, nullptr));
-  } else if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLCPU) {
+  } else if (IsOpenCLDevice(ctx_from) && ctx_to.device_type == kDLCPU) {
     OPENCL_CALL(clEnqueueReadBuffer(
         this->GetQueue(ctx_from),
         static_cast<cl_mem>((void*)from),  // NOLINT(*)
@@ -138,7 +135,7 @@ void OpenCLWorkspace::CopyDataFromTo(const void* from,
         static_cast<char*>(to) + to_offset,
         0, nullptr, nullptr));
     OPENCL_CALL(clFinish(this->GetQueue(ctx_from)));
-  } else if (ctx_from.device_type == kDLCPU && ctx_to.device_type == kDLOpenCL) {
+  } else if (ctx_from.device_type == kDLCPU && IsOpenCLDevice(ctx_to)) {
     OPENCL_CALL(clEnqueueWriteBuffer(
         this->GetQueue(ctx_to),
         static_cast<cl_mem>(to),
@@ -226,38 +223,40 @@ bool MatchPlatformInfo(
   return param_value.find(value) != std::string::npos;
 }
 
-void OpenCLWorkspace::Init() {
-  if (initialized_) return;
+void OpenCLWorkspace::Init(const std::vector<std::string>& device_types,
+                           const std::string& platform_name) {
+  if (context != nullptr) return;
   std::lock_guard<std::mutex> lock(this->mu);
-  if (initialized_) return;
-  initialized_ = true;
   if (context != nullptr) return;
   // matched platforms
-  std::vector<cl_platform_id> platform_matched = cl::GetPlatformIDs();
-  if (platform_matched.size() == 0) {
+  std::vector<cl_platform_id> platform_ids = cl::GetPlatformIDs();
+  if (platform_ids.size() == 0) {
     LOG(WARNING) << "No OpenCL platform matched given existing options ...";
     return;
   }
-  if (platform_matched.size() > 1) {
-    LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... ";
-  }
-  this->platform_id = platform_matched[0];
-  LOG(INFO) << "Initialize OpenCL platform \'"
-            << cl::GetPlatformInfo(this->platform_id, CL_PLATFORM_NAME) << '\'';
-  std::string device_types[] = {"accelerator", "gpu", "cpu"};
-  std::vector<cl_device_id> devices_matched;
-  for (auto type : device_types) {
-    devices_matched = cl::GetDeviceIDs(this->platform_id, type);
-    if (devices_matched.size() > 0) {
-      break;
+  this->platform_id = nullptr;
+  for (auto platform_id : platform_ids) {
+    if (!MatchPlatformInfo(platform_id, CL_PLATFORM_NAME, platform_name)) {
+      continue;
+    }
+    for (auto device_type : device_types) {
+      std::vector<cl_device_id> devices_matched = cl::GetDeviceIDs(platform_id, device_type);
+      if (devices_matched.size() > 0) {
+        this->platform_id = platform_id;
+        this->platform_name = cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME);
+        this->device_type = device_type;
+        this->devices = devices_matched;
+        LOG(INFO) << "Initialize OpenCL platform \'" << this->platform_name << '\'';
+        break;
+      }
+      LOG(INFO) << "\'" << cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME)
+                << "\' platform has no OpenCL device: " << device_type << " mode";
     }
-    LOG(INFO) << "No OpenCL device any device matched given the options: " << type << " mode";
   }
-  if (devices_matched.size() == 0) {
+  if (this->platform_id == nullptr) {
    LOG(WARNING) << "No OpenCL device";
     return;
   }
-  this->devices = devices_matched;
   cl_int err_code;
   this->context = clCreateContext(
       nullptr, this->devices.size(), &(this->devices[0]),
@@ -275,15 +274,22 @@
   }
 }
 
-bool InitOpenCL(TVMArgs args, TVMRetValue* rv) {
-  cl::OpenCLWorkspace::Global()->Init();
-  return true;
-}
-
 TVM_REGISTER_GLOBAL("device_api.opencl")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    DeviceAPI* ptr = OpenCLWorkspace::Global().get();
-    *rv = static_cast<void*>(ptr);
+    OpenCLWorkspace* w = OpenCLWorkspace::Global().get();
+    w->Init({"gpu", "cpu"});
+    *rv = static_cast<void*>(w);
+  });
+
+TVM_REGISTER_GLOBAL("device_api.sdaccel")
+.set_body([](TVMArgs args, TVMRetValue* rv) {
+    OpenCLWorkspace* w = OpenCLWorkspace::Global().get();
+    w->Init({"accelerator"}, "Xilinx");
+    if (w->context == nullptr) {
+      LOG(WARNING) << "Failed to set up SDAccel, falling back to other platforms for testing.";
+      w->Init({"gpu", "cpu"});
+    }
+    *rv = static_cast<void*>(w);
   });
 
 }  // namespace cl
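The platform selection added to OpenCLWorkspace::Init above first filters platforms by name (via MatchPlatformInfo) and then tries the requested device types in priority order, which is how device_api.sdaccel can prefer a Xilinx accelerator and fall back to a gpu/cpu platform. For illustration only, a rough PyOpenCL sketch of that selection loop; the helper name pick_platform is hypothetical and not part of this patch:

import pyopencl as cl

def pick_platform(platform_name="", device_types=("gpu", "cpu")):
    # An empty platform_name matches any platform, mirroring the substring
    # check in MatchPlatformInfo; device types are tried in priority order.
    for pf in cl.get_platforms():
        if platform_name and platform_name not in pf.name:
            continue
        all_devices = pf.get_devices()  # CL_DEVICE_TYPE_ALL
        for dev_type in device_types:
            flag = getattr(cl.device_type, dev_type.upper())
            devices = [d for d in all_devices if d.type & flag]
            if devices:
                return pf, devices
    return None, []

# device_api.sdaccel behaves roughly like pick_platform("Xilinx", ("accelerator",))
# followed by pick_platform("", ("gpu", "cpu")) when no Xilinx device exists.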
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 5d67ad286875..d1c4eb32553e 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -87,9 +87,9 @@ class OpenCLModuleNode : public ModuleNode {
   }
 
   // Initialize the programs
-  void Init() {
+  void Init(const std::vector<std::string>& device_types, const std::string& platform_name) {
     workspace_ = cl::OpenCLWorkspace::Global();
-    workspace_->Init();
+    workspace_->Init(device_types, platform_name);
     CHECK(workspace_->context != nullptr) << "No OpenCL device";
     if (fmt_ == "cl") {
       const char* s = data_.c_str();
@@ -283,26 +283,31 @@ Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,
     std::unordered_map<std::string, FunctionInfo> fmap,
-    std::string source) {
+    std::string source,
+    std::vector<std::string> device_types,
+    std::string platform_name) {
   std::shared_ptr<OpenCLModuleNode> n =
       std::make_shared<OpenCLModuleNode>(data, fmt, fmap, source);
-  n->Init();
+  n->Init(device_types, platform_name);
   return Module(n);
 }
 
 // Load module from module.
 Module OpenCLModuleLoadFile(const std::string& file_name,
-                            const std::string& format) {
+                            const std::string& format,
+                            const std::vector<std::string>& device_types,
+                            const std::string& platform_name = "") {
   std::string data;
   std::unordered_map<std::string, FunctionInfo> fmap;
   std::string fmt = GetFileFormat(file_name, format);
   std::string meta_file = GetMetaFilePath(file_name);
   LoadBinaryFromFile(file_name, &data);
   LoadMetaDataFromFile(meta_file, &fmap);
-  return OpenCLModuleCreate(data, fmt, fmap, std::string());
+  return OpenCLModuleCreate(data, fmt, fmap, std::string(), device_types, platform_name);
 }
 
-Module OpenCLModuleLoadBinary(void* strm) {
+Module OpenCLModuleLoadBinary(void* strm, const std::vector<std::string>& device_types,
+                              const std::string& platform_name = "") {
   dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
   std::string data;
   std::unordered_map<std::string, FunctionInfo> fmap;
@@ -310,32 +315,32 @@ Module OpenCLModuleLoadBinary(void* strm) {
   stream->Read(&fmt);
   stream->Read(&fmap);
   stream->Read(&data);
-  return OpenCLModuleCreate(data, fmt, fmap, std::string());
+  return OpenCLModuleCreate(data, fmt, fmap, std::string(), device_types, platform_name);
 }
 
 TVM_REGISTER_GLOBAL("module.loadfile_cl")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"gpu", "cpu"});
   });
 
 TVM_REGISTER_GLOBAL("module.loadfile_clbin")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"gpu", "cpu"});
   });
 
 TVM_REGISTER_GLOBAL("module.loadfile_xclbin")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"accelerator"}, "Xilinx");
   });
 
 TVM_REGISTER_GLOBAL("module.loadfile_awsxclbin")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"accelerator"}, "Xilinx");
   });
 
 TVM_REGISTER_GLOBAL("module.loadbinary_opencl")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadBinary(args[0]);
+    *rv = OpenCLModuleLoadBinary(args[0], {"gpu", "cpu"});
   });
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/opencl/opencl_module.h b/src/runtime/opencl/opencl_module.h
index b6e2a1e0e88a..a6ad67afe4db 100644
--- a/src/runtime/opencl/opencl_module.h
+++ b/src/runtime/opencl/opencl_module.h
@@ -25,7 +25,9 @@ Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,
     std::unordered_map<std::string, FunctionInfo> fmap,
-    std::string source);
+    std::string source,
+    std::vector<std::string> device_types,
+    std::string platform_name = "");
 }  // namespace runtime
 }  // namespace tvm
 #endif  // TVM_RUNTIME_OPENCL_OPENCL_MODULE_H_
diff --git a/tests/python/integration/test_ewise_fpga.py b/tests/python/integration/test_ewise_fpga.py
index fb7ca807340d..34f336c92775 100644
--- a/tests/python/integration/test_ewise_fpga.py
+++ b/tests/python/integration/test_ewise_fpga.py
@@ -1,8 +1,5 @@
 import tvm
 import numpy as np
-import os
-
-os.environ["XCL_EMULATION_MODE"] = "1"
 
 @tvm.register_func
 def tvm_callback_vhls_postproc(code):
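For reference, the fallback path this patch enables can be exercised end to end from Python, roughly as in the sketch below. It assumes the TVM Python API of this era (tvm.var, tvm.context, tvm.build) and no Xilinx toolchain installed, so BuildSDAccel falls back to tvm_callback_sdaccel_fake_compile and a non-Xilinx OpenCL platform; the kernel mirrors tests/python/integration/test_ewise_fpga.py and is illustrative only:

import numpy as np
import tvm

# Trivial elementwise kernel scheduled on the SDAccel "pipeline" axis.
n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)
px, x = s[B].split(B.op.axis[0], nparts=1)
s[B].bind(px, tvm.thread_axis("pipeline"))

# "sdaccel" is now its own device type (6); device_api.sdaccel tries the
# Xilinx "accelerator" platform first and falls back to gpu/cpu OpenCL.
ctx = tvm.context("sdaccel", 0)
if ctx.exist:
    fadd = tvm.build(s, [A, B], "sdaccel")
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=B.dtype), ctx)
    fadd(a, b)
    np.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1.0)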