Skip to content

Commit

Permalink
Add support for multiple OpenCL platforms
Browse files Browse the repository at this point in the history
  • Loading branch information
kazum committed Jun 27, 2018
1 parent d770412 commit a6bc7b9
Show file tree
Hide file tree
Showing 13 changed files with 107 additions and 70 deletions.
1 change: 1 addition & 0 deletions include/tvm/runtime/c_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ typedef int64_t tvm_index_t;

/*! \brief Extension device types in TVM */
typedef enum {
kDLSDAccel = 6,
kDLVulkan = 7,
kOpenGL = 11,
// Extension DRAM type, used for quickly test extension device
Expand Down
3 changes: 2 additions & 1 deletion python/tvm/_ffi/runtime_ctypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class TVMContext(ctypes.Structure):
1 : 'cpu',
2 : 'gpu',
4 : 'opencl',
6 : 'sdaccel',
7 : 'vulkan',
8 : 'metal',
9 : 'vpi',
Expand All @@ -111,7 +112,7 @@ class TVMContext(ctypes.Structure):
'nvptx': 2,
'cl': 4,
'opencl': 4,
'sdaccel': 4,
'sdaccel': 6,
'vulkan': 7,
'metal': 8,
'vpi': 9,
Expand Down
16 changes: 9 additions & 7 deletions python/tvm/contrib/sdaccel.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ def _vhls_to_opencl(code):
return out


@register_func("tvm_callback_sdaccel_fake_compile")
def _fake_compile_vhls(code):
"""Fake compile Vivado HLS code for SDAccel.
Compile the Vivado HLS code as an OpenCL code, and generate a program
binary for GPU which can be used instead of xclbin.
Compile the Vivado HLS code as OpenCL code, and generate a program binary
on another available OpenCL platform. The generated binary can be used for testing instead of an xclbin.
Parameters
----------
Expand All @@ -46,8 +47,11 @@ def _fake_compile_vhls(code):
try:
import pyopencl as cl
except ImportError:
raise ImportError('PyOpenCL is required for testing SDAccel backend.')
ctx = cl.Context(dev_type=cl.device_type.GPU)
raise RuntimeError('PyOpenCL is required for testing SDAccel backend.')
platforms = [pf for pf in cl.get_platforms() if pf.name != "Xilinx"]
if not platforms:
raise RuntimeError("No OpenCL platform is available.")
ctx = cl.Context(properties=[(cl.context_properties.PLATFORM, platforms[0])])
program = cl.Program(ctx, _vhls_to_opencl(code)).build()
binary = bytearray(program.binaries[0])
return binary
Expand Down Expand Up @@ -87,9 +91,7 @@ def compile_vhls(code, kernel):
platform = os.environ.get("XCL_PLATFORM", os.environ.get("AWS_PLATFORM"))

if platform is None:
# If we don't have the Xilinx toolchain, create a program binary for
# GPU and use it for testing.
return _fake_compile_vhls(code)
raise RuntimeError("No Xilinx device specified.")

# build xo
args = [xocc, "-c", "-t", target, "--platform", platform, "-o", tmp_xo, "-k", kernel] + \
Expand Down
2 changes: 1 addition & 1 deletion src/codegen/codegen_opencl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ runtime::Module BuildOpenCL(Array<LoweredFunc> funcs) {
if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) {
code = (*f)(code).operator std::string();
}
return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs), code);
return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs), code, {"gpu", "cpu"});
}

TVM_REGISTER_API("codegen.build_opencl")
Expand Down
20 changes: 17 additions & 3 deletions src/codegen/codegen_vhls.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,27 @@ runtime::Module BuildSDAccel(Array<LoweredFunc> funcs) {
code = (*f)(code).operator std::string();
}

std::vector<std::string> device_types = {"accelerator"};
std::string platform_name = "Xilinx";
std::string xclbin;
if (const auto* f = Registry::Get("tvm_callback_sdaccel_compile")) {
xclbin = (*f)(code, funcname).operator std::string();
} else {
try {
xclbin = (*f)(code, funcname).operator std::string();
} catch (const dmlc::Error& e) {
LOG(WARNING) << e.what();
LOG(WARNING) << "Failed to set up SDAccel, falling back to other platforms for testing.";
if (const auto* f = Registry::Get("tvm_callback_sdaccel_fake_compile")) {
xclbin = (*f)(code).operator std::string();
device_types = {"gpu", "cpu"};
platform_name = "";
}
}
}
if (xclbin == "") {
LOG(FATAL) << "Cannot compile Vivado HLS code.";
}
return OpenCLModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code);
return OpenCLModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code, device_types,
platform_name);
}

TVM_REGISTER_API("codegen.build_sdaccel")
Expand Down
4 changes: 3 additions & 1 deletion src/codegen/opt/build_opencl_off.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ Module OpenCLModuleCreate(
std::string data,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap,
std::string source) {
std::string source,
std::vector<std::string> device_types,
std::string platform_name) {
LOG(WARNING) << "OpenCL runtime not enabled, return a source module...";
return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "opencl");
}
Expand Down
2 changes: 1 addition & 1 deletion src/pass/verify_memory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ class MemoryAccessVerifier final : protected IRVisitor {

/// Check if a given DLDeviceType/TVMDeviceExtType value denotes GPU device.
static bool IsGPUDevice(int dev_type) {
return kDLGPU == dev_type || kDLOpenCL == dev_type ||
return kDLGPU == dev_type || kDLOpenCL == dev_type || kDLSDAccel == dev_type ||
kDLVulkan == dev_type || kDLMetal == dev_type ||
kDLROCM == dev_type || kOpenGL == dev_type;
}
Expand Down
1 change: 1 addition & 0 deletions src/runtime/c_runtime_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ inline std::string DeviceName(int type) {
case kDLCPU: return "cpu";
case kDLGPU: return "gpu";
case kDLOpenCL: return "opencl";
case kDLSDAccel: return "sdaccel";
case kDLVulkan: return "vulkan";
case kDLMetal: return "metal";
case kDLVPI: return "vpi";
Expand Down
16 changes: 11 additions & 5 deletions src/runtime/opencl/opencl_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ class OpenCLWorkspace final : public DeviceAPI {
public:
// global platform id
cl_platform_id platform_id;
// global platform name
std::string platform_name;
// global context of this process
cl_context context{nullptr};
// whether the workspace it initialized.
bool initialized_{false};
// the device type
std::string device_type;
// the devices
std::vector<cl_device_id> devices;
// the queues
Expand All @@ -128,11 +130,15 @@ class OpenCLWorkspace final : public DeviceAPI {
}
}
// Initialzie the device.
void Init();
void Init(const std::vector<std::string>& device_types, const std::string& platform_name = "");
// Check whether the given context is handled by this OpenCL workspace.
// SDAccel devices go through the same OpenCL runtime, so both kDLOpenCL
// and the extension device type kDLSDAccel are accepted here.
bool IsOpenCLDevice(TVMContext ctx) {
return ctx.device_type == kDLOpenCL ||
ctx.device_type == static_cast<DLDeviceType>(kDLSDAccel);
}
// get the queue of the context
cl_command_queue GetQueue(TVMContext ctx) {
CHECK_EQ(ctx.device_type, kDLOpenCL);
this->Init();
CHECK(IsOpenCLDevice(ctx));
CHECK(ctx.device_id >= 0 && static_cast<size_t>(ctx.device_id) < queues.size())
<< "Invalid OpenCL device_id=" << ctx.device_id;
return queues[ctx.device_id];
Expand Down
74 changes: 40 additions & 34 deletions src/runtime/opencl/opencl_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ void OpenCLWorkspace::SetDevice(TVMContext ctx) {

void OpenCLWorkspace::GetAttr(
TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) {
this->Init();
size_t index = static_cast<size_t>(ctx.device_id);
if (kind == kExist) {
*rv = static_cast<int>(index< devices.size());
Expand Down Expand Up @@ -99,7 +98,6 @@ void OpenCLWorkspace::GetAttr(

void* OpenCLWorkspace::AllocDataSpace(
TVMContext ctx, size_t size, size_t alignment, TVMType type_hint) {
this->Init();
CHECK(context != nullptr) << "No OpenCL device";
cl_int err_code;
cl_mem mptr = clCreateBuffer(
Expand All @@ -122,23 +120,22 @@ void OpenCLWorkspace::CopyDataFromTo(const void* from,
TVMContext ctx_to,
TVMType type_hint,
TVMStreamHandle stream) {
this->Init();
CHECK(stream == nullptr);
if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLOpenCL) {
if (IsOpenCLDevice(ctx_from) && IsOpenCLDevice(ctx_to)) {
OPENCL_CALL(clEnqueueCopyBuffer(
this->GetQueue(ctx_to),
static_cast<cl_mem>((void*)from), // NOLINT(*)
static_cast<cl_mem>(to),
from_offset, to_offset, size, 0, nullptr, nullptr));
} else if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLCPU) {
} else if (IsOpenCLDevice(ctx_from) && ctx_to.device_type == kDLCPU) {
OPENCL_CALL(clEnqueueReadBuffer(
this->GetQueue(ctx_from),
static_cast<cl_mem>((void*)from), // NOLINT(*)
CL_FALSE, from_offset, size,
static_cast<char*>(to) + to_offset,
0, nullptr, nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_from)));
} else if (ctx_from.device_type == kDLCPU && ctx_to.device_type == kDLOpenCL) {
} else if (ctx_from.device_type == kDLCPU && IsOpenCLDevice(ctx_to)) {
OPENCL_CALL(clEnqueueWriteBuffer(
this->GetQueue(ctx_to),
static_cast<cl_mem>(to),
Expand Down Expand Up @@ -226,38 +223,40 @@ bool MatchPlatformInfo(
return param_value.find(value) != std::string::npos;
}

void OpenCLWorkspace::Init() {
if (initialized_) return;
void OpenCLWorkspace::Init(const std::vector<std::string>& device_types,
const std::string& platform_name) {
if (context != nullptr) return;
std::lock_guard<std::mutex> lock(this->mu);
if (initialized_) return;
initialized_ = true;
if (context != nullptr) return;
// matched platforms
std::vector<cl_platform_id> platform_matched = cl::GetPlatformIDs();
if (platform_matched.size() == 0) {
std::vector<cl_platform_id> platform_ids = cl::GetPlatformIDs();
if (platform_ids.size() == 0) {
LOG(WARNING) << "No OpenCL platform matched given existing options ...";
return;
}
if (platform_matched.size() > 1) {
LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... ";
}
this->platform_id = platform_matched[0];
LOG(INFO) << "Initialize OpenCL platform \'"
<< cl::GetPlatformInfo(this->platform_id, CL_PLATFORM_NAME) << '\'';
std::string device_types[] = {"accelerator", "gpu", "cpu"};
std::vector<cl_device_id> devices_matched;
for (auto type : device_types) {
devices_matched = cl::GetDeviceIDs(this->platform_id, type);
if (devices_matched.size() > 0) {
break;
this->platform_id = nullptr;
for (auto platform_id : platform_ids) {
if (!MatchPlatformInfo(platform_id, CL_PLATFORM_NAME, platform_name)) {
continue;
}
for (auto device_type : device_types) {
std::vector<cl_device_id> devices_matched = cl::GetDeviceIDs(platform_id, device_type);
if (devices_matched.size() > 0) {
this->platform_id = platform_id;
this->platform_name = cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME);
this->device_type = device_type;
this->devices = devices_matched;
LOG(INFO) << "Initialize OpenCL platform \'" << this->platform_name << '\'';
break;
}
LOG(INFO) << "\'" << cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME)
<< "\' platform has no OpenCL device: " << device_type << " mode";
}
LOG(INFO) << "No OpenCL device any device matched given the options: " << type << " mode";
}
if (devices_matched.size() == 0) {
if (this->platform_id == nullptr) {
LOG(WARNING) << "No OpenCL device";
return;
}
this->devices = devices_matched;
cl_int err_code;
this->context = clCreateContext(
nullptr, this->devices.size(), &(this->devices[0]),
Expand All @@ -275,15 +274,22 @@ void OpenCLWorkspace::Init() {
}
}

bool InitOpenCL(TVMArgs args, TVMRetValue* rv) {
cl::OpenCLWorkspace::Global()->Init();
return true;
}

TVM_REGISTER_GLOBAL("device_api.opencl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
DeviceAPI* ptr = OpenCLWorkspace::Global().get();
*rv = static_cast<void*>(ptr);
OpenCLWorkspace* w = OpenCLWorkspace::Global().get();
w->Init({"gpu", "cpu"});
*rv = static_cast<void*>(w);
});

// Device API entry point for the SDAccel backend. Returns the shared
// OpenCL workspace, initialized against a Xilinx accelerator platform
// when one is present.
TVM_REGISTER_GLOBAL("device_api.sdaccel")
.set_body([](TVMArgs args, TVMRetValue* rv) {
OpenCLWorkspace* w = OpenCLWorkspace::Global().get();
// First try the real SDAccel setup: an "accelerator"-type device on the
// Xilinx OpenCL platform.
w->Init({"accelerator"}, "Xilinx");
if (w->context == nullptr) {
// No Xilinx platform was found; fall back to any GPU/CPU OpenCL
// platform so fake-compiled binaries can still be exercised in tests.
LOG(WARNING) << "Failed to set up SDAccel, falling back to other platforms for testing.";
w->Init({"gpu", "cpu"});
}
*rv = static_cast<void*>(w);
});

} // namespace cl
Expand Down
31 changes: 18 additions & 13 deletions src/runtime/opencl/opencl_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ class OpenCLModuleNode : public ModuleNode {
}

// Initialize the programs
void Init() {
void Init(const std::vector<std::string>& device_types, const std::string& platform_name) {
workspace_ = cl::OpenCLWorkspace::Global();
workspace_->Init();
workspace_->Init(device_types, platform_name);
CHECK(workspace_->context != nullptr) << "No OpenCL device";
if (fmt_ == "cl") {
const char* s = data_.c_str();
Expand Down Expand Up @@ -283,59 +283,64 @@ Module OpenCLModuleCreate(
std::string data,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap,
std::string source) {
std::string source,
std::vector<std::string> device_types,
std::string platform_name) {
std::shared_ptr<OpenCLModuleNode> n =
std::make_shared<OpenCLModuleNode>(data, fmt, fmap, source);
n->Init();
n->Init(device_types, platform_name);
return Module(n);
}

// Load module from module.
Module OpenCLModuleLoadFile(const std::string& file_name,
const std::string& format) {
const std::string& format,
const std::vector<std::string>& device_types,
const std::string& platform_name = "") {
std::string data;
std::unordered_map<std::string, FunctionInfo> fmap;
std::string fmt = GetFileFormat(file_name, format);
std::string meta_file = GetMetaFilePath(file_name);
LoadBinaryFromFile(file_name, &data);
LoadMetaDataFromFile(meta_file, &fmap);
return OpenCLModuleCreate(data, fmt, fmap, std::string());
return OpenCLModuleCreate(data, fmt, fmap, std::string(), device_types, platform_name);
}

Module OpenCLModuleLoadBinary(void* strm) {
Module OpenCLModuleLoadBinary(void* strm, const std::vector<std::string>& device_types,
const std::string& platform_name = "") {
dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
std::string data;
std::unordered_map<std::string, FunctionInfo> fmap;
std::string fmt;
stream->Read(&fmt);
stream->Read(&fmap);
stream->Read(&data);
return OpenCLModuleCreate(data, fmt, fmap, std::string());
return OpenCLModuleCreate(data, fmt, fmap, std::string(), device_types, platform_name);
}

TVM_REGISTER_GLOBAL("module.loadfile_cl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenCLModuleLoadFile(args[0], args[1]);
*rv = OpenCLModuleLoadFile(args[0], args[1], {"gpu", "cpu"});
});

TVM_REGISTER_GLOBAL("module.loadfile_clbin")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenCLModuleLoadFile(args[0], args[1]);
*rv = OpenCLModuleLoadFile(args[0], args[1], {"gpu", "cpu"});
});

TVM_REGISTER_GLOBAL("module.loadfile_xclbin")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenCLModuleLoadFile(args[0], args[1]);
*rv = OpenCLModuleLoadFile(args[0], args[1], {"accelerator"}, "Xilinx");
});

TVM_REGISTER_GLOBAL("module.loadfile_awsxclbin")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenCLModuleLoadFile(args[0], args[1]);
*rv = OpenCLModuleLoadFile(args[0], args[1], {"accelerator"}, "Xilinx");
});

TVM_REGISTER_GLOBAL("module.loadbinary_opencl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenCLModuleLoadBinary(args[0]);
*rv = OpenCLModuleLoadBinary(args[0], {"gpu", "cpu"});
});
} // namespace runtime
} // namespace tvm
Loading

0 comments on commit a6bc7b9

Please sign in to comment.