From a6bc7b993a3e41bd611d8fc70967e0399adba41c Mon Sep 17 00:00:00 2001
From: MORITA Kazutaka
Date: Thu, 21 Jun 2018 17:24:01 +0900
Subject: [PATCH] Add support for multiple OpenCL platforms

---
 include/tvm/runtime/c_runtime_api.h         |  1 +
 python/tvm/_ffi/runtime_ctypes.py           |  3 +-
 python/tvm/contrib/sdaccel.py               | 16 +++--
 src/codegen/codegen_opencl.cc               |  2 +-
 src/codegen/codegen_vhls.cc                 | 20 +++++-
 src/codegen/opt/build_opencl_off.cc         |  4 +-
 src/pass/verify_memory.cc                   |  2 +-
 src/runtime/c_runtime_api.cc                |  1 +
 src/runtime/opencl/opencl_common.h          | 16 +++--
 src/runtime/opencl/opencl_device_api.cc     | 74 +++++++++++----------
 src/runtime/opencl/opencl_module.cc         | 31 +++++----
 src/runtime/opencl/opencl_module.h          |  4 +-
 tests/python/integration/test_ewise_fpga.py |  3 -
 13 files changed, 107 insertions(+), 70 deletions(-)

diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index c61a4884a1f5..7f3a11d9ddab 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -60,6 +60,7 @@ typedef int64_t tvm_index_t;
 
 /*! \brief Extension device types in TVM */
 typedef enum {
+  kDLSDAccel = 6,
   kDLVulkan = 7,
   kOpenGL = 11,
   // Extension DRAM type, used for quickly test extension device
diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
index d862a5abd497..9609f867576b 100644
--- a/python/tvm/_ffi/runtime_ctypes.py
+++ b/python/tvm/_ffi/runtime_ctypes.py
@@ -95,6 +95,7 @@ class TVMContext(ctypes.Structure):
         1 : 'cpu',
         2 : 'gpu',
         4 : 'opencl',
+        6 : 'sdaccel',
         7 : 'vulkan',
         8 : 'metal',
         9 : 'vpi',
@@ -111,7 +112,7 @@ class TVMContext(ctypes.Structure):
         'nvptx': 2,
         'cl': 4,
         'opencl': 4,
-        'sdaccel': 4,
+        'sdaccel': 6,
         'vulkan': 7,
         'metal': 8,
         'vpi': 9,
diff --git a/python/tvm/contrib/sdaccel.py b/python/tvm/contrib/sdaccel.py
index 12dba31a4ab5..ab07304e82e0 100644
--- a/python/tvm/contrib/sdaccel.py
+++ b/python/tvm/contrib/sdaccel.py
@@ -27,11 +27,12 @@ def _vhls_to_opencl(code):
     return out
 
 
+@register_func("tvm_callback_sdaccel_fake_compile")
 def _fake_compile_vhls(code):
     """Fake compile Vivado HLS code for SDAccel.
 
-    Compile the Vivado HLS code as an OpenCL code, and generate a program
-    binary for GPU which can be used instead of xclbin.
+    Compile the Vivado HLS code as OpenCL code, and generate a program binary
+    on another available OpenCL platform, for testing in place of an xclbin.
 
     Parameters
     ----------
@@ -46,8 +47,11 @@ def _fake_compile_vhls(code):
     try:
         import pyopencl as cl
     except ImportError:
-        raise ImportError('PyOpenCL is required for testing SDAccel backend.')
-    ctx = cl.Context(dev_type=cl.device_type.GPU)
+        raise RuntimeError('PyOpenCL is required for testing SDAccel backend.')
+    platforms = [pf for pf in cl.get_platforms() if pf.name != "Xilinx"]
+    if not platforms:
+        raise RuntimeError("No OpenCL platform is available.")
+    ctx = cl.Context(properties=[(cl.context_properties.PLATFORM, platforms[0])])
     program = cl.Program(ctx, _vhls_to_opencl(code)).build()
     binary = bytearray(program.binaries[0])
     return binary
@@ -87,9 +91,7 @@ def compile_vhls(code, kernel):
     platform = os.environ.get("XCL_PLATFORM", os.environ.get("AWS_PLATFORM"))
 
     if platform is None:
-        # If we don't have the Xilinx toolchain, create a program binary for
-        # GPU and use it for testing.
-        return _fake_compile_vhls(code)
+        raise RuntimeError("No Xilinx device specified.")
 
     # build xo
     args = [xocc, "-c", "-t", target, "--platform", platform, "-o", tmp_xo, "-k", kernel] + \
diff --git a/src/codegen/codegen_opencl.cc b/src/codegen/codegen_opencl.cc
index 2d5026e827e2..ccf54dfc89d1 100644
--- a/src/codegen/codegen_opencl.cc
+++ b/src/codegen/codegen_opencl.cc
@@ -218,7 +218,7 @@ runtime::Module BuildOpenCL(Array<LoweredFunc> funcs) {
   if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) {
     code = (*f)(code).operator std::string();
   }
-  return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs), code);
+  return OpenCLModuleCreate(code, "cl", ExtractFuncInfo(funcs), code, {"gpu", "cpu"});
 }
 
 TVM_REGISTER_API("codegen.build_opencl")
diff --git a/src/codegen/codegen_vhls.cc b/src/codegen/codegen_vhls.cc
index 3cb8b9a346c2..ffe1f02b17ba 100644
--- a/src/codegen/codegen_vhls.cc
+++ b/src/codegen/codegen_vhls.cc
@@ -85,13 +85,27 @@ runtime::Module BuildSDAccel(Array<LoweredFunc> funcs) {
     code = (*f)(code).operator std::string();
   }
 
+  std::vector<std::string> device_types = {"accelerator"};
+  std::string platform_name = "Xilinx";
   std::string xclbin;
   if (const auto* f = Registry::Get("tvm_callback_sdaccel_compile")) {
-    xclbin = (*f)(code, funcname).operator std::string();
-  } else {
+    try {
+      xclbin = (*f)(code, funcname).operator std::string();
+    } catch (const dmlc::Error& e) {
+      LOG(WARNING) << e.what();
+      LOG(WARNING) << "Failed to set up SDAccel, falling back to other platforms for testing.";
+      if (const auto* f = Registry::Get("tvm_callback_sdaccel_fake_compile")) {
+        xclbin = (*f)(code).operator std::string();
+        device_types = {"gpu", "cpu"};
+        platform_name = "";
+      }
+    }
+  }
+  if (xclbin == "") {
     LOG(FATAL) << "Cannot compile Vivado HLS code.";
   }
-  return OpenCLModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code);
+  return OpenCLModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code, device_types,
+                            platform_name);
 }
 
 TVM_REGISTER_API("codegen.build_sdaccel")
diff --git a/src/codegen/opt/build_opencl_off.cc b/src/codegen/opt/build_opencl_off.cc
index fc962d4840e9..0cfccffaa1bb 100644
--- a/src/codegen/opt/build_opencl_off.cc
+++ b/src/codegen/opt/build_opencl_off.cc
@@ -12,7 +12,9 @@ Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,
     std::unordered_map<std::string, FunctionInfo> fmap,
-    std::string source) {
+    std::string source,
+    std::vector<std::string> device_types,
+    std::string platform_name) {
   LOG(WARNING) << "OpenCL runtime not enabled, return a source module...";
   return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "opencl");
 }
diff --git a/src/pass/verify_memory.cc b/src/pass/verify_memory.cc
index e928bedf266c..24e94eab571d 100644
--- a/src/pass/verify_memory.cc
+++ b/src/pass/verify_memory.cc
@@ -139,7 +139,7 @@ class MemoryAccessVerifier final : protected IRVisitor {
 
   /// Check if a given DLDeviceType/TVMDeviceExtType value denotes GPU device.
   static bool IsGPUDevice(int dev_type) {
-    return kDLGPU == dev_type || kDLOpenCL == dev_type ||
+    return kDLGPU == dev_type || kDLOpenCL == dev_type || kDLSDAccel == dev_type ||
            kDLVulkan == dev_type || kDLMetal == dev_type ||
            kDLROCM == dev_type || kOpenGL == dev_type;
   }
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index df3a19f306e0..5eb39abcc71a 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -31,6 +31,7 @@ inline std::string DeviceName(int type) {
     case kDLCPU: return "cpu";
     case kDLGPU: return "gpu";
     case kDLOpenCL: return "opencl";
+    case kDLSDAccel: return "sdaccel";
    case kDLVulkan: return "vulkan";
     case kDLMetal: return "metal";
     case kDLVPI: return "vpi";
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index e2db24f85769..10fc87d18351 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -104,10 +104,12 @@ class OpenCLWorkspace final : public DeviceAPI {
  public:
   // global platform id
   cl_platform_id platform_id;
+  // global platform name
+  std::string platform_name;
   // global context of this process
   cl_context context{nullptr};
-  // whether the workspace it initialized.
-  bool initialized_{false};
+  // the device type
+  std::string device_type;
   // the devices
   std::vector<cl_device_id> devices;
   // the queues
@@ -128,11 +130,15 @@ class OpenCLWorkspace final : public DeviceAPI {
     }
   }
   // Initialzie the device.
-  void Init();
+  void Init(const std::vector<std::string>& device_types, const std::string& platform_name = "");
+  // Check whether the context is OpenCL or not.
+  bool IsOpenCLDevice(TVMContext ctx) {
+    return ctx.device_type == kDLOpenCL ||
+        ctx.device_type == static_cast<DLDeviceType>(kDLSDAccel);
+  }
   // get the queue of the context
   cl_command_queue GetQueue(TVMContext ctx) {
-    CHECK_EQ(ctx.device_type, kDLOpenCL);
-    this->Init();
+    CHECK(IsOpenCLDevice(ctx));
     CHECK(ctx.device_id >= 0 && static_cast<size_t>(ctx.device_id) < queues.size())
         << "Invalid OpenCL device_id=" << ctx.device_id;
     return queues[ctx.device_id];
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index 8bd86d22aee5..ad7c66ddfc0d 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -24,7 +24,6 @@ void OpenCLWorkspace::SetDevice(TVMContext ctx) {
 
 void OpenCLWorkspace::GetAttr(
     TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) {
-  this->Init();
   size_t index = static_cast<size_t>(ctx.device_id);
   if (kind == kExist) {
     *rv = static_cast<int>(index< devices.size());
@@ -99,7 +98,6 @@ void OpenCLWorkspace::GetAttr(
 
 void* OpenCLWorkspace::AllocDataSpace(
     TVMContext ctx, size_t size, size_t alignment, TVMType type_hint) {
-  this->Init();
   CHECK(context != nullptr) << "No OpenCL device";
   cl_int err_code;
   cl_mem mptr = clCreateBuffer(
@@ -122,15 +120,14 @@ void OpenCLWorkspace::CopyDataFromTo(const void* from,
                                      TVMContext ctx_to,
                                      TVMType type_hint,
                                      TVMStreamHandle stream) {
-  this->Init();
   CHECK(stream == nullptr);
-  if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLOpenCL) {
+  if (IsOpenCLDevice(ctx_from) && IsOpenCLDevice(ctx_to)) {
     OPENCL_CALL(clEnqueueCopyBuffer(
         this->GetQueue(ctx_to),
         static_cast<cl_mem>((void*)from),  // NOLINT(*)
         static_cast<cl_mem>(to),
         from_offset, to_offset, size, 0, nullptr, nullptr));
-  } else if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLCPU) {
+  } else if (IsOpenCLDevice(ctx_from) && ctx_to.device_type == kDLCPU) {
     OPENCL_CALL(clEnqueueReadBuffer(
         this->GetQueue(ctx_from),
         static_cast<cl_mem>((void*)from),  // NOLINT(*)
@@ -138,7 +135,7 @@ void OpenCLWorkspace::CopyDataFromTo(const void* from,
         static_cast<char*>(to) + to_offset,
         0, nullptr, nullptr));
     OPENCL_CALL(clFinish(this->GetQueue(ctx_from)));
-  } else if (ctx_from.device_type == kDLCPU && ctx_to.device_type == kDLOpenCL) {
+  } else if (ctx_from.device_type == kDLCPU && IsOpenCLDevice(ctx_to)) {
     OPENCL_CALL(clEnqueueWriteBuffer(
         this->GetQueue(ctx_to),
         static_cast<cl_mem>(to),
@@ -226,38 +223,40 @@ bool MatchPlatformInfo(
   return param_value.find(value) != std::string::npos;
 }
 
-void OpenCLWorkspace::Init() {
-  if (initialized_) return;
+void OpenCLWorkspace::Init(const std::vector<std::string>& device_types,
+                           const std::string& platform_name) {
+  if (context != nullptr) return;
   std::lock_guard<std::mutex> lock(this->mu);
-  if (initialized_) return;
-  initialized_ = true;
   if (context != nullptr) return;
   // matched platforms
-  std::vector<cl_platform_id> platform_matched = cl::GetPlatformIDs();
-  if (platform_matched.size() == 0) {
+  std::vector<cl_platform_id> platform_ids = cl::GetPlatformIDs();
+  if (platform_ids.size() == 0) {
     LOG(WARNING) << "No OpenCL platform matched given existing options ...";
     return;
   }
-  if (platform_matched.size() > 1) {
-    LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... ";
-  }
-  this->platform_id = platform_matched[0];
-  LOG(INFO) << "Initialize OpenCL platform \'"
-            << cl::GetPlatformInfo(this->platform_id, CL_PLATFORM_NAME) << '\'';
-  std::string device_types[] = {"accelerator", "gpu", "cpu"};
-  std::vector<cl_device_id> devices_matched;
-  for (auto type : device_types) {
-    devices_matched = cl::GetDeviceIDs(this->platform_id, type);
-    if (devices_matched.size() > 0) {
-      break;
+  this->platform_id = nullptr;
+  for (auto platform_id : platform_ids) {
+    if (!MatchPlatformInfo(platform_id, CL_PLATFORM_NAME, platform_name)) {
+      continue;
+    }
+    for (auto device_type : device_types) {
+      std::vector<cl_device_id> devices_matched = cl::GetDeviceIDs(platform_id, device_type);
+      if (devices_matched.size() > 0) {
+        this->platform_id = platform_id;
+        this->platform_name = cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME);
+        this->device_type = device_type;
+        this->devices = devices_matched;
+        LOG(INFO) << "Initialize OpenCL platform \'" << this->platform_name << '\'';
+        break;
+      }
+      LOG(INFO) << "\'" << cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME)
+                << "\' platform has no OpenCL device: " << device_type << " mode";
     }
-    LOG(INFO) << "No OpenCL device any device matched given the options: " << type << " mode";
   }
-  if (devices_matched.size() == 0) {
+  if (this->platform_id == nullptr) {
    LOG(WARNING) << "No OpenCL device";
     return;
   }
-  this->devices = devices_matched;
   cl_int err_code;
   this->context = clCreateContext(
       nullptr, this->devices.size(), &(this->devices[0]),
@@ -275,15 +274,22 @@
   }
 }
 
-bool InitOpenCL(TVMArgs args, TVMRetValue* rv) {
-  cl::OpenCLWorkspace::Global()->Init();
-  return true;
-}
-
 TVM_REGISTER_GLOBAL("device_api.opencl")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    DeviceAPI* ptr = OpenCLWorkspace::Global().get();
-    *rv = static_cast<void*>(ptr);
+    OpenCLWorkspace* w = OpenCLWorkspace::Global().get();
+    w->Init({"gpu", "cpu"});
+    *rv = static_cast<void*>(w);
+  });
+
+TVM_REGISTER_GLOBAL("device_api.sdaccel")
+.set_body([](TVMArgs args, TVMRetValue* rv) {
+    OpenCLWorkspace* w = OpenCLWorkspace::Global().get();
+    w->Init({"accelerator"}, "Xilinx");
+    if (w->context == nullptr) {
+      LOG(WARNING) << "Failed to set up SDAccel, falling back to other platforms for testing.";
+      w->Init({"gpu", "cpu"});
+    }
+    *rv = static_cast<void*>(w);
   });
 
 }  // namespace cl
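The platform selection added to OpenCLWorkspace::Init above first filters platforms by name (via MatchPlatformInfo) and then tries the requested device types in priority order, which is how device_api.sdaccel can prefer a Xilinx accelerator and fall back to a gpu/cpu platform. For illustration only, a rough PyOpenCL sketch of that selection loop; the helper name pick_platform is hypothetical and not part of this patch:

import pyopencl as cl

def pick_platform(platform_name="", device_types=("gpu", "cpu")):
    # An empty platform_name matches any platform, mirroring the substring
    # check in MatchPlatformInfo; device types are tried in priority order.
    for pf in cl.get_platforms():
        if platform_name and platform_name not in pf.name:
            continue
        all_devices = pf.get_devices()  # CL_DEVICE_TYPE_ALL
        for dev_type in device_types:
            flag = getattr(cl.device_type, dev_type.upper())
            devices = [d for d in all_devices if d.type & flag]
            if devices:
                return pf, devices
    return None, []

# device_api.sdaccel behaves roughly like pick_platform("Xilinx", ("accelerator",))
# followed by pick_platform("", ("gpu", "cpu")) when no Xilinx device exists.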
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 5d67ad286875..d1c4eb32553e 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -87,9 +87,9 @@ class OpenCLModuleNode : public ModuleNode {
   }
 
   // Initialize the programs
-  void Init() {
+  void Init(const std::vector<std::string>& device_types, const std::string& platform_name) {
     workspace_ = cl::OpenCLWorkspace::Global();
-    workspace_->Init();
+    workspace_->Init(device_types, platform_name);
     CHECK(workspace_->context != nullptr) << "No OpenCL device";
     if (fmt_ == "cl") {
       const char* s = data_.c_str();
@@ -283,26 +283,31 @@ Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,
     std::unordered_map<std::string, FunctionInfo> fmap,
-    std::string source) {
+    std::string source,
+    std::vector<std::string> device_types,
+    std::string platform_name) {
   std::shared_ptr<OpenCLModuleNode> n =
       std::make_shared<OpenCLModuleNode>(data, fmt, fmap, source);
-  n->Init();
+  n->Init(device_types, platform_name);
   return Module(n);
 }
 
 // Load module from module.
 Module OpenCLModuleLoadFile(const std::string& file_name,
-                            const std::string& format) {
+                            const std::string& format,
+                            const std::vector<std::string>& device_types,
+                            const std::string& platform_name = "") {
   std::string data;
   std::unordered_map<std::string, FunctionInfo> fmap;
   std::string fmt = GetFileFormat(file_name, format);
   std::string meta_file = GetMetaFilePath(file_name);
   LoadBinaryFromFile(file_name, &data);
   LoadMetaDataFromFile(meta_file, &fmap);
-  return OpenCLModuleCreate(data, fmt, fmap, std::string());
+  return OpenCLModuleCreate(data, fmt, fmap, std::string(), device_types, platform_name);
 }
 
-Module OpenCLModuleLoadBinary(void* strm) {
+Module OpenCLModuleLoadBinary(void* strm, const std::vector<std::string>& device_types,
+                              const std::string& platform_name = "") {
   dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
   std::string data;
   std::unordered_map<std::string, FunctionInfo> fmap;
@@ -310,32 +315,32 @@ Module OpenCLModuleLoadBinary(void* strm) {
   stream->Read(&fmt);
   stream->Read(&fmap);
   stream->Read(&data);
-  return OpenCLModuleCreate(data, fmt, fmap, std::string());
+  return OpenCLModuleCreate(data, fmt, fmap, std::string(), device_types, platform_name);
 }
 
 TVM_REGISTER_GLOBAL("module.loadfile_cl")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"gpu", "cpu"});
   });
 
 TVM_REGISTER_GLOBAL("module.loadfile_clbin")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"gpu", "cpu"});
   });
 
 TVM_REGISTER_GLOBAL("module.loadfile_xclbin")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"accelerator"}, "Xilinx");
   });
 
 TVM_REGISTER_GLOBAL("module.loadfile_awsxclbin")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadFile(args[0], args[1]);
+    *rv = OpenCLModuleLoadFile(args[0], args[1], {"accelerator"}, "Xilinx");
   });
 
 TVM_REGISTER_GLOBAL("module.loadbinary_opencl")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
-    *rv = OpenCLModuleLoadBinary(args[0]);
+    *rv = OpenCLModuleLoadBinary(args[0], {"gpu", "cpu"});
   });
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/opencl/opencl_module.h b/src/runtime/opencl/opencl_module.h
index b6e2a1e0e88a..a6ad67afe4db 100644
--- a/src/runtime/opencl/opencl_module.h
+++ b/src/runtime/opencl/opencl_module.h
@@ -25,7 +25,9 @@ Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,
     std::unordered_map<std::string, FunctionInfo> fmap,
-    std::string source);
+    std::string source,
+    std::vector<std::string> device_types,
+    std::string platform_name = "");
 }  // namespace runtime
 }  // namespace tvm
 #endif  // TVM_RUNTIME_OPENCL_OPENCL_MODULE_H_
diff --git a/tests/python/integration/test_ewise_fpga.py b/tests/python/integration/test_ewise_fpga.py
index fb7ca807340d..34f336c92775 100644
--- a/tests/python/integration/test_ewise_fpga.py
+++ b/tests/python/integration/test_ewise_fpga.py
@@ -1,8 +1,5 @@
 import tvm
 import numpy as np
-import os
-
-os.environ["XCL_EMULATION_MODE"] = "1"
 
 @tvm.register_func
 def tvm_callback_vhls_postproc(code):
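For reference, the fallback path this patch enables can be exercised end to end from Python, roughly as in the sketch below. It assumes the TVM Python API of this era (tvm.var, tvm.context, tvm.build) and no Xilinx toolchain installed, so BuildSDAccel falls back to tvm_callback_sdaccel_fake_compile and a non-Xilinx OpenCL platform; the kernel mirrors tests/python/integration/test_ewise_fpga.py and is illustrative only:

import numpy as np
import tvm

# Trivial elementwise kernel scheduled on the SDAccel "pipeline" axis.
n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)
px, x = s[B].split(B.op.axis[0], nparts=1)
s[B].bind(px, tvm.thread_axis("pipeline"))

# "sdaccel" is now its own device type (6); device_api.sdaccel tries the
# Xilinx "accelerator" platform first and falls back to gpu/cpu OpenCL.
ctx = tvm.context("sdaccel", 0)
if ctx.exist:
    fadd = tvm.build(s, [A, B], "sdaccel")
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=B.dtype), ctx)
    fadd(a, b)
    np.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1.0)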