Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Two small fixes to AMDGPU codegen for LLVM 10+ and ROCm 3.5+ #5920

Merged
merged 1 commit into from
Jun 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/tvm/runtime/device_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ enum DeviceAttrKind : int {
kMultiProcessorCount = 7,
kMaxThreadDimensions = 8,
kMaxRegistersPerBlock = 9,
kGcnArch = 10
kGcnArch = 10,
kApiVersion = 11
};

/*! \brief Number of bytes each allocation must align to */
Expand Down
4 changes: 4 additions & 0 deletions src/runtime/cuda/cuda_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ class CUDADeviceAPI final : public DeviceAPI {
}
case kGcnArch:
return;
case kApiVersion: {
*rv = CUDA_VERSION;
return;
}
}
*rv = value;
}
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/metal/metal_device_api.mm
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
return;
case kGcnArch:
return;
case kApiVersion:
return;
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/runtime/opencl/opencl_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ void OpenCLWorkspace::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue*
return;
case kGcnArch:
return;
case kApiVersion:
return;
}
}

Expand Down
1 change: 1 addition & 0 deletions src/runtime/rocm/rocm_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define TVM_RUNTIME_ROCM_ROCM_COMMON_H_

#include <hip/hip_runtime_api.h>
#include <hip/hip_version.h>
#include <tvm/runtime/packed_func.h>

#include <string>
Expand Down
8 changes: 7 additions & 1 deletion src/runtime/rocm/rocm_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,19 @@ class ROCMDeviceAPI final : public DeviceAPI {
return;
}
case kMaxRegistersPerBlock:
return;
ROCM_CALL(
hipDeviceGetAttribute(&value, hipDeviceAttributeMaxRegistersPerBlock, ctx.device_id));
break;
case kGcnArch: {
hipDeviceProp_t prop;
ROCM_CALL(hipGetDeviceProperties(&prop, ctx.device_id));
*rv = prop.gcnArch;
return;
}
case kApiVersion: {
*rv = HIP_VERSION;
return;
}
}
*rv = value;
}
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/vulkan/vulkan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ void VulkanDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue*
return;
case kGcnArch:
return;
case kApiVersion:
return;
}
}

Expand Down
29 changes: 25 additions & 4 deletions src/target/llvm/codegen_amdgpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,13 @@ class CodeGenAMDGPU : public CodeGenLLVM {
llvm::GlobalVariable* global = new llvm::GlobalVariable(
*module_, type, false, llvm::GlobalValue::PrivateLinkage, 0, ".shared", nullptr,
llvm::GlobalValue::NotThreadLocal, shared_address_space);
if (global->getAlignment() < static_cast<uint32_t>(info.alignment)) {
#if TVM_LLVM_VERSION >= 100
global->setAlignment(llvm::Align(info.alignment));
global->setAlignment(llvm::Align(info.alignment));
#else
global->setAlignment(info.alignment);
global->setAlignment(info.alignment);
#endif
}
buf = global;
}

Expand Down Expand Up @@ -212,6 +214,20 @@ inline int DetectROCMComputeVersion(const std::string& target) {
return 900;
}

inline int DetectROCMApiVersion() {
TVMContext tvm_ctx;
tvm_ctx.device_type = kDLROCM;
tvm_ctx.device_id = 0;
tvm::runtime::DeviceAPI* api = tvm::runtime::DeviceAPI::Get(tvm_ctx, true);
if (api != nullptr) {
TVMRetValue val;
api->GetAttr(tvm_ctx, tvm::runtime::kApiVersion, &val);
return val.operator int();
}
LOG(WARNING) << "Cannot detect ROCm version, assume >= 3.5";
return 305;
}

runtime::Module BuildAMDGPU(IRModule mod, std::string target) {
#if TVM_LLVM_VERSION < 90
LOG(FATAL) << "AMDGPU backend requires at least LLVM 9";
Expand All @@ -221,8 +237,13 @@ runtime::Module BuildAMDGPU(IRModule mod, std::string target) {
InitializeLLVM();
CHECK(target.length() >= 4 && target.substr(0, 4) == "rocm");
std::ostringstream config;
config << "-mtriple=amdgcn-amd-amdhsa-hcc -mcpu=gfx" << DetectROCMComputeVersion(target)
<< " -mattr=-code-object-v3 " << target.substr(4, target.length() - 4);
config << "-mtriple=amdgcn-amd-amdhsa-hcc -mcpu=gfx" << DetectROCMComputeVersion(target);
if (DetectROCMApiVersion() < 305) {
// before ROCm 3.5 we needed code object v2, starting
// with 3.5 we need v3 (this argument disables v3)
config << " -mattr=-code-object-v3 ";
}
config << target.substr(4, target.length() - 4);
std::unique_ptr<llvm::TargetMachine> tm = GetLLVMTargetMachine(config.str());
std::unique_ptr<llvm::LLVMContext> ctx(new llvm::LLVMContext());
// careful: cg will hold a naked pointer reference to ctx, so it should
Expand Down