From cc93bc44598a50d38c8d92b3c36d00ed75e532d5 Mon Sep 17 00:00:00 2001 From: Kristina Bessonova Date: Tue, 27 Mar 2018 11:10:46 +0300 Subject: [PATCH 1/2] Generate OpControlBarrier for Subgroup scope for OpenCL 1.2/2.0 The 'cl_intel_subgroup' extension allows using the sub_group_barrier() built-in with OpenCL 1.2/2.0. This requires translating the built-in to the regular (core) SPIR-V instruction OpControlBarrier. Note: currently clang generates nothing representing vendor extensions like 'cl_intel_subgroup'. So, the SPIR-V translator doesn't check that the extension was enabled. It assumes that if a built-in or a type which belongs to one of the supported extensions is used in the IR, that extension is enabled. --- lib/SPIRV/OCL20ToSPIRV.cpp | 3 +- .../OpControlBarrier_cl20_subgroup.ll | 122 ++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 test/transcoding/OpControlBarrier_cl20_subgroup.ll diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp index a8b8e8c7b0..f9115a2c19 100644 --- a/lib/SPIRV/OCL20ToSPIRV.cpp +++ b/lib/SPIRV/OCL20ToSPIRV.cpp @@ -478,7 +478,8 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) { return; } if (DemangledName == kOCLBuiltinName::WorkGroupBarrier || - DemangledName == kOCLBuiltinName::Barrier) { + DemangledName == kOCLBuiltinName::Barrier || + DemangledName == kOCLBuiltinName::SubGroupBarrier) { visitCallBarrier(&CI); return; } diff --git a/test/transcoding/OpControlBarrier_cl20_subgroup.ll b/test/transcoding/OpControlBarrier_cl20_subgroup.ll new file mode 100644 index 0000000000..45b4caebcd --- /dev/null +++ b/test/transcoding/OpControlBarrier_cl20_subgroup.ll @@ -0,0 +1,122 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o %t.txt +; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-LLVM: call spir_func void 
@_Z17sub_group_barrierji(i32 2, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 1, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 4, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 3, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 5, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 7, i32 1) #{{[0-9]+}} + +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 2, i32 0) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 2, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 2, i32 2) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 2, i32 3) #{{[0-9]+}} + +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 1, i32 0) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 1, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 1, i32 2) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 1, i32 3) #{{[0-9]+}} + +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 4, i32 0) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 4, i32 1) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 4, i32 2) #{{[0-9]+}} +; CHECK-LLVM-NEXT: call spir_func void @_Z17sub_group_barrierji(i32 4, i32 3) #{{[0-9]+}} + + +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[MemSema1:[0-9]+]] 512 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[MemSema2:[0-9]+]] 256 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[MemSema3:[0-9]+]] 2048 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[MemSema4:[0-9]+]] 768 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[MemSema5:[0-9]+]] 2304 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} 
[[MemSema6:[0-9]+]] 2816 + +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[ScopeWorkItem:[0-9]+]] 4 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[ScopeWorkGroup:[0-9]+]] 2 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[ScopeDevice:[0-9]+]] 1 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[ScopeCrossDevice:[0-9]+]] 0 +; CHECK-SPIRV-DAG: 4 Constant {{[0-9]+}} [[ScopeSubGroup:[0-9]+]] 3 + +; CHECK-SPIRV: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema1]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema2]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema3]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema4]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema5]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema6]] + +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkItem]] [[MemSema1]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema1]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeDevice]] [[MemSema1]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeCrossDevice]] [[MemSema1]] + +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkItem]] [[MemSema2]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema2]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeDevice]] [[MemSema2]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeCrossDevice]] [[MemSema2]] + +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkItem]] [[MemSema3]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeWorkGroup]] [[MemSema3]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeDevice]] [[MemSema3]] +; CHECK-SPIRV-NEXT: 4 ControlBarrier [[ScopeSubGroup]] [[ScopeCrossDevice]] [[MemSema3]] + +; ModuleID = 'sub_group_barrier.cl' 
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Function Attrs: nounwind +define spir_kernel void @test() #0 { +entry: + call spir_func void @_Z17sub_group_barrierj(i32 2) ; global mem fence + call spir_func void @_Z17sub_group_barrierj(i32 1) ; local mem fence + call spir_func void @_Z17sub_group_barrierj(i32 4) ; image mem fence + + call spir_func void @_Z17sub_group_barrierj(i32 3) ; global | local + call spir_func void @_Z17sub_group_barrierj(i32 5) ; local | image + call spir_func void @_Z17sub_group_barrierj(i32 7) ; global | local | image + + call spir_func void @_Z17sub_group_barrierji(i32 2, i32 0) ; global mem fence + memory_scope_work_item + call spir_func void @_Z17sub_group_barrierji(i32 2, i32 1) ; global mem fence + memory_scope_work_group + call spir_func void @_Z17sub_group_barrierji(i32 2, i32 2) ; global mem fence + memory_scope_device + call spir_func void @_Z17sub_group_barrierji(i32 2, i32 3) ; global mem fence + memory_scope_all_svm_devices + + call spir_func void @_Z17sub_group_barrierji(i32 1, i32 0) ; local mem fence + memory_scope_work_item + call spir_func void @_Z17sub_group_barrierji(i32 1, i32 1) ; local mem fence + memory_scope_work_group + call spir_func void @_Z17sub_group_barrierji(i32 1, i32 2) ; local mem fence + memory_scope_device + call spir_func void @_Z17sub_group_barrierji(i32 1, i32 3) ; local mem fence + memory_scope_all_svm_devices + + call spir_func void @_Z17sub_group_barrierji(i32 4, i32 0) ; image mem fence + memory_scope_work_item + call spir_func void @_Z17sub_group_barrierji(i32 4, i32 1) ; image mem fence + memory_scope_work_group + call spir_func void @_Z17sub_group_barrierji(i32 4, i32 2) ; image mem fence + memory_scope_device + call spir_func void @_Z17sub_group_barrierji(i32 4, i32 3) ; image mem fence + memory_scope_all_svm_devices + + ret void +} + +declare spir_func void 
@_Z17sub_group_barrierj(i32) #1 +declare spir_func void @_Z17sub_group_barrierji(i32, i32) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!opencl.kernels = !{!0} +!rpencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!6} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} +!opencl.compiler.options = !{!8} + +!0 = !{void ()* @test, !1, !2, !3, !4, !5} +!1 = !{!"kernel_arg_addr_space"} +!2 = !{!"kernel_arg_access_qual"} +!3 = !{!"kernel_arg_type"} +!4 = !{!"kernel_arg_base_type"} +!5 = !{!"kernel_arg_type_qual"} +!6 = !{i32 1, i32 2} +!7 = !{i32 2, i32 0} +!8 = !{} From 8074435149ea9ee9db7a4980518139f1a268a8e3 Mon Sep 17 00:00:00 2001 From: neildhickey Date: Wed, 19 Sep 2018 11:08:52 +0100 Subject: [PATCH 2/2] Update OpControlBarrier_cl20_subgroup.ll --- test/transcoding/OpControlBarrier_cl20_subgroup.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/transcoding/OpControlBarrier_cl20_subgroup.ll b/test/transcoding/OpControlBarrier_cl20_subgroup.ll index 45b4caebcd..0ad3bd4d8c 100644 --- a/test/transcoding/OpControlBarrier_cl20_subgroup.ll +++ b/test/transcoding/OpControlBarrier_cl20_subgroup.ll @@ -104,7 +104,7 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" " attributes #2 = { nounwind } !opencl.kernels = !{!0} -!rpencl.enable.FP_CONTRACT = !{} +!opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} !opencl.ocl.version = !{!7} !opencl.used.extensions = !{!8}