Skip to content

Commit

Permalink
Revert opaquify changes for tests that run ESIMDLowerVecArg pass. Thi…
Browse files Browse the repository at this point in the history
…s pass requires typed pointers.

Signed-off-by: Lu, John <[email protected]>
  • Loading branch information
LU-JOHN committed Aug 8, 2023
1 parent 509fe11 commit 887ab89
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 98 deletions.
70 changes: 38 additions & 32 deletions llvm/test/SYCLLowerIR/ESIMD/global.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ $"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any
@0 = dso_local global %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" zeroinitializer, align 64 #0

; Function Attrs: norecurse
define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"(ptr addrspace(1) %_arg_) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 {
define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"(i32 addrspace(1)* %_arg_) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 {
entry:
%vc.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
%agg.tmp.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
Expand All @@ -36,57 +36,63 @@ entry:
%group.id.x.cast.ty.i.i.i.i.i = zext i32 %group.id.x.i.i.i.i.i to i64
%mul.i4.i.i.i.i = mul nuw i64 %group.id.x.cast.ty.i.i.i.i.i, %wgsize.x.cast.ty.i.i.i.i.i
%add.i5.i.i.i.i = add i64 %mul.i4.i.i.i.i, %local_id.x.cast.ty.i.i.i.i.i
call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %agg.tmp.i)
call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %vc.i) #5
%0 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to i8*
call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
%1 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to i8*
call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %1) #5
%conv.i = trunc i64 %add.i5.i.i.i.i to i32
%0 = addrspacecast ptr %vc.i to ptr addrspace(4)
%2 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%splat.splatinsert.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
%splat.splat.i.i = shufflevector <16 x i32> %splat.splatinsert.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
store <16 x i32> %splat.splat.i.i, ptr addrspace(4) %0, align 64, !tbaa !13
%M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %2, i64 0, i32 0
store <16 x i32> %splat.splat.i.i, <16 x i32> addrspace(4)* %M_data.i13.i, align 64, !tbaa !13
%conv3.i = trunc i64 %add.i.i.i.i.i to i32
%splat.splatinsert.i20.i = insertelement <8 x i32> undef, i32 %conv3.i, i32 0
%splat.splat.i21.i = shufflevector <8 x i32> %splat.splatinsert.i20.i, <8 x i32> undef, <8 x i32> zeroinitializer
%call.esimd.i.i.i.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0) #5
%call.esimd.i.i.i.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i13.i) #5
%call4.esimd.i.i.i.i = call <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1(<16 x i32> %call.esimd.i.i.i.i.i2, <8 x i32> %splat.splat.i21.i, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) #5
call void @llvm.genx.vstore.v16i32.p4(<16 x i32> %call4.esimd.i.i.i.i, ptr addrspace(4) %0) #5
call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call4.esimd.i.i.i.i, <16 x i32> addrspace(4)* %M_data.i13.i) #5
%cmp.i = icmp eq i64 %add.i.i.i.i.i, 0
%..i = select i1 %cmp.i, i64 %add.i5.i.i.i.i, i64 %add.i.i.i.i.i
%conv9.i = trunc i64 %..i to i32
; CHECK: store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, ptr addrspace(4) addrspacecast (ptr [[NEWGLOBAL]] to ptr addrspace(4)), align 64, !tbaa.struct !16
store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, ptr addrspace(4) addrspacecast (ptr @0 to ptr addrspace(4)), align 64, !tbaa.struct !16
; CHECK: store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds ({{.+}}, {{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*), i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
%mul.i = shl nsw i32 %conv9.i, 4
%idx.ext.i = sext i32 %mul.i to i64
%add.ptr.i16 = getelementptr inbounds i32, ptr addrspace(1) %_arg_, i64 %idx.ext.i
%add.ptr.i = addrspacecast ptr addrspace(1) %add.ptr.i16 to ptr addrspace(4)
%1 = addrspacecast ptr %agg.tmp.i to ptr addrspace(4)
%call.esimd.i.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0) #5
call void @llvm.genx.vstore.v16i32.p4(<16 x i32> %call.esimd.i.i.i, ptr addrspace(4) %1) #5
call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(ptr addrspace(4) %add.ptr.i, ptr nonnull %agg.tmp.i) #5
store <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, ptr addrspace(4) addrspacecast (ptr @0 to ptr addrspace(4)), align 64, !tbaa.struct !16
call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %vc.i) #5
call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %agg.tmp.i)
%add.ptr.i16 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %idx.ext.i
%add.ptr.i = addrspacecast i32 addrspace(1)* %add.ptr.i16 to i32 addrspace(4)*
%3 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%call.esimd.i.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i13.i) #5
%M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %3, i64 0, i32 0
call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call.esimd.i.i.i, <16 x i32> addrspace(4)* %M_data.i2.i.i) #5
call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %add.ptr.i, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull %agg.tmp.i) #5
store <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %1) #5
call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
ret void
}

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0(i64 immarg %agg.tmp.i, ptr nocapture %vc.i) #2
declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #2

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0(i64 immarg %agg.tmp.i, ptr nocapture %vc.i) #2
declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #2

; Function Attrs: noinline norecurse nounwind
define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(ptr addrspace(4) %C, ptr %v) local_unnamed_addr #3 {
define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %C, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v) local_unnamed_addr #3 {
entry:
%agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
%0 = addrspacecast ptr %v to ptr addrspace(4)
%1 = addrspacecast ptr %agg.tmp to ptr addrspace(4)
%call.esimd.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0), !noalias !17
; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) addrspacecast (ptr [[NEWGLOBAL]] to ptr addrspace(4))), !noalias !17
%call.esimd.i8.i = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) addrspacecast (ptr @0 to ptr addrspace(4))), !noalias !17
%0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %0, i64 0, i32 0
%call.esimd.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i), !noalias !17
; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr ({{.+}}, {{.+}} addrspace(4)* addrspacecast ({{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*) to {{.+}} addrspace(4)*), i64 0, i32 0)), !noalias !17
%call.esimd.i8.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0)), !noalias !17
%add.i = add <16 x i32> %call.esimd.i8.i, %call.esimd.i.i
call void @llvm.genx.vstore.v16i32.p4(<16 x i32> %add.i, ptr addrspace(4) %1)
%2 = ptrtoint ptr addrspace(4) %C to i64
%call.esimd.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %1)
%M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %1, i64 0, i32 0
call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %add.i, <16 x i32> addrspace(4)* %M_data.i.i.i)
%2 = ptrtoint i32 addrspace(4)* %C to i64
%call.esimd.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i.i)
call void @llvm.genx.svm.block.st.v16i32(i64 %2, <16 x i32> %call.esimd.i.i2)
ret void
}
Expand All @@ -95,10 +101,10 @@ entry:
declare !genx_intrinsic_id !20 <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1(<16 x i32> %0, <8 x i32> %1, i32 %2, i32 %3, i32 %4, i16 %5, i32 %6, <8 x i1> %7) #4

; Function Attrs: nounwind
declare !genx_intrinsic_id !21 <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0) #5
declare !genx_intrinsic_id !21 <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %0) #5

; Function Attrs: nounwind
declare !genx_intrinsic_id !22 void @llvm.genx.vstore.v16i32.p4(<16 x i32> %0, ptr addrspace(4) %1) #5
declare !genx_intrinsic_id !22 void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %0, <16 x i32> addrspace(4)* %1) #5

; Function Attrs: nounwind
declare !genx_intrinsic_id !23 void @llvm.genx.svm.block.st.v16i32(i64 %0, <16 x i32> %1) #5
Expand Down Expand Up @@ -134,7 +140,7 @@ attributes #5 = { nounwind }
!2 = !{i32 1, i32 2}
!3 = !{i32 6, i32 100000}
!4 = !{!"clang version 11.0.0"}
!5 = !{ptr @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !6, i32 0, i32 0, !6, !7, i32 0, i32 0}
!5 = !{void (i32 addrspace(1)*)* @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !6, i32 0, i32 0, !6, !7, i32 0, i32 0}
!6 = !{i32 0}
!7 = !{!"svmptr_t"}
!8 = !{i32 1}
Expand Down
10 changes: 6 additions & 4 deletions llvm/test/SYCLLowerIR/ESIMD/global_crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ target triple = "spir64-unknown-unknown"

define void @no_crash(<2512 x i32> %simd_val) {
; CHECK-LABEL: @no_crash(
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr @Global to ptr addrspace(4)
; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], ptr addrspace(4) [[CAST]], align 16384
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*
; CHECK-NEXT: [[GEP:%.*]] = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* [[CAST]], i64 0, i32 0
; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* [[GEP]], align 16384
; CHECK-NEXT: ret void
;
%cast = addrspacecast ptr @Global to ptr addrspace(4)
store <2512 x i32> %simd_val, ptr addrspace(4) %cast, align 16384
%cast = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%gep = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* %cast, i64 0, i32 0
store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* %gep, align 16384
ret void
}
4 changes: 2 additions & 2 deletions llvm/test/SYCLLowerIR/ESIMD/global_undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ target triple = "spir64-unknown-unknown"

define void @f(<2512 x i32> %simd_val) {
; CHECK-LABEL: @f(
; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], ptr addrspace(4) addrspacecast (ptr @Global to ptr addrspace(4)), align 16384
; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* getelementptr (%"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0), align 16384
; CHECK-NEXT: ret void
;
store <2512 x i32> %simd_val, ptr addrspace(4) addrspacecast (ptr @Global to ptr addrspace(4)), align 16384
store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* getelementptr (%"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0), align 16384
ret void
}
30 changes: 15 additions & 15 deletions llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

%"cl::sycl::INTEL::gpu::simd" = type { <64 x i32> }

define dso_local spir_func void @func(ptr %arg) {
define dso_local spir_func void @func(%"cl::sycl::INTEL::gpu::simd"* %arg) {
; CHECK-LABEL: @func(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
Expand All @@ -15,44 +15,44 @@ entry:
ret void
}

define dso_local spir_func void @init_ptr(ptr %foo) !sycl_explicit_simd !1 {
define dso_local spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** %foo) !sycl_explicit_simd !1 {
; CHECK-LABEL: @init_ptr(
; CHECK-NEXT: entry:
; CHECK-NEXT: store ptr @func, ptr [[FOO:%.*]], align 8
; CHECK-NEXT: store void (%"cl::sycl::INTEL::gpu::simd"*)* @func, void (%"cl::sycl::INTEL::gpu::simd"*)** [[FOO:%.*]], align 8
; CHECK-NEXT: ret void
;
entry:
store ptr @func, ptr %foo
store void (%"cl::sycl::INTEL::gpu::simd"*)* @func, void (%"cl::sycl::INTEL::gpu::simd"*)** %foo
ret void
}

define dso_local spir_func void @use_ptr(ptr %foo) !sycl_explicit_simd !1 {
define dso_local spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* %foo) !sycl_explicit_simd !1 {
; CHECK-LABEL: @use_ptr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca %"cl::sycl::INTEL::gpu::simd", align 256
; CHECK-NEXT: call spir_func void [[FOO:%.*]](ptr [[AGG_TMP]])
; CHECK-NEXT: call spir_func void [[FOO:%.*]](%"cl::sycl::INTEL::gpu::simd"* [[AGG_TMP]])
; CHECK-NEXT: ret void
;
entry:
%agg.tmp = alloca %"cl::sycl::INTEL::gpu::simd"
call spir_func void %foo(ptr %agg.tmp)
call spir_func void %foo(%"cl::sycl::INTEL::gpu::simd"* %agg.tmp)
ret void
}

define dso_local spir_func void @esimd_kernel() !sycl_explicit_simd !1 {
; CHECK-LABEL: @esimd_kernel(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call spir_func void @init_ptr(ptr [[FP]])
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP]], align 8
; CHECK-NEXT: call spir_func void @use_ptr(ptr [[TMP0]])
; CHECK-NEXT: [[FP:%.*]] = alloca void (%"cl::sycl::INTEL::gpu::simd"*)*, align 8
; CHECK-NEXT: call spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** [[FP]])
; CHECK-NEXT: [[TMP0:%.*]] = load void (%"cl::sycl::INTEL::gpu::simd"*)*, void (%"cl::sycl::INTEL::gpu::simd"*)** [[FP]], align 8
; CHECK-NEXT: call spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
%fp = alloca ptr
call spir_func void @init_ptr(ptr %fp)
%0 = load ptr, ptr %fp
call spir_func void @use_ptr(ptr %0)
%fp = alloca void (%"cl::sycl::INTEL::gpu::simd"*)*
call spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** %fp)
%0 = load void (%"cl::sycl::INTEL::gpu::simd"*)*, void (%"cl::sycl::INTEL::gpu::simd"*)** %fp
call spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* %0)
ret void
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@

$foo = comdat any

define weak_odr dso_local spir_kernel void @foo(ptr addrspace(1) noundef align 16 %_arg_out) local_unnamed_addr comdat {
define weak_odr dso_local spir_kernel void @foo(%"class.sycl::_V1::vec" addrspace(1)* noundef align 16 %_arg_out) local_unnamed_addr comdat {
entry:
ret void
}

;CHECK: !genx.kernels = !{![[GenXMD:[0-9]+]]}
!genx.kernels = !{!0}

;CHECK: ![[GenXMD]] = !{ptr @foo, {{.*}}}
!0 = !{ptr @foo, !"foo", !1, i32 0, i32 0, !1, !2, i32 0, i32 0}
;CHECK: ![[GenXMD]] = !{void (<2 x double> addrspace(1)*)* @foo, {{.*}}}
!0 = !{void (%"class.sycl::_V1::vec" addrspace(1)*)* @foo, !"foo", !1, i32 0, i32 0, !1, !2, i32 0, i32 0}
!1 = !{i32 0}
!2 = !{!"svmptr_t"}
Loading

0 comments on commit 887ab89

Please sign in to comment.