From 887ab890f7337f20a120546f7dfeebd6b1f2cbe8 Mon Sep 17 00:00:00 2001 From: "Lu, John" Date: Tue, 8 Aug 2023 08:25:20 -0700 Subject: [PATCH] Revert opaquify changes for tests that run ESIMDLowerVecArg pass. This pass requires typed pointers. Signed-off-by: Lu, John --- llvm/test/SYCLLowerIR/ESIMD/global.ll | 70 +++++++++-------- llvm/test/SYCLLowerIR/ESIMD/global_crash.ll | 10 ++- llvm/test/SYCLLowerIR/ESIMD/global_undef.ll | 4 +- .../SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll | 30 ++++---- .../ESIMD/lower_vec_arg_fp_metadata.ll | 6 +- llvm/test/SYCLLowerIR/ESIMD/subroutine.ll | 77 ++++++++++--------- .../SYCLLowerIR/ESIMD/subroutine_extern.ll | 15 ++-- 7 files changed, 114 insertions(+), 98 deletions(-) diff --git a/llvm/test/SYCLLowerIR/ESIMD/global.ll b/llvm/test/SYCLLowerIR/ESIMD/global.ll index 77d9896b3374e..274903e9325a1 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/global.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/global.ll @@ -14,7 +14,7 @@ $"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any @0 = dso_local global %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" zeroinitializer, align 64 #0 ; Function Attrs: norecurse -define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"(ptr addrspace(1) %_arg_) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 { +define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"(i32 addrspace(1)* %_arg_) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 { entry: %vc.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 %agg.tmp.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 @@ -36,57 +36,63 @@ entry: %group.id.x.cast.ty.i.i.i.i.i = zext i32 %group.id.x.i.i.i.i.i to i64 %mul.i4.i.i.i.i = mul nuw i64 %group.id.x.cast.ty.i.i.i.i.i, %wgsize.x.cast.ty.i.i.i.i.i %add.i5.i.i.i.i = add i64 %mul.i4.i.i.i.i, %local_id.x.cast.ty.i.i.i.i.i - call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %agg.tmp.i) - call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %vc.i) #5 + %0 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to i8* + call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) + %1 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to i8* + call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %1) #5 %conv.i = trunc i64 %add.i5.i.i.i.i to i32 - %0 = addrspacecast ptr %vc.i to ptr addrspace(4) + %2 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %splat.splatinsert.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0 %splat.splat.i.i = shufflevector <16 x i32> %splat.splatinsert.i.i, <16 x i32> undef, <16 x i32> zeroinitializer - store <16 x i32> %splat.splat.i.i, ptr addrspace(4) %0, align 64, !tbaa !13 + %M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %2, i64 0, i32 0 + store <16 x i32> %splat.splat.i.i, <16 x i32> addrspace(4)* %M_data.i13.i, align 64, !tbaa !13 %conv3.i = trunc i64 %add.i.i.i.i.i to i32 %splat.splatinsert.i20.i = insertelement <8 x i32> undef, i32 %conv3.i, i32 0 %splat.splat.i21.i = shufflevector <8 x i32> %splat.splatinsert.i20.i, <8 x i32> undef, <8 x i32> zeroinitializer - %call.esimd.i.i.i.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0) #5 + %call.esimd.i.i.i.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i13.i) #5 %call4.esimd.i.i.i.i = call <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1(<16 x i32> %call.esimd.i.i.i.i.i2, <8 x i32> %splat.splat.i21.i, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> ) #5 - call void @llvm.genx.vstore.v16i32.p4(<16 x i32> %call4.esimd.i.i.i.i, ptr addrspace(4) %0) #5 + call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call4.esimd.i.i.i.i, <16 x i32> addrspace(4)* %M_data.i13.i) #5 %cmp.i = icmp eq i64 %add.i.i.i.i.i, 0 %..i = select i1 %cmp.i, i64 %add.i5.i.i.i.i, i64 %add.i.i.i.i.i %conv9.i = trunc i64 %..i to i32 -; CHECK: store <16 x i32> , ptr addrspace(4) addrspacecast (ptr [[NEWGLOBAL]] to ptr addrspace(4)), align 64, !tbaa.struct !16 - store <16 x i32> , ptr addrspace(4) addrspacecast (ptr @0 to ptr addrspace(4)), align 64, !tbaa.struct !16 +; CHECK: store <16 x i32> , <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds ({{.+}}, {{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*), i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16 + store <16 x i32> , <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16 %mul.i = shl nsw i32 %conv9.i, 4 %idx.ext.i = sext i32 %mul.i to i64 - %add.ptr.i16 = getelementptr inbounds i32, ptr addrspace(1) %_arg_, i64 %idx.ext.i - %add.ptr.i = addrspacecast ptr addrspace(1) %add.ptr.i16 to ptr addrspace(4) - %1 = addrspacecast ptr %agg.tmp.i to ptr addrspace(4) - %call.esimd.i.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0) #5 - call void @llvm.genx.vstore.v16i32.p4(<16 x i32> %call.esimd.i.i.i, ptr addrspace(4) %1) #5 - call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(ptr addrspace(4) %add.ptr.i, ptr nonnull %agg.tmp.i) #5 - store <16 x i32> , ptr addrspace(4) addrspacecast (ptr @0 to ptr addrspace(4)), align 64, !tbaa.struct !16 - call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %vc.i) #5 - call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %agg.tmp.i) + %add.ptr.i16 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %idx.ext.i + %add.ptr.i = addrspacecast i32 addrspace(1)* %add.ptr.i16 to i32 addrspace(4)* + %3 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* + %call.esimd.i.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i13.i) #5 + %M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %3, i64 0, i32 0 + call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call.esimd.i.i.i, <16 x i32> addrspace(4)* %M_data.i2.i.i) #5 + call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %add.ptr.i, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull %agg.tmp.i) #5 + store <16 x i32> , <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16 + call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %1) #5 + call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) ret void } ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.start.p0(i64 immarg %agg.tmp.i, ptr nocapture %vc.i) #2 +declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #2 ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.end.p0(i64 immarg %agg.tmp.i, ptr nocapture %vc.i) #2 +declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #2 ; Function Attrs: noinline norecurse nounwind -define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(ptr addrspace(4) %C, ptr %v) local_unnamed_addr #3 { +define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %C, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v) local_unnamed_addr #3 { entry: %agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %0 = addrspacecast ptr %v to ptr addrspace(4) - %1 = addrspacecast ptr %agg.tmp to ptr addrspace(4) - %call.esimd.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0), !noalias !17 -; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) addrspacecast (ptr [[NEWGLOBAL]] to ptr addrspace(4))), !noalias !17 - %call.esimd.i8.i = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) addrspacecast (ptr @0 to ptr addrspace(4))), !noalias !17 + %0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* + %1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* + %M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %0, i64 0, i32 0 + %call.esimd.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i), !noalias !17 +; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr ({{.+}}, {{.+}} addrspace(4)* addrspacecast ({{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*) to {{.+}} addrspace(4)*), i64 0, i32 0)), !noalias !17 + %call.esimd.i8.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0)), !noalias !17 %add.i = add <16 x i32> %call.esimd.i8.i, %call.esimd.i.i - call void @llvm.genx.vstore.v16i32.p4(<16 x i32> %add.i, ptr addrspace(4) %1) - %2 = ptrtoint ptr addrspace(4) %C to i64 - %call.esimd.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %1) + %M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %1, i64 0, i32 0 + call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %add.i, <16 x i32> addrspace(4)* %M_data.i.i.i) + %2 = ptrtoint i32 addrspace(4)* %C to i64 + %call.esimd.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i.i) call void @llvm.genx.svm.block.st.v16i32(i64 %2, <16 x i32> %call.esimd.i.i2) ret void } @@ -95,10 +101,10 @@ entry: declare !genx_intrinsic_id !20 <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1(<16 x i32> %0, <8 x i32> %1, i32 %2, i32 %3, i32 %4, i16 %5, i32 %6, <8 x i1> %7) #4 ; Function Attrs: nounwind -declare !genx_intrinsic_id !21 <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) %0) #5 +declare !genx_intrinsic_id !21 <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %0) #5 ; Function Attrs: nounwind -declare !genx_intrinsic_id !22 void @llvm.genx.vstore.v16i32.p4(<16 x i32> %0, ptr addrspace(4) %1) #5 +declare !genx_intrinsic_id !22 void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %0, <16 x i32> addrspace(4)* %1) #5 ; Function Attrs: nounwind declare !genx_intrinsic_id !23 void @llvm.genx.svm.block.st.v16i32(i64 %0, <16 x i32> %1) #5 @@ -134,7 +140,7 @@ attributes #5 = { nounwind } !2 = !{i32 1, i32 2} !3 = !{i32 6, i32 100000} !4 = !{!"clang version 11.0.0"} -!5 = !{ptr @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !6, i32 0, i32 0, !6, !7, i32 0, i32 0} +!5 = !{void (i32 addrspace(1)*)* @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !6, i32 0, i32 0, !6, !7, i32 0, i32 0} !6 = !{i32 0} !7 = !{!"svmptr_t"} !8 = !{i32 1} diff --git a/llvm/test/SYCLLowerIR/ESIMD/global_crash.ll b/llvm/test/SYCLLowerIR/ESIMD/global_crash.ll index 6483eb03e593a..cabf30983661e 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/global_crash.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/global_crash.ll @@ -14,11 +14,13 @@ target triple = "spir64-unknown-unknown" define void @no_crash(<2512 x i32> %simd_val) { ; CHECK-LABEL: @no_crash( -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr @Global to ptr addrspace(4) -; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], ptr addrspace(4) [[CAST]], align 16384 +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* [[CAST]], i64 0, i32 0 +; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* [[GEP]], align 16384 ; CHECK-NEXT: ret void ; - %cast = addrspacecast ptr @Global to ptr addrspace(4) - store <2512 x i32> %simd_val, ptr addrspace(4) %cast, align 16384 + %cast = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* + %gep = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* %cast, i64 0, i32 0 + store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* %gep, align 16384 ret void } diff --git a/llvm/test/SYCLLowerIR/ESIMD/global_undef.ll b/llvm/test/SYCLLowerIR/ESIMD/global_undef.ll index a79203e634237..4924dd715d8f1 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/global_undef.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/global_undef.ll @@ -14,9 +14,9 @@ target triple = "spir64-unknown-unknown" define void @f(<2512 x i32> %simd_val) { ; CHECK-LABEL: @f( -; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], ptr addrspace(4) addrspacecast (ptr @Global to ptr addrspace(4)), align 16384 +; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* getelementptr (%"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0), align 16384 ; CHECK-NEXT: ret void ; - store <2512 x i32> %simd_val, ptr addrspace(4) addrspacecast (ptr @Global to ptr addrspace(4)), align 16384 + store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* getelementptr (%"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0), align 16384 ret void } diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll index 0ddaf2f93a323..fcb6fe869d5fa 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll @@ -6,7 +6,7 @@ %"cl::sycl::INTEL::gpu::simd" = type { <64 x i32> } -define dso_local spir_func void @func(ptr %arg) { +define dso_local spir_func void @func(%"cl::sycl::INTEL::gpu::simd"* %arg) { ; CHECK-LABEL: @func( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void @@ -15,44 +15,44 @@ entry: ret void } -define dso_local spir_func void @init_ptr(ptr %foo) !sycl_explicit_simd !1 { +define dso_local spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** %foo) !sycl_explicit_simd !1 { ; CHECK-LABEL: @init_ptr( ; CHECK-NEXT: entry: -; CHECK-NEXT: store ptr @func, ptr [[FOO:%.*]], align 8 +; CHECK-NEXT: store void (%"cl::sycl::INTEL::gpu::simd"*)* @func, void (%"cl::sycl::INTEL::gpu::simd"*)** [[FOO:%.*]], align 8 ; CHECK-NEXT: ret void ; entry: - store ptr @func, ptr %foo + store void (%"cl::sycl::INTEL::gpu::simd"*)* @func, void (%"cl::sycl::INTEL::gpu::simd"*)** %foo ret void } -define dso_local spir_func void @use_ptr(ptr %foo) !sycl_explicit_simd !1 { +define dso_local spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* %foo) !sycl_explicit_simd !1 { ; CHECK-LABEL: @use_ptr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca %"cl::sycl::INTEL::gpu::simd", align 256 -; CHECK-NEXT: call spir_func void [[FOO:%.*]](ptr [[AGG_TMP]]) +; CHECK-NEXT: call spir_func void [[FOO:%.*]](%"cl::sycl::INTEL::gpu::simd"* [[AGG_TMP]]) ; CHECK-NEXT: ret void ; entry: %agg.tmp = alloca %"cl::sycl::INTEL::gpu::simd" - call spir_func void %foo(ptr %agg.tmp) + call spir_func void %foo(%"cl::sycl::INTEL::gpu::simd"* %agg.tmp) ret void } define dso_local spir_func void @esimd_kernel() !sycl_explicit_simd !1 { ; CHECK-LABEL: @esimd_kernel( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: call spir_func void @init_ptr(ptr [[FP]]) -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP]], align 8 -; CHECK-NEXT: call spir_func void @use_ptr(ptr [[TMP0]]) +; CHECK-NEXT: [[FP:%.*]] = alloca void (%"cl::sycl::INTEL::gpu::simd"*)*, align 8 +; CHECK-NEXT: call spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** [[FP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load void (%"cl::sycl::INTEL::gpu::simd"*)*, void (%"cl::sycl::INTEL::gpu::simd"*)** [[FP]], align 8 +; CHECK-NEXT: call spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* [[TMP0]]) ; CHECK-NEXT: ret void ; entry: - %fp = alloca ptr - call spir_func void @init_ptr(ptr %fp) - %0 = load ptr, ptr %fp - call spir_func void @use_ptr(ptr %0) + %fp = alloca void (%"cl::sycl::INTEL::gpu::simd"*)* + call spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** %fp) + %0 = load void (%"cl::sycl::INTEL::gpu::simd"*)*, void (%"cl::sycl::INTEL::gpu::simd"*)** %fp + call spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* %0) ret void } diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll index d15cdeaaf66a3..d67f05736c196 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll @@ -6,7 +6,7 @@ $foo = comdat any -define weak_odr dso_local spir_kernel void @foo(ptr addrspace(1) noundef align 16 %_arg_out) local_unnamed_addr comdat { +define weak_odr dso_local spir_kernel void @foo(%"class.sycl::_V1::vec" addrspace(1)* noundef align 16 %_arg_out) local_unnamed_addr comdat { entry: ret void } @@ -14,7 +14,7 @@ entry: ;CHECK: !genx.kernels = !{![[GenXMD:[0-9]+]]} !genx.kernels = !{!0} -;CHECK: ![[GenXMD]] = !{ptr @foo, {{.*}}} -!0 = !{ptr @foo, !"foo", !1, i32 0, i32 0, !1, !2, i32 0, i32 0} +;CHECK: ![[GenXMD]] = !{void (<2 x double> addrspace(1)*)* @foo, {{.*}}} +!0 = !{void (%"class.sycl::_V1::vec" addrspace(1)*)* @foo, !"foo", !1, i32 0, i32 0, !1, !2, i32 0, i32 0} !1 = !{i32 0} !2 = !{!"svmptr_t"} diff --git a/llvm/test/SYCLLowerIR/ESIMD/subroutine.ll b/llvm/test/SYCLLowerIR/ESIMD/subroutine.ll index 2e36751828a71..100ea3a3b2f8b 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/subroutine.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/subroutine.ll @@ -20,61 +20,68 @@ entry: ; CHECK-NEXT: [[A:%[a-zA-Z0-9_]*]] = alloca {{.+}} %a = alloca %class._ZTS4simdIiLi16EE.simd, align 64 %agg.tmp = alloca %class._ZTS4simdIiLi16EE.simd, align 64 - store i32 %x, ptr %x.addr, align 4, !tbaa !4 - call void @llvm.lifetime.start.p0(i64 64, ptr %a) #2 + store i32 %x, i32* %x.addr, align 4, !tbaa !4 + %0 = bitcast %class._ZTS4simdIiLi16EE.simd* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 64, i8* %0) #2 ; CHECK: [[ADDRSPCAST1:%[a-zA-Z0-9_]*]] = addrspacecast {{.+}} [[A]] to {{.+}} - %0 = addrspacecast ptr %agg.tmp to ptr addrspace(4) - %1 = addrspacecast ptr %a to ptr addrspace(4) -; CHECK: call spir_func void @_ZN4simdIiLi16EEC1ERS0_(ptr addrspace(4) {{.+}}, ptr addrspace(4) [[ADDRSPCAST1]]) - call spir_func void @_ZN4simdIiLi16EEC1ERS0_(ptr addrspace(4) %0, ptr addrspace(4) align 64 dereferenceable(64) %1) -; CHECK: [[BITCASTRESULT2:%[a-zA-Z0-9_]*]] = bitcast {{.+}} to ptr -; CHECK-NEXT: {{.+}} = call spir_func i32 {{.+}}bar{{.+}}(ptr [[BITCASTRESULT2]]) - %call = call spir_func i32 @_Z3bar4simdIiLi16EE(ptr %agg.tmp) - call void @llvm.lifetime.end.p0(i64 64, ptr %a) #2 + %1 = addrspacecast %class._ZTS4simdIiLi16EE.simd* %agg.tmp to %class._ZTS4simdIiLi16EE.simd addrspace(4)* + %2 = addrspacecast %class._ZTS4simdIiLi16EE.simd* %a to %class._ZTS4simdIiLi16EE.simd addrspace(4)* +; CHECK: [[BITCASTRESULT1:%[a-zA-Z0-9_]*]] = bitcast {{.+}} addrspace(4)* [[ADDRSPCAST1]] to <16 x i32> addrspace(4)* +; CHECK-NEXT: call spir_func void @_ZN4simdIiLi16EEC1ERS0_(<16 x i32> addrspace(4)* {{.+}}, <16 x i32> addrspace(4)* [[BITCASTRESULT1]]) + call spir_func void @_ZN4simdIiLi16EEC1ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %1, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %2) +; CHECK: [[BITCASTRESULT2:%[a-zA-Z0-9_]*]] = bitcast {{.+}} to <16 x i32>* +; CHECK-NEXT: {{.+}} = call spir_func i32 {{.+}}bar{{.+}}(<16 x i32>* [[BITCASTRESULT2]]) + %call = call spir_func i32 @_Z3bar4simdIiLi16EE(%class._ZTS4simdIiLi16EE.simd* %agg.tmp) + %3 = bitcast %class._ZTS4simdIiLi16EE.simd* %a to i8* + call void @llvm.lifetime.end.p0i8(i64 64, i8* %3) #2 ret void } ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 ; Function Attrs: norecurse nounwind -; CHECK: define spir_func i32 @_Z3bar4simdIiLi16EE(ptr {{.+}} -define spir_func i32 @_Z3bar4simdIiLi16EE(ptr %v) #0 { +; CHECK: define spir_func i32 @_Z3bar4simdIiLi16EE(<16 x i32>* {{.+}} +define spir_func i32 @_Z3bar4simdIiLi16EE(%class._ZTS4simdIiLi16EE.simd* %v) #0 { entry: -; CHECK: {{.+}} = bitcast ptr {{.+}} +; CHECK: {{.+}} = bitcast <16 x i32>* {{.+}} ret i32 1 } ; Function Attrs: norecurse nounwind -; CHECK: define linkonce_odr spir_func void @_ZN4simdIiLi16EEC1ERS0_(ptr addrspace(4) [[OLDARG0:%[a-zA-Z0-9_]*]], ptr addrspace(4){{.*}} [[OLDARG1:%[a-zA-Z0-9_]*]]) unnamed_addr {{.+}} -define linkonce_odr spir_func void @_ZN4simdIiLi16EEC1ERS0_(ptr addrspace(4) %this, ptr addrspace(4) align 64 dereferenceable(64) %other) unnamed_addr #0 comdat align 2 { +; CHECK: define linkonce_odr spir_func void @_ZN4simdIiLi16EEC1ERS0_(<16 x i32> addrspace(4)* [[OLDARG0:%[a-zA-Z0-9_]*]], <16 x i32> addrspace(4)*{{.*}} [[OLDARG1:%[a-zA-Z0-9_]*]]) unnamed_addr {{.+}} +define linkonce_odr spir_func void @_ZN4simdIiLi16EEC1ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %other) unnamed_addr #0 comdat align 2 { entry: - %this.addr = alloca ptr addrspace(4), align 8 - %other.addr = alloca ptr addrspace(4), align 8 -; CHECK: store ptr addrspace(4) [[OLDARG0]], {{.+}} - store ptr addrspace(4) %this, ptr %this.addr, align 8, !tbaa !8 -; CHECK-NEXT: store ptr addrspace(4) [[OLDARG1]], {{.+}} - store ptr addrspace(4) %other, ptr %other.addr, align 8, !tbaa !8 - %this1 = load ptr addrspace(4), ptr %this.addr, align 8 - %0 = load ptr addrspace(4), ptr %other.addr, align 8 - call spir_func void @_ZN4simdIiLi16EEC2ERS0_(ptr addrspace(4) %this1, ptr addrspace(4) align 64 dereferenceable(64) %0) +; CHECK: [[NEWARG1:%[a-zA-Z0-9_]*]] = bitcast <16 x i32> addrspace(4)* [[OLDARG1]] to {{.+}} +; CHECK-NEXT: [[NEWARG0:%[a-zA-Z0-9_]*]] = bitcast <16 x i32> addrspace(4)* [[OLDARG0]] to {{.+}} + %this.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 + %other.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 +; CHECK: store {{.+}} addrspace(4)* [[NEWARG0]], {{.+}} + store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8, !tbaa !8 +; CHECK-NEXT: store {{.+}} addrspace(4)* [[NEWARG1]], {{.+}} + store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %other, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8, !tbaa !8 + %this1 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8 + %0 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8 + call spir_func void @_ZN4simdIiLi16EEC2ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %this1, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %0) ret void } ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 ; Function Attrs: norecurse nounwind -define linkonce_odr spir_func void @_ZN4simdIiLi16EEC2ERS0_(ptr addrspace(4) %this, ptr addrspace(4) align 64 dereferenceable(64) %other) unnamed_addr #0 comdat align 2 { +define linkonce_odr spir_func void @_ZN4simdIiLi16EEC2ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %other) unnamed_addr #0 comdat align 2 { entry: - %this.addr = alloca ptr addrspace(4), align 8 - %other.addr = alloca ptr addrspace(4), align 8 - store ptr addrspace(4) %this, ptr %this.addr, align 8, !tbaa !8 - store ptr addrspace(4) %other, ptr %other.addr, align 8, !tbaa !8 - %this1 = load ptr addrspace(4), ptr %this.addr, align 8 - %0 = load ptr addrspace(4), ptr %other.addr, align 8, !tbaa !8 - %1 = load <16 x i32>, ptr addrspace(4) %0, align 64, !tbaa !10 - store <16 x i32> %1, ptr addrspace(4) %this1, align 64, !tbaa !10 + %this.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 + %other.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 + store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8, !tbaa !8 + store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %other, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8, !tbaa !8 + %this1 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8 + %0 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8, !tbaa !8 + %__M_data = getelementptr inbounds %class._ZTS4simdIiLi16EE.simd, %class._ZTS4simdIiLi16EE.simd addrspace(4)* %0, i32 0, i32 0 + %1 = load <16 x i32>, <16 x i32> addrspace(4)* %__M_data, align 64, !tbaa !10 + %__M_data2 = getelementptr inbounds %class._ZTS4simdIiLi16EE.simd, %class._ZTS4simdIiLi16EE.simd addrspace(4)* %this1, i32 0, i32 0 + store <16 x i32> %1, <16 x i32> addrspace(4)* %__M_data2, align 64, !tbaa !10 ret void } diff --git a/llvm/test/SYCLLowerIR/ESIMD/subroutine_extern.ll b/llvm/test/SYCLLowerIR/ESIMD/subroutine_extern.ll index 87a65ac4a968d..376be5384782a 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/subroutine_extern.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/subroutine_extern.ll @@ -18,19 +18,20 @@ entry: %0 = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 %agg.tmp5.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 %agg.tmp6.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %1 = addrspacecast ptr %0 to ptr addrspace(4) + %1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* %0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* -; CHECK: [[BITCASTRESULT2:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp5.i to ptr -; CHECK: [[BITCASTRESULT3:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp6.i to ptr -; CHECK-NEXT: call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(ptr addrspace(4) %1, ptr [[BITCASTRESULT2]], ptr [[BITCASTRESULT3]]) +; CHECK: [[BITCASTRESULT1:%[a-zA-Z0-9_]*]] = bitcast {{.+}} addrspace(4)* %1 to <16 x float> addrspace(4)* +; CHECK: [[BITCASTRESULT2:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp5.i to <16 x float>* +; CHECK: [[BITCASTRESULT3:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp6.i to <16 x float>* +; CHECK-NEXT: call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(<16 x float> addrspace(4)* [[BITCASTRESULT1]], <16 x float>* [[BITCASTRESULT2]], <16 x float>* [[BITCASTRESULT3]]) - call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(ptr addrspace(4) sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, ptr nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp5.i, ptr nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp6.i) #1 + call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp5.i, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp6.i) #1 ret void } -; CHECK: declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(ptr addrspace(4), ptr, ptr){{.+}} +; CHECK: declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(<16 x float> addrspace(4)*, <16 x float>*, <16 x float>*){{.+}} ; Function Attrs: convergent -declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(ptr addrspace(4) sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %0, ptr byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, ptr byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %2) local_unnamed_addr #2 +declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %0, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %2) local_unnamed_addr #2 attributes #0 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { convergent }