Skip to content

Commit

Permalink
AMDGPU: Make various vector undefs legal
Browse files Browse the repository at this point in the history
Surprisingly, these were being legalized into zero-initialized
values.

This fixes an infinite loop when combining some vector types.
Also stops some undef values from being zero-initialized.

SimplifyDemandedVectorElts / SimplifyDemandedBits are not checking
for the legality of the output undefs they are replacing unused
operations with. This resulted in turning vectors into undefs
that were later re-legalized back into zero vectors.

(cherry picked from commit 7a84624)
  • Loading branch information
arsenm authored and tstellar committed Nov 2, 2022
1 parent 80a9fc8 commit 5c68a1c
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 458 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
case ISD::STORE:
case ISD::BUILD_VECTOR:
case ISD::BITCAST:
case ISD::UNDEF:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
Expand Down Expand Up @@ -516,6 +517,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
case ISD::STORE:
case ISD::BUILD_VECTOR:
case ISD::BITCAST:
case ISD::UNDEF:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
case ISD::INSERT_SUBVECTOR:
Expand Down
16 changes: 0 additions & 16 deletions llvm/test/CodeGen/AMDGPU/commute-shifts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,6 @@
define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
; SI-LABEL: main:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_mov_b32 s1, s0
; SI-NEXT: s_mov_b32 s2, s0
; SI-NEXT: s_mov_b32 s3, s0
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s0
; SI-NEXT: s_mov_b32 s6, s0
; SI-NEXT: s_mov_b32 s7, s0
; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; SI-NEXT: v_cvt_i32_f32_e32 v0, v0
; SI-NEXT: v_and_b32_e32 v0, 7, v0
Expand All @@ -26,14 +18,6 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
;
; VI-LABEL: main:
; VI: ; %bb.0: ; %bb
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_mov_b32 s1, s0
; VI-NEXT: s_mov_b32 s2, s0
; VI-NEXT: s_mov_b32 s3, s0
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s0
; VI-NEXT: s_mov_b32 s6, s0
; VI-NEXT: s_mov_b32 s7, s0
; VI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; VI-NEXT: v_cvt_i32_f32_e32 v0, v0
; VI-NEXT: v_and_b32_e32 v0, 7, v0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ if.else: ; preds = %entry
br label %if.end

if.end: ; preds = %if.else, %if.then
%call6.sink = phi <3 x i16> [ %call6, %if.else ], [ undef, %if.then ]
%call6.sink = phi <3 x i16> [ %call6, %if.else ], [ zeroinitializer, %if.then ]
store <3 x i16> %call6.sink, <3 x i16> addrspace(1)* undef
ret void
}
Expand Down Expand Up @@ -266,7 +266,7 @@ if.else: ; preds = %entry
br label %if.end

if.end: ; preds = %if.else, %if.then
%call6.sink = phi <3 x half> [ %call6, %if.else ], [ undef, %if.then ]
%call6.sink = phi <3 x half> [ %call6, %if.else ], [ zeroinitializer, %if.then ]
store <3 x half> %call6.sink, <3 x half> addrspace(1)* undef
ret void
}
Expand Down
10 changes: 1 addition & 9 deletions llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,8 @@
define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GCN-LABEL: _amdgpu_ps_main:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_mov_b32 s1, s0
; GCN-NEXT: s_mov_b32 s2, s0
; GCN-NEXT: s_mov_b32 s3, s0
; GCN-NEXT: s_mov_b32 s4, s0
; GCN-NEXT: s_mov_b32 s5, s0
; GCN-NEXT: s_mov_b32 s6, s0
; GCN-NEXT: s_mov_b32 s7, s0
; GCN-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_clause 0x1
; GCN-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
Expand Down
78 changes: 6 additions & 72 deletions llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,7 @@ define <4 x i16> @vec_8xi16_extract_4xi16(<8 x i16> addrspace(1) * %p0, <8 x i16
; GFX9-NEXT: s_cbranch_execz .LBB0_3
; GFX9-NEXT: s_branch .LBB0_4
; GFX9-NEXT: .LBB0_2:
; GFX9-NEXT: s_mov_b32 s8, 0
; GFX9-NEXT: s_mov_b32 s9, s8
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s11, s8
; GFX9-NEXT: v_mov_b32_e32 v2, s8
; GFX9-NEXT: v_mov_b32_e32 v3, s9
; GFX9-NEXT: v_mov_b32_e32 v4, s10
; GFX9-NEXT: v_mov_b32_e32 v5, s11
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: .LBB0_3: ; %T
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -244,14 +237,7 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(<8 x i16> addrspace(1) * %p0, <8 x i
; GFX9-NEXT: s_cbranch_execz .LBB1_3
; GFX9-NEXT: s_branch .LBB1_4
; GFX9-NEXT: .LBB1_2:
; GFX9-NEXT: s_mov_b32 s8, 0
; GFX9-NEXT: s_mov_b32 s9, s8
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s11, s8
; GFX9-NEXT: v_mov_b32_e32 v2, s8
; GFX9-NEXT: v_mov_b32_e32 v3, s9
; GFX9-NEXT: v_mov_b32_e32 v4, s10
; GFX9-NEXT: v_mov_b32_e32 v5, s11
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: .LBB1_3: ; %T
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -386,14 +372,7 @@ define <4 x half> @vec_8xf16_extract_4xf16(<8 x half> addrspace(1) * %p0, <8 x h
; GFX9-NEXT: s_cbranch_execz .LBB2_3
; GFX9-NEXT: s_branch .LBB2_4
; GFX9-NEXT: .LBB2_2:
; GFX9-NEXT: s_mov_b32 s8, 0
; GFX9-NEXT: s_mov_b32 s9, s8
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s11, s8
; GFX9-NEXT: v_mov_b32_e32 v2, s8
; GFX9-NEXT: v_mov_b32_e32 v3, s9
; GFX9-NEXT: v_mov_b32_e32 v4, s10
; GFX9-NEXT: v_mov_b32_e32 v5, s11
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: .LBB2_3: ; %T
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -567,22 +546,7 @@ define <4 x i16> @vec_16xi16_extract_4xi16(<16 x i16> addrspace(1) * %p0, <16 x
; GFX9-NEXT: s_cbranch_execz .LBB3_3
; GFX9-NEXT: s_branch .LBB3_4
; GFX9-NEXT: .LBB3_2:
; GFX9-NEXT: s_mov_b32 s8, 0
; GFX9-NEXT: s_mov_b32 s9, s8
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s11, s8
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_mov_b32 s13, s8
; GFX9-NEXT: s_mov_b32 s14, s8
; GFX9-NEXT: s_mov_b32 s15, s8
; GFX9-NEXT: v_mov_b32_e32 v4, s8
; GFX9-NEXT: v_mov_b32_e32 v5, s9
; GFX9-NEXT: v_mov_b32_e32 v6, s10
; GFX9-NEXT: v_mov_b32_e32 v7, s11
; GFX9-NEXT: v_mov_b32_e32 v8, s12
; GFX9-NEXT: v_mov_b32_e32 v9, s13
; GFX9-NEXT: v_mov_b32_e32 v10, s14
; GFX9-NEXT: v_mov_b32_e32 v11, s15
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GFX9-NEXT: .LBB3_3: ; %T
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
Expand Down Expand Up @@ -759,22 +723,7 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(<16 x i16> addrspace(1) * %p0, <16
; GFX9-NEXT: s_cbranch_execz .LBB4_3
; GFX9-NEXT: s_branch .LBB4_4
; GFX9-NEXT: .LBB4_2:
; GFX9-NEXT: s_mov_b32 s8, 0
; GFX9-NEXT: s_mov_b32 s9, s8
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s11, s8
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_mov_b32 s13, s8
; GFX9-NEXT: s_mov_b32 s14, s8
; GFX9-NEXT: s_mov_b32 s15, s8
; GFX9-NEXT: v_mov_b32_e32 v4, s8
; GFX9-NEXT: v_mov_b32_e32 v5, s9
; GFX9-NEXT: v_mov_b32_e32 v6, s10
; GFX9-NEXT: v_mov_b32_e32 v7, s11
; GFX9-NEXT: v_mov_b32_e32 v8, s12
; GFX9-NEXT: v_mov_b32_e32 v9, s13
; GFX9-NEXT: v_mov_b32_e32 v10, s14
; GFX9-NEXT: v_mov_b32_e32 v11, s15
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GFX9-NEXT: .LBB4_3: ; %T
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
Expand Down Expand Up @@ -949,22 +898,7 @@ define <4 x half> @vec_16xf16_extract_4xf16(<16 x half> addrspace(1) * %p0, <16
; GFX9-NEXT: s_cbranch_execz .LBB5_3
; GFX9-NEXT: s_branch .LBB5_4
; GFX9-NEXT: .LBB5_2:
; GFX9-NEXT: s_mov_b32 s8, 0
; GFX9-NEXT: s_mov_b32 s9, s8
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s11, s8
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_mov_b32 s13, s8
; GFX9-NEXT: s_mov_b32 s14, s8
; GFX9-NEXT: s_mov_b32 s15, s8
; GFX9-NEXT: v_mov_b32_e32 v4, s8
; GFX9-NEXT: v_mov_b32_e32 v5, s9
; GFX9-NEXT: v_mov_b32_e32 v6, s10
; GFX9-NEXT: v_mov_b32_e32 v7, s11
; GFX9-NEXT: v_mov_b32_e32 v8, s12
; GFX9-NEXT: v_mov_b32_e32 v9, s13
; GFX9-NEXT: v_mov_b32_e32 v10, s14
; GFX9-NEXT: v_mov_b32_e32 v11, s15
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GFX9-NEXT: .LBB5_3: ; %T
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
Expand Down
14 changes: 3 additions & 11 deletions llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -374,18 +374,10 @@ define <4 x float> @insertelement_to_sgpr() nounwind {
; GCN-LABEL: insertelement_to_sgpr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0
; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s12, 0
; GCN-NEXT: s_mov_b32 s4, s12
; GCN-NEXT: s_mov_b32 s5, s12
; GCN-NEXT: s_mov_b32 s6, s12
; GCN-NEXT: s_mov_b32 s7, s12
; GCN-NEXT: s_mov_b32 s8, s12
; GCN-NEXT: s_mov_b32 s9, s12
; GCN-NEXT: s_mov_b32 s10, s12
; GCN-NEXT: s_mov_b32 s11, s12
; GCN-NEXT: image_gather4_lz v[0:3], v[0:1], s[4:11], s[12:15] dmask:0x1
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: image_gather4_lz v[0:3], v[0:1], s[4:11], s[4:7] dmask:0x1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%tmp = load <4 x i32>, <4 x i32> addrspace(4)* undef
Expand Down
Loading

0 comments on commit 5c68a1c

Please sign in to comment.