Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SDAG] Don't treat ISD::SHL as a uniform binary operator in ShrinkDemandedOp #92753

Merged
merged 2 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
if (VT.isVector())
return false;

assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
"ShrinkDemandedOp only supports operands that have the same size!");

// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
Expand Down Expand Up @@ -1832,11 +1836,33 @@ bool TargetLowering::SimplifyDemandedBits(
}
}

// TODO: Can we merge this fold with the one below?
// Try shrinking the operation as long as the shift amount will still be
// in range.
dtcxzyw marked this conversation as resolved.
Show resolved Hide resolved
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
Op.getNode()->hasOneUse()) {
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
unsigned DemandedSize = DemandedBits.getActiveBits();
for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
if (isNarrowingProfitable(VT, SmallVT) &&
isTypeDesirableForOp(ISD::SHL, SmallVT) &&
isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using isZExtFree for a context where you use ANY_EXTEND is kind of wrong, but we don't have anything better right ow

(!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
assert(DemandedSize <= SmallVTBits &&
"Narrowed below demanded bits?");
// We found a type with free casts.
SDValue NarrowShl = TLO.DAG.getNode(
ISD::SHL, dl, SmallVT,
TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
}
}

// Narrow shift to lower half - similar to ShrinkDemandedOp.
// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
Expand Down
11 changes: 4 additions & 7 deletions llvm/test/CodeGen/AArch64/bitfield-insert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,10 @@ define void @test_64bit_badmask(ptr %existing, ptr %new) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: mov w10, #135 // =0x87
; CHECK-NEXT: mov w11, #664 // =0x298
; CHECK-NEXT: lsl w9, w9, #3
; CHECK-NEXT: and x8, x8, x10
; CHECK-NEXT: and x9, x9, x11
; CHECK-NEXT: mov w10, #664 // =0x298
; CHECK-NEXT: mov w11, #135 // =0x87
; CHECK-NEXT: and x9, x10, x9, lsl #3
; CHECK-NEXT: and x8, x8, x11
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: ret
Expand Down Expand Up @@ -579,7 +578,6 @@ define <2 x i32> @test_complex_type(ptr %addr, i64 %in, ptr %bf ) {
define i64 @test_truncated_shift(i64 %x, i64 %y) {
; CHECK-LABEL: test_truncated_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $w1 killed $w1 killed $x1 def $x1
; CHECK-NEXT: bfi x0, x1, #25, #5
; CHECK-NEXT: ret
entry:
Expand All @@ -593,7 +591,6 @@ entry:
define i64 @test_and_extended_shift_with_imm(i64 %0) {
; CHECK-LABEL: test_and_extended_shift_with_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 def $x0
; CHECK-NEXT: ubfiz x0, x0, #7, #8
; CHECK-NEXT: ret
%2 = shl i64 %0, 7
Expand Down
118 changes: 57 additions & 61 deletions llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -571,29 +571,27 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB5_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp x10, x9, [x0]
; CHECK-NEXT: ldrb w13, [x0, #18]
; CHECK-NEXT: ldrh w14, [x0, #16]
; CHECK-NEXT: ldp x9, x10, [x0]
; CHECK-NEXT: ldrb w14, [x0, #18]
; CHECK-NEXT: ldrh w15, [x0, #16]
; CHECK-NEXT: add x0, x0, #32
; CHECK-NEXT: ubfx x12, x9, #12, #20
; CHECK-NEXT: fmov s0, w10
; CHECK-NEXT: lsr x11, x10, #19
; CHECK-NEXT: lsr x15, x9, #31
; CHECK-NEXT: fmov s1, w12
; CHECK-NEXT: lsr x12, x9, #50
; CHECK-NEXT: mov.s v0[1], w11
; CHECK-NEXT: orr w11, w14, w13, lsl #16
; CHECK-NEXT: lsr x13, x10, #38
; CHECK-NEXT: lsr x10, x10, #57
; CHECK-NEXT: mov.s v1[1], w15
; CHECK-NEXT: orr w12, w12, w11, lsl #14
; CHECK-NEXT: orr w9, w10, w9, lsl #7
; CHECK-NEXT: lsr w10, w11, #5
; CHECK-NEXT: mov.s v0[2], w13
; CHECK-NEXT: ubfx x12, x10, #12, #20
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: lsr x11, x9, #19
; CHECK-NEXT: lsr x13, x10, #31
; CHECK-NEXT: fmov s0, w12
; CHECK-NEXT: lsr x12, x9, #38
; CHECK-NEXT: extr x9, x10, x9, #57
; CHECK-NEXT: mov.s v1[1], w11
; CHECK-NEXT: orr x11, x15, x14, lsl #16
; CHECK-NEXT: mov.s v0[1], w13
; CHECK-NEXT: extr x13, x11, x10, #50
; CHECK-NEXT: ubfx x10, x11, #5, #27
; CHECK-NEXT: mov.s v1[2], w12
; CHECK-NEXT: mov.s v0[3], w9
; CHECK-NEXT: mov.s v1[3], w10
; CHECK-NEXT: uzp1.8h v0, v0, v1
; CHECK-NEXT: mov.s v0[2], w13
; CHECK-NEXT: mov.s v1[3], w9
; CHECK-NEXT: mov.s v0[3], w10
; CHECK-NEXT: uzp1.8h v0, v1, v0
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: str d0, [x1, x8, lsl #3]
; CHECK-NEXT: add x8, x8, #1
Expand All @@ -608,35 +606,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: .LBB5_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldp x10, x9, [x0]
; CHECK-BE-NEXT: ldrb w16, [x0, #18]
; CHECK-BE-NEXT: lsr x11, x9, #40
; CHECK-BE-NEXT: ubfx x12, x9, #33, #7
; CHECK-BE-NEXT: lsr x15, x10, #45
; CHECK-BE-NEXT: lsr x13, x10, #40
; CHECK-BE-NEXT: ubfx x14, x10, #26, #14
; CHECK-BE-NEXT: orr w11, w12, w11, lsl #7
; CHECK-BE-NEXT: ldrh w12, [x0, #16]
; CHECK-BE-NEXT: fmov s0, w15
; CHECK-BE-NEXT: orr w13, w14, w13, lsl #14
; CHECK-BE-NEXT: ubfx x14, x9, #14, #18
; CHECK-BE-NEXT: ldrh w16, [x0, #16]
; CHECK-BE-NEXT: ldrb w17, [x0, #18]
; CHECK-BE-NEXT: add x0, x0, #32
; CHECK-BE-NEXT: fmov s1, w11
; CHECK-BE-NEXT: orr w11, w16, w12, lsl #8
; CHECK-BE-NEXT: lsl x12, x9, #24
; CHECK-BE-NEXT: mov v0.s[1], w13
; CHECK-BE-NEXT: lsl x11, x9, #24
; CHECK-BE-NEXT: lsr x12, x9, #40
; CHECK-BE-NEXT: lsr x13, x10, #45
; CHECK-BE-NEXT: lsl x14, x10, #24
; CHECK-BE-NEXT: lsr x15, x10, #40
; CHECK-BE-NEXT: extr x12, x12, x11, #57
; CHECK-BE-NEXT: fmov s0, w13
; CHECK-BE-NEXT: ubfx x13, x10, #7, #25
; CHECK-BE-NEXT: extr x14, x15, x14, #50
; CHECK-BE-NEXT: ubfx x15, x9, #14, #18
; CHECK-BE-NEXT: extr x9, x10, x9, #40
; CHECK-BE-NEXT: orr w12, w11, w12
; CHECK-BE-NEXT: mov v1.s[1], w14
; CHECK-BE-NEXT: lsr w12, w12, #19
; CHECK-BE-NEXT: fmov s1, w12
; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8
; CHECK-BE-NEXT: mov v0.s[1], w14
; CHECK-BE-NEXT: ubfx x9, x9, #12, #20
; CHECK-BE-NEXT: orr w11, w12, w11
; CHECK-BE-NEXT: mov v1.s[1], w15
; CHECK-BE-NEXT: lsr w11, w11, #19
; CHECK-BE-NEXT: mov v0.s[2], w13
; CHECK-BE-NEXT: mov v1.s[2], w12
; CHECK-BE-NEXT: mov v1.s[2], w11
; CHECK-BE-NEXT: mov v0.s[3], w9
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: mov v1.s[3], w11
; CHECK-BE-NEXT: mov v1.s[3], w12
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
Expand All @@ -650,35 +647,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
; CHECK-DISABLE-NEXT: ldrb w16, [x0, #18]
; CHECK-DISABLE-NEXT: lsr x11, x9, #40
; CHECK-DISABLE-NEXT: ubfx x12, x9, #33, #7
; CHECK-DISABLE-NEXT: lsr x15, x10, #45
; CHECK-DISABLE-NEXT: lsr x13, x10, #40
; CHECK-DISABLE-NEXT: ubfx x14, x10, #26, #14
; CHECK-DISABLE-NEXT: orr w11, w12, w11, lsl #7
; CHECK-DISABLE-NEXT: ldrh w12, [x0, #16]
; CHECK-DISABLE-NEXT: fmov s0, w15
; CHECK-DISABLE-NEXT: orr w13, w14, w13, lsl #14
; CHECK-DISABLE-NEXT: ubfx x14, x9, #14, #18
; CHECK-DISABLE-NEXT: ldrh w16, [x0, #16]
; CHECK-DISABLE-NEXT: ldrb w17, [x0, #18]
; CHECK-DISABLE-NEXT: add x0, x0, #32
; CHECK-DISABLE-NEXT: fmov s1, w11
; CHECK-DISABLE-NEXT: orr w11, w16, w12, lsl #8
; CHECK-DISABLE-NEXT: lsl x12, x9, #24
; CHECK-DISABLE-NEXT: mov v0.s[1], w13
; CHECK-DISABLE-NEXT: lsl x11, x9, #24
; CHECK-DISABLE-NEXT: lsr x12, x9, #40
; CHECK-DISABLE-NEXT: lsr x13, x10, #45
; CHECK-DISABLE-NEXT: lsl x14, x10, #24
; CHECK-DISABLE-NEXT: lsr x15, x10, #40
; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57
; CHECK-DISABLE-NEXT: fmov s0, w13
; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50
; CHECK-DISABLE-NEXT: ubfx x15, x9, #14, #18
; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
; CHECK-DISABLE-NEXT: orr w12, w11, w12
; CHECK-DISABLE-NEXT: mov v1.s[1], w14
; CHECK-DISABLE-NEXT: lsr w12, w12, #19
; CHECK-DISABLE-NEXT: fmov s1, w12
; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8
; CHECK-DISABLE-NEXT: mov v0.s[1], w14
; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
; CHECK-DISABLE-NEXT: orr w11, w12, w11
; CHECK-DISABLE-NEXT: mov v1.s[1], w15
; CHECK-DISABLE-NEXT: lsr w11, w11, #19
; CHECK-DISABLE-NEXT: mov v0.s[2], w13
; CHECK-DISABLE-NEXT: mov v1.s[2], w12
; CHECK-DISABLE-NEXT: mov v1.s[2], w11
; CHECK-DISABLE-NEXT: mov v0.s[3], w9
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: mov v1.s[3], w11
; CHECK-DISABLE-NEXT: mov v1.s[3], w12
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/X86/pr92720.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; Make sure we don't crash when shrinking the shift amount before legalization.
define i64 @pr92720(i64 %x) {
; CHECK-LABEL: pr92720:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $8589934592, %rax # imm = 0x200000000
; CHECK-NEXT: retq
%or = or i64 %x, 255
%sub = sub i64 0, %or
%shl = shl i64 1, %sub
%sext = shl i64 %shl, 32
ret i64 %sext
}
Loading