From a9dcfd813ebe24d5c958ac9e73ee943a8463246b Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 20 May 2024 21:37:41 +0800 Subject: [PATCH 1/2] [SDAG] Don't treat ISD::SHL as a uniform binary operator in `ShrinkDemandedOp` --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 31 ++++- llvm/test/CodeGen/AArch64/bitfield-insert.ll | 11 +- llvm/test/CodeGen/AArch64/trunc-to-tbl.ll | 118 +++++++++--------- llvm/test/CodeGen/X86/pr92720.ll | 15 +++ 4 files changed, 104 insertions(+), 71 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr92720.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3ec6b9b795079d..83dee6db3787ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -587,6 +587,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, if (VT.isVector()) return false; + assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth && + Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth && + "ShrinkDemandedOp only supports operands that have the same size!"); + // Don't do this if the node has another user, which may require the // full value. if (!Op.getNode()->hasOneUse()) @@ -1834,9 +1838,30 @@ bool TargetLowering::SimplifyDemandedBits( // Try shrinking the operation as long as the shift amount will still be // in range. - if ((ShAmt < DemandedBits.getActiveBits()) && - ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) - return true; + if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() && + Op.getNode()->hasOneUse()) { + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + unsigned DemandedSize = DemandedBits.getActiveBits(); + for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize); + SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits); + if (isNarrowingProfitable(VT, SmallVT) && + isTypeDesirableForOp(ISD::SHL, SmallVT) && + isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) && + (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) { + assert(DemandedSize <= SmallVTBits && + "Narrowed below demanded bits?"); + // We found a type with free casts. + SDValue NarrowShl = TLO.DAG.getNode( + ISD::SHL, dl, SmallVT, + TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), + TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); + } + } + } // Narrow shift to lower half - similar to ShrinkDemandedOp. // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K)) diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll index 30b5e86c1e6dc8..14a594e8028d5d 100644 --- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll +++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll @@ -193,11 +193,10 @@ define void @test_64bit_badmask(ptr %existing, ptr %new) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: mov w10, #135 // =0x87 -; CHECK-NEXT: mov w11, #664 // =0x298 -; CHECK-NEXT: lsl w9, w9, #3 -; CHECK-NEXT: and x8, x8, x10 -; CHECK-NEXT: and x9, x9, x11 +; CHECK-NEXT: mov w10, #664 // =0x298 +; CHECK-NEXT: mov w11, #135 // =0x87 +; CHECK-NEXT: and x9, x10, x9, lsl #3 +; CHECK-NEXT: and x8, x8, x11 ; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret @@ -579,7 +578,6 @@ define <2 x i32> @test_complex_type(ptr %addr, i64 %in, ptr %bf ) { define i64 @test_truncated_shift(i64 %x, i64 %y) { ; CHECK-LABEL: test_truncated_shift: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w1 killed $w1 killed $x1 def $x1 ; CHECK-NEXT: bfi x0, x1, #25, #5 ; CHECK-NEXT: ret entry: @@ -593,7 +591,6 @@ entry: define i64 @test_and_extended_shift_with_imm(i64 %0) { ; CHECK-LABEL: test_and_extended_shift_with_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 def $x0 ; CHECK-NEXT: ubfiz x0, x0, #7, #8 ; CHECK-NEXT: ret %2 = shl i64 %0, 7 diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index 18cd4cc2111a42..c4a58ba12dc6be 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -571,29 +571,27 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB5_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp x10, x9, [x0] -; CHECK-NEXT: ldrb w13, [x0, #18] -; CHECK-NEXT: ldrh w14, [x0, #16] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldrb w14, [x0, #18] +; CHECK-NEXT: ldrh w15, [x0, #16] ; CHECK-NEXT: add x0, x0, #32 -; CHECK-NEXT: ubfx x12, x9, #12, #20 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: lsr x11, x10, #19 -; CHECK-NEXT: lsr x15, x9, #31 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: lsr x12, x9, #50 -; CHECK-NEXT: mov.s v0[1], w11 -; CHECK-NEXT: orr w11, w14, w13, lsl #16 -; CHECK-NEXT: lsr x13, x10, #38 -; CHECK-NEXT: lsr x10, x10, #57 -; CHECK-NEXT: mov.s v1[1], w15 -; CHECK-NEXT: orr w12, w12, w11, lsl #14 -; CHECK-NEXT: orr w9, w10, w9, lsl #7 -; CHECK-NEXT: lsr w10, w11, #5 -; CHECK-NEXT: mov.s v0[2], w13 +; CHECK-NEXT: ubfx x12, x10, #12, #20 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: lsr x11, x9, #19 +; CHECK-NEXT: lsr x13, x10, #31 +; CHECK-NEXT: fmov s0, w12 +; CHECK-NEXT: lsr x12, x9, #38 +; CHECK-NEXT: extr x9, x10, x9, #57 +; CHECK-NEXT: mov.s v1[1], w11 +; CHECK-NEXT: orr x11, x15, x14, lsl #16 +; CHECK-NEXT: mov.s v0[1], w13 +; CHECK-NEXT: extr x13, x11, x10, #50 +; CHECK-NEXT: ubfx x10, x11, #5, #27 ; CHECK-NEXT: mov.s v1[2], w12 -; CHECK-NEXT: mov.s v0[3], w9 -; CHECK-NEXT: mov.s v1[3], w10 -; CHECK-NEXT: uzp1.8h v0, v0, v1 +; CHECK-NEXT: mov.s v0[2], w13 +; CHECK-NEXT: mov.s v1[3], w9 +; CHECK-NEXT: mov.s v0[3], w10 +; CHECK-NEXT: uzp1.8h v0, v1, v0 ; CHECK-NEXT: xtn.8b v0, v0 ; CHECK-NEXT: str d0, [x1, x8, lsl #3] ; CHECK-NEXT: add x8, x8, #1 @@ -608,35 +606,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: .LBB5_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: ldp x10, x9, [x0] -; CHECK-BE-NEXT: ldrb w16, [x0, #18] -; CHECK-BE-NEXT: lsr x11, x9, #40 -; CHECK-BE-NEXT: ubfx x12, x9, #33, #7 -; CHECK-BE-NEXT: lsr x15, x10, #45 -; CHECK-BE-NEXT: lsr x13, x10, #40 -; CHECK-BE-NEXT: ubfx x14, x10, #26, #14 -; CHECK-BE-NEXT: orr w11, w12, w11, lsl #7 -; CHECK-BE-NEXT: ldrh w12, [x0, #16] -; CHECK-BE-NEXT: fmov s0, w15 -; CHECK-BE-NEXT: orr w13, w14, w13, lsl #14 -; CHECK-BE-NEXT: ubfx x14, x9, #14, #18 +; CHECK-BE-NEXT: ldrh w16, [x0, #16] +; CHECK-BE-NEXT: ldrb w17, [x0, #18] ; CHECK-BE-NEXT: add x0, x0, #32 -; CHECK-BE-NEXT: fmov s1, w11 -; CHECK-BE-NEXT: orr w11, w16, w12, lsl #8 -; CHECK-BE-NEXT: lsl x12, x9, #24 -; CHECK-BE-NEXT: mov v0.s[1], w13 +; CHECK-BE-NEXT: lsl x11, x9, #24 +; CHECK-BE-NEXT: lsr x12, x9, #40 +; CHECK-BE-NEXT: lsr x13, x10, #45 +; CHECK-BE-NEXT: lsl x14, x10, #24 +; CHECK-BE-NEXT: lsr x15, x10, #40 +; CHECK-BE-NEXT: extr x12, x12, x11, #57 +; CHECK-BE-NEXT: fmov s0, w13 ; CHECK-BE-NEXT: ubfx x13, x10, #7, #25 +; CHECK-BE-NEXT: extr x14, x15, x14, #50 +; CHECK-BE-NEXT: ubfx x15, x9, #14, #18 ; CHECK-BE-NEXT: extr x9, x10, x9, #40 -; CHECK-BE-NEXT: orr w12, w11, w12 -; CHECK-BE-NEXT: mov v1.s[1], w14 -; CHECK-BE-NEXT: lsr w12, w12, #19 +; CHECK-BE-NEXT: fmov s1, w12 +; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8 +; CHECK-BE-NEXT: mov v0.s[1], w14 ; CHECK-BE-NEXT: ubfx x9, x9, #12, #20 +; CHECK-BE-NEXT: orr w11, w12, w11 +; CHECK-BE-NEXT: mov v1.s[1], w15 +; CHECK-BE-NEXT: lsr w11, w11, #19 ; CHECK-BE-NEXT: mov v0.s[2], w13 -; CHECK-BE-NEXT: mov v1.s[2], w12 +; CHECK-BE-NEXT: mov v1.s[2], w11 ; CHECK-BE-NEXT: mov v0.s[3], w9 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 ; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: cmp x8, #1000 -; CHECK-BE-NEXT: mov v1.s[3], w11 +; CHECK-BE-NEXT: mov v1.s[3], w12 ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-BE-NEXT: xtn v0.8b, v0.8h ; CHECK-BE-NEXT: st1 { v0.8b }, [x9] @@ -650,35 +647,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-DISABLE-NEXT: .LBB5_1: // %loop ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-DISABLE-NEXT: ldp x10, x9, [x0] -; CHECK-DISABLE-NEXT: ldrb w16, [x0, #18] -; CHECK-DISABLE-NEXT: lsr x11, x9, #40 -; CHECK-DISABLE-NEXT: ubfx x12, x9, #33, #7 -; CHECK-DISABLE-NEXT: lsr x15, x10, #45 -; CHECK-DISABLE-NEXT: lsr x13, x10, #40 -; CHECK-DISABLE-NEXT: ubfx x14, x10, #26, #14 -; CHECK-DISABLE-NEXT: orr w11, w12, w11, lsl #7 -; CHECK-DISABLE-NEXT: ldrh w12, [x0, #16] -; CHECK-DISABLE-NEXT: fmov s0, w15 -; CHECK-DISABLE-NEXT: orr w13, w14, w13, lsl #14 -; CHECK-DISABLE-NEXT: ubfx x14, x9, #14, #18 +; CHECK-DISABLE-NEXT: ldrh w16, [x0, #16] +; CHECK-DISABLE-NEXT: ldrb w17, [x0, #18] ; CHECK-DISABLE-NEXT: add x0, x0, #32 -; CHECK-DISABLE-NEXT: fmov s1, w11 -; CHECK-DISABLE-NEXT: orr w11, w16, w12, lsl #8 -; CHECK-DISABLE-NEXT: lsl x12, x9, #24 -; CHECK-DISABLE-NEXT: mov v0.s[1], w13 +; CHECK-DISABLE-NEXT: lsl x11, x9, #24 +; CHECK-DISABLE-NEXT: lsr x12, x9, #40 +; CHECK-DISABLE-NEXT: lsr x13, x10, #45 +; CHECK-DISABLE-NEXT: lsl x14, x10, #24 +; CHECK-DISABLE-NEXT: lsr x15, x10, #40 +; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57 +; CHECK-DISABLE-NEXT: fmov s0, w13 ; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25 +; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50 +; CHECK-DISABLE-NEXT: ubfx x15, x9, #14, #18 ; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40 -; CHECK-DISABLE-NEXT: orr w12, w11, w12 -; CHECK-DISABLE-NEXT: mov v1.s[1], w14 -; CHECK-DISABLE-NEXT: lsr w12, w12, #19 +; CHECK-DISABLE-NEXT: fmov s1, w12 +; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8 +; CHECK-DISABLE-NEXT: mov v0.s[1], w14 ; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20 +; CHECK-DISABLE-NEXT: orr w11, w12, w11 +; CHECK-DISABLE-NEXT: mov v1.s[1], w15 +; CHECK-DISABLE-NEXT: lsr w11, w11, #19 ; CHECK-DISABLE-NEXT: mov v0.s[2], w13 -; CHECK-DISABLE-NEXT: mov v1.s[2], w12 +; CHECK-DISABLE-NEXT: mov v1.s[2], w11 ; CHECK-DISABLE-NEXT: mov v0.s[3], w9 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3 ; CHECK-DISABLE-NEXT: add x8, x8, #1 ; CHECK-DISABLE-NEXT: cmp x8, #1000 -; CHECK-DISABLE-NEXT: mov v1.s[3], w11 +; CHECK-DISABLE-NEXT: mov v1.s[3], w12 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9] diff --git a/llvm/test/CodeGen/X86/pr92720.ll b/llvm/test/CodeGen/X86/pr92720.ll new file mode 100644 index 00000000000000..b2543c08328c7a --- /dev/null +++ b/llvm/test/CodeGen/X86/pr92720.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s + +; Make sure we don't crash when shrinking the shift amount before legalization. +define i64 @pr92720(i64 %x) { +; CHECK-LABEL: pr92720: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $8589934592, %rax # imm = 0x200000000 +; CHECK-NEXT: retq + %or = or i64 %x, 255 + %sub = sub i64 0, %or + %shl = shl i64 1, %sub + %sext = shl i64 %shl, 32 + ret i64 %sext +} From 5ba8a4fd138b16b4c671b058ce4cdb7c65d7b7a0 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 22 May 2024 19:31:03 +0800 Subject: [PATCH 2/2] [SDAG] Add todo. NFC. --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 83dee6db3787ef..50645270452623 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1836,6 +1836,7 @@ bool TargetLowering::SimplifyDemandedBits( } } + // TODO: Can we merge this fold with the one below? // Try shrinking the operation as long as the shift amount will still be // in range. if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&