Skip to content

Commit

Permalink
[CodeGenPrepare] Transform shl X, cttz(Y) to mul (Y & -Y), X if cttz is unsupported
Browse files Browse the repository at this point in the history
  • Loading branch information
dtcxzyw committed Mar 21, 2024
1 parent 0a519f4 commit 43e36d8
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 140 deletions.
24 changes: 23 additions & 1 deletion llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8433,7 +8433,29 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return true;

switch (I->getOpcode()) {
case Instruction::Shl:
case Instruction::Shl: {
// shl X, cttz(Y) -> mul (Y & -Y), X if cttz is unsupported on the target.
Value *Y;
if (match(I->getOperand(1),
m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(Y))))) {
EVT VT = TLI->getValueType(*DL, Y->getType());
if (!TLI->isOperationLegalOrCustom(ISD::CTTZ, VT) &&
TLI->isOperationLegalOrCustom(ISD::MUL, VT)) {
IRBuilder<> Builder(I);
Value *NegY = Builder.CreateNeg(Y);
Value *Power2 = Builder.CreateAnd(Y, NegY);
Value *New = Builder.CreateMul(Power2, I->getOperand(0), "",
/*HasNUW=*/I->hasNoUnsignedWrap(),
/*HasNSW=*/false);
replaceAllUsesWith(I, New, FreshBBs, IsHugeFunc);
RecursivelyDeleteTriviallyDeadInstructions(
I, TLInfo, nullptr,
[&](Value *V) { removeAllAssertingVHReferences(V); });
return true;
}
}
}
[[fallthrough]];
case Instruction::LShr:
case Instruction::AShr:
return optimizeShiftInst(cast<BinaryOperator>(I));
Expand Down
240 changes: 101 additions & 139 deletions llvm/test/CodeGen/RISCV/shl-cttz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -383,15 +383,7 @@ define i32 @shl_cttz_i32(i32 %x, i32 %y) {
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: neg a2, a1
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: lui a2, 30667
; RV32I-NEXT: addi a2, a2, 1329
; RV32I-NEXT: mul a1, a1, a2
; RV32I-NEXT: srli a1, a1, 27
; RV32I-NEXT: lui a2, %hi(.LCPI4_0)
; RV32I-NEXT: addi a2, a2, %lo(.LCPI4_0)
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: mul a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: shl_cttz_i32:
Expand All @@ -400,26 +392,33 @@ define i32 @shl_cttz_i32(i32 %x, i32 %y) {
; RV32ZBB-NEXT: sll a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: shl_cttz_i32:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 30667
; RV64I-NEXT: addi a2, a2, 1329
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: srliw a1, a1, 27
; RV64I-NEXT: lui a2, %hi(.LCPI4_0)
; RV64I-NEXT: addi a2, a2, %lo(.LCPI4_0)
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
; RV64IILLEGALI32-LABEL: shl_cttz_i32:
; RV64IILLEGALI32: # %bb.0: # %entry
; RV64IILLEGALI32-NEXT: negw a2, a1
; RV64IILLEGALI32-NEXT: and a1, a1, a2
; RV64IILLEGALI32-NEXT: lui a2, 30667
; RV64IILLEGALI32-NEXT: addi a2, a2, 1329
; RV64IILLEGALI32-NEXT: mul a1, a1, a2
; RV64IILLEGALI32-NEXT: srliw a1, a1, 27
; RV64IILLEGALI32-NEXT: lui a2, %hi(.LCPI4_0)
; RV64IILLEGALI32-NEXT: addi a2, a2, %lo(.LCPI4_0)
; RV64IILLEGALI32-NEXT: add a1, a2, a1
; RV64IILLEGALI32-NEXT: lbu a1, 0(a1)
; RV64IILLEGALI32-NEXT: sllw a0, a0, a1
; RV64IILLEGALI32-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_i32:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: ctzw a1, a1
; RV64ZBB-NEXT: sllw a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ILEGALI32-LABEL: shl_cttz_i32:
; RV64ILEGALI32: # %bb.0: # %entry
; RV64ILEGALI32-NEXT: negw a2, a1
; RV64ILEGALI32-NEXT: and a1, a1, a2
; RV64ILEGALI32-NEXT: mulw a0, a1, a0
; RV64ILEGALI32-NEXT: ret
entry:
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
%res = shl i32 %x, %cttz
Expand All @@ -431,16 +430,7 @@ define i32 @shl_cttz_constant_i32(i32 %y) {
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: neg a1, a0
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 30667
; RV32I-NEXT: addi a1, a1, 1329
; RV32I-NEXT: mul a0, a0, a1
; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: lui a1, %hi(.LCPI5_0)
; RV32I-NEXT: addi a1, a1, %lo(.LCPI5_0)
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: lbu a0, 0(a0)
; RV32I-NEXT: li a1, 4
; RV32I-NEXT: sll a0, a1, a0
; RV32I-NEXT: slli a0, a0, 2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: shl_cttz_constant_i32:
Expand All @@ -450,28 +440,35 @@ define i32 @shl_cttz_constant_i32(i32 %y) {
; RV32ZBB-NEXT: sll a0, a1, a0
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: shl_cttz_constant_i32:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addi a1, a1, 1329
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI5_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI5_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: li a1, 4
; RV64I-NEXT: sllw a0, a1, a0
; RV64I-NEXT: ret
; RV64IILLEGALI32-LABEL: shl_cttz_constant_i32:
; RV64IILLEGALI32: # %bb.0: # %entry
; RV64IILLEGALI32-NEXT: negw a1, a0
; RV64IILLEGALI32-NEXT: and a0, a0, a1
; RV64IILLEGALI32-NEXT: lui a1, 30667
; RV64IILLEGALI32-NEXT: addi a1, a1, 1329
; RV64IILLEGALI32-NEXT: mul a0, a0, a1
; RV64IILLEGALI32-NEXT: srliw a0, a0, 27
; RV64IILLEGALI32-NEXT: lui a1, %hi(.LCPI5_0)
; RV64IILLEGALI32-NEXT: addi a1, a1, %lo(.LCPI5_0)
; RV64IILLEGALI32-NEXT: add a0, a1, a0
; RV64IILLEGALI32-NEXT: lbu a0, 0(a0)
; RV64IILLEGALI32-NEXT: li a1, 4
; RV64IILLEGALI32-NEXT: sllw a0, a1, a0
; RV64IILLEGALI32-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_constant_i32:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: ctzw a0, a0
; RV64ZBB-NEXT: li a1, 4
; RV64ZBB-NEXT: sllw a0, a1, a0
; RV64ZBB-NEXT: ret
;
; RV64ILEGALI32-LABEL: shl_cttz_constant_i32:
; RV64ILEGALI32: # %bb.0: # %entry
; RV64ILEGALI32-NEXT: negw a1, a0
; RV64ILEGALI32-NEXT: and a0, a0, a1
; RV64ILEGALI32-NEXT: slliw a0, a0, 2
; RV64ILEGALI32-NEXT: ret
entry:
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
%res = shl i32 4, %cttz
Expand All @@ -483,15 +480,7 @@ define i32 @shl_cttz_nuw_i32(i32 %x, i32 %y) {
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: neg a2, a1
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: lui a2, 30667
; RV32I-NEXT: addi a2, a2, 1329
; RV32I-NEXT: mul a1, a1, a2
; RV32I-NEXT: srli a1, a1, 27
; RV32I-NEXT: lui a2, %hi(.LCPI6_0)
; RV32I-NEXT: addi a2, a2, %lo(.LCPI6_0)
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: mul a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: shl_cttz_nuw_i32:
Expand All @@ -500,26 +489,33 @@ define i32 @shl_cttz_nuw_i32(i32 %x, i32 %y) {
; RV32ZBB-NEXT: sll a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: shl_cttz_nuw_i32:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 30667
; RV64I-NEXT: addi a2, a2, 1329
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: srliw a1, a1, 27
; RV64I-NEXT: lui a2, %hi(.LCPI6_0)
; RV64I-NEXT: addi a2, a2, %lo(.LCPI6_0)
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
; RV64IILLEGALI32-LABEL: shl_cttz_nuw_i32:
; RV64IILLEGALI32: # %bb.0: # %entry
; RV64IILLEGALI32-NEXT: negw a2, a1
; RV64IILLEGALI32-NEXT: and a1, a1, a2
; RV64IILLEGALI32-NEXT: lui a2, 30667
; RV64IILLEGALI32-NEXT: addi a2, a2, 1329
; RV64IILLEGALI32-NEXT: mul a1, a1, a2
; RV64IILLEGALI32-NEXT: srliw a1, a1, 27
; RV64IILLEGALI32-NEXT: lui a2, %hi(.LCPI6_0)
; RV64IILLEGALI32-NEXT: addi a2, a2, %lo(.LCPI6_0)
; RV64IILLEGALI32-NEXT: add a1, a2, a1
; RV64IILLEGALI32-NEXT: lbu a1, 0(a1)
; RV64IILLEGALI32-NEXT: sllw a0, a0, a1
; RV64IILLEGALI32-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_nuw_i32:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: ctzw a1, a1
; RV64ZBB-NEXT: sllw a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ILEGALI32-LABEL: shl_cttz_nuw_i32:
; RV64ILEGALI32: # %bb.0: # %entry
; RV64ILEGALI32-NEXT: negw a2, a1
; RV64ILEGALI32-NEXT: and a1, a1, a2
; RV64ILEGALI32-NEXT: mulw a0, a1, a0
; RV64ILEGALI32-NEXT: ret
entry:
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
%res = shl nuw i32 %x, %cttz
Expand All @@ -531,15 +527,7 @@ define i32 @shl_cttz_nsw_i32(i32 %x, i32 %y) {
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: neg a2, a1
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: lui a2, 30667
; RV32I-NEXT: addi a2, a2, 1329
; RV32I-NEXT: mul a1, a1, a2
; RV32I-NEXT: srli a1, a1, 27
; RV32I-NEXT: lui a2, %hi(.LCPI7_0)
; RV32I-NEXT: addi a2, a2, %lo(.LCPI7_0)
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: mul a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: shl_cttz_nsw_i32:
Expand All @@ -548,26 +536,33 @@ define i32 @shl_cttz_nsw_i32(i32 %x, i32 %y) {
; RV32ZBB-NEXT: sll a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: shl_cttz_nsw_i32:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 30667
; RV64I-NEXT: addi a2, a2, 1329
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: srliw a1, a1, 27
; RV64I-NEXT: lui a2, %hi(.LCPI7_0)
; RV64I-NEXT: addi a2, a2, %lo(.LCPI7_0)
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
; RV64IILLEGALI32-LABEL: shl_cttz_nsw_i32:
; RV64IILLEGALI32: # %bb.0: # %entry
; RV64IILLEGALI32-NEXT: negw a2, a1
; RV64IILLEGALI32-NEXT: and a1, a1, a2
; RV64IILLEGALI32-NEXT: lui a2, 30667
; RV64IILLEGALI32-NEXT: addi a2, a2, 1329
; RV64IILLEGALI32-NEXT: mul a1, a1, a2
; RV64IILLEGALI32-NEXT: srliw a1, a1, 27
; RV64IILLEGALI32-NEXT: lui a2, %hi(.LCPI7_0)
; RV64IILLEGALI32-NEXT: addi a2, a2, %lo(.LCPI7_0)
; RV64IILLEGALI32-NEXT: add a1, a2, a1
; RV64IILLEGALI32-NEXT: lbu a1, 0(a1)
; RV64IILLEGALI32-NEXT: sllw a0, a0, a1
; RV64IILLEGALI32-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_nsw_i32:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: ctzw a1, a1
; RV64ZBB-NEXT: sllw a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ILEGALI32-LABEL: shl_cttz_nsw_i32:
; RV64ILEGALI32: # %bb.0: # %entry
; RV64ILEGALI32-NEXT: negw a2, a1
; RV64ILEGALI32-NEXT: and a1, a1, a2
; RV64ILEGALI32-NEXT: mulw a0, a1, a0
; RV64ILEGALI32-NEXT: ret
entry:
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
%res = shl nsw i32 %x, %cttz
Expand Down Expand Up @@ -754,17 +749,9 @@ define i64 @shl_cttz_i64(i64 %x, i64 %y) {
;
; RV64I-LABEL: shl_cttz_i64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lui a2, %hi(.LCPI9_0)
; RV64I-NEXT: ld a2, %lo(.LCPI9_0)(a2)
; RV64I-NEXT: neg a3, a1
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: srli a1, a1, 58
; RV64I-NEXT: lui a2, %hi(.LCPI9_1)
; RV64I-NEXT: addi a2, a2, %lo(.LCPI9_1)
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: mul a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_i64:
Expand Down Expand Up @@ -847,18 +834,9 @@ define i64 @shl_cttz_constant_i64(i64 %y) {
;
; RV64I-LABEL: shl_cttz_constant_i64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lui a1, %hi(.LCPI10_0)
; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1)
; RV64I-NEXT: neg a2, a0
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: srli a0, a0, 58
; RV64I-NEXT: lui a1, %hi(.LCPI10_1)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_1)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: li a1, 4
; RV64I-NEXT: sll a0, a1, a0
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_constant_i64:
Expand Down Expand Up @@ -944,17 +922,9 @@ define i64 @shl_cttz_nuw_i64(i64 %x, i64 %y) {
;
; RV64I-LABEL: shl_cttz_nuw_i64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lui a2, %hi(.LCPI11_0)
; RV64I-NEXT: ld a2, %lo(.LCPI11_0)(a2)
; RV64I-NEXT: neg a3, a1
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: srli a1, a1, 58
; RV64I-NEXT: lui a2, %hi(.LCPI11_1)
; RV64I-NEXT: addi a2, a2, %lo(.LCPI11_1)
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: mul a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_nuw_i64:
Expand Down Expand Up @@ -1039,17 +1009,9 @@ define i64 @shl_cttz_nsw_i64(i64 %x, i64 %y) {
;
; RV64I-LABEL: shl_cttz_nsw_i64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lui a2, %hi(.LCPI12_0)
; RV64I-NEXT: ld a2, %lo(.LCPI12_0)(a2)
; RV64I-NEXT: neg a3, a1
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: srli a1, a1, 58
; RV64I-NEXT: lui a2, %hi(.LCPI12_1)
; RV64I-NEXT: addi a2, a2, %lo(.LCPI12_1)
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: mul a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: shl_cttz_nsw_i64:
Expand Down

0 comments on commit 43e36d8

Please sign in to comment.