Skip to content

Commit

Permalink
[InstCombine] fold flooring sdiv by power-of-2 to ashr
Browse files Browse the repository at this point in the history
It's a bigger match than usual, but I have not found any
sub-patterns that reduce:
(X / DivC) + sext ((X & (SMin | (DivC - 1))) >u SMin) --> X >>s log2(DivC)

https://alive2.llvm.org/ce/z/MJzlhl

Fixes issue #57741
  • Loading branch information
rotateright committed Dec 18, 2022
1 parent 3c7d059 commit 86b4a23
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 11 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,35 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
return nullptr;
}

/// Try to reduce signed division by power-of-2 to an arithmetic shift right.
/// Matches (X /s DivC) + sext(icmp ugt (X & (SMin | (DivC-1))), SMin) and
/// rewrites it as X >>s log2(DivC). Returns the replacement instruction, or
/// nullptr if the pattern does not match.
static Instruction *foldAddToAshr(BinaryOperator &Add) {
  // Division must be by power-of-2, but not the minimum signed value.
  // m_Power2 also matches SMin (it has exactly one bit set), so the
  // isNegative() check is what excludes a divide by SMin.
  Value *X;
  const APInt *DivC;
  if (!match(Add.getOperand(0), m_SDiv(m_Value(X), m_Power2(DivC))) ||
      DivC->isNegative())
    return nullptr;

  // Rounding is done by adding -1 if the dividend (X) is negative and has any
  // low bits set. The canonical pattern for that is an "ugt" compare with
  // SMIN, where the mask keeps the sign bit plus the discarded low bits:
  // sext (icmp ugt (X & (SMin | (DivC - 1))), SMIN)
  // Only the matcher shape is checked here; the predicate and the exact mask
  // constant are validated below.
  const APInt *MaskC;
  ICmpInst::Predicate Pred;
  if (!match(Add.getOperand(1),
             m_SExt(m_ICmp(Pred, m_And(m_Specific(X), m_APInt(MaskC)),
                           m_SignMask()))) ||
      Pred != ICmpInst::ICMP_UGT)
    return nullptr;

  // The mask must be exactly the sign bit OR'd with the low bits that the
  // division discards; any other mask is not the flooring-sdiv idiom.
  APInt SMin = APInt::getSignedMinValue(Add.getType()->getScalarSizeInBits());
  if (*MaskC != (SMin | (*DivC - 1)))
    return nullptr;

  // (X / DivC) + sext ((X & (SMin | (DivC - 1))) >u SMin) --> X >>s log2(DivC)
  // exactLogBase2 is safe: DivC was matched as a non-negative power of 2.
  return BinaryOperator::CreateAShr(
      X, ConstantInt::get(Add.getType(), DivC->exactLogBase2()));
}

Instruction *InstCombinerImpl::
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
BinaryOperator &I) {
Expand Down Expand Up @@ -1484,6 +1513,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(B, Shl);
}

if (Instruction *Ashr = foldAddToAshr(I))
return Ashr;

// TODO(jingyue): Consider willNotOverflowSignedAdd and
// willNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
Expand Down
28 changes: 17 additions & 11 deletions llvm/test/Transforms/InstCombine/add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2462,11 +2462,7 @@ define i9 @sext_zext_not_commute(i4 %x) {

define i32 @floor_sdiv(i32 %x) {
; CHECK-LABEL: @floor_sdiv(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
; CHECK-NEXT: [[A:%.*]] = and i32 [[X]], -2147483645
; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[A]], -2147483648
; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[D]], [[S]]
; CHECK-NEXT: [[R:%.*]] = ashr i32 [[X:%.*]], 2
; CHECK-NEXT: ret i32 [[R]]
;
%d = sdiv i32 %x, 4
Expand All @@ -2477,13 +2473,11 @@ define i32 @floor_sdiv(i32 %x) {
ret i32 %r
}

; vectors work too and commute is handled by complexity-based canonicalization

define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) {
; CHECK-LABEL: @floor_sdiv_vec_commute(
; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i32> [[X:%.*]], <i32 4, i32 4>
; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X]], <i32 -2147483645, i32 -2147483645>
; CHECK-NEXT: [[I:%.*]] = icmp ugt <2 x i32> [[A]], <i32 -2147483648, i32 -2147483648>
; CHECK-NEXT: [[S:%.*]] = sext <2 x i1> [[I]] to <2 x i32>
; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[D]], [[S]]
; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 2, i32 2>
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%d = sdiv <2 x i32> %x, <i32 4, i32 4>
Expand All @@ -2494,6 +2488,8 @@ define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) {
ret <2 x i32> %r
}

; extra uses are ok

define i8 @floor_sdiv_uses(i8 %x) {
; CHECK-LABEL: @floor_sdiv_uses(
; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], 16
Expand All @@ -2503,7 +2499,7 @@ define i8 @floor_sdiv_uses(i8 %x) {
; CHECK-NEXT: [[I:%.*]] = icmp ugt i8 [[A]], -128
; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i8
; CHECK-NEXT: call void @use(i8 [[S]])
; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[D]], [[S]]
; CHECK-NEXT: [[R:%.*]] = ashr i8 [[X]], 4
; CHECK-NEXT: ret i8 [[R]]
;
%d = sdiv i8 %x, 16
Expand All @@ -2517,6 +2513,8 @@ define i8 @floor_sdiv_uses(i8 %x) {
ret i8 %r
}

; negative test

define i32 @floor_sdiv_wrong_div(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_div(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 8
Expand All @@ -2534,6 +2532,8 @@ define i32 @floor_sdiv_wrong_div(i32 %x) {
ret i32 %r
}

; negative test

define i32 @floor_sdiv_wrong_mask(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_mask(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
Expand All @@ -2551,6 +2551,8 @@ define i32 @floor_sdiv_wrong_mask(i32 %x) {
ret i32 %r
}

; negative test

define i32 @floor_sdiv_wrong_cmp(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_cmp(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
Expand All @@ -2568,6 +2570,8 @@ define i32 @floor_sdiv_wrong_cmp(i32 %x) {
ret i32 %r
}

; negative test

define i32 @floor_sdiv_wrong_ext(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_ext(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
Expand All @@ -2585,6 +2589,8 @@ define i32 @floor_sdiv_wrong_ext(i32 %x) {
ret i32 %r
}

; negative test

define i32 @floor_sdiv_wrong_op(i32 %x, i32 %y) {
; CHECK-LABEL: @floor_sdiv_wrong_op(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
Expand Down

2 comments on commit 86b4a23

@nickdesaulniers
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rotateright is the Fixes tag correct on this?

@rotateright
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No - sorry, that was a typo in the commit message. This was for issue #57741.

Please sign in to comment.