-
Notifications
You must be signed in to change notification settings - Fork 12.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LoopReroll] Fix rerolling loop with extra instructions
Fixes PR47627 This fix suppresses rerolling a loop which has an unrerollable instruction. Sample IR for the explanation below: ``` define void @foo([2 x i32]* nocapture %a) { entry: br label %loop loop: ; base instruction %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] ; unrerollable instructions %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvar, i64 0 store i32 999, i32* %stptrx, align 4 ; extra simple arithmetic operations, used by root instructions %plus20 = add nuw nsw i64 %indvar, 20 %plus10 = add nuw nsw i64 %indvar, 10 ; root instruction 0 %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 %value0 = load i32, i32* %ldptr0, align 4 %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 store i32 %value0, i32* %stptr0, align 4 ; root instruction 1 %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 %value1 = load i32, i32* %ldptr1, align 4 %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 store i32 %value1, i32* %stptr1, align 4 ; loop-increment and latch %indvar.next = add nuw nsw i64 %indvar, 1 %exitcond = icmp eq i64 %indvar.next, 5 br i1 %exitcond, label %exit, label %loop exit: ret void } ``` In the loop rerolling pass, `%indvar` and `%indvar.next` are appended to the `LoopIncs` vector in the `LoopReroll::DAGRootTracker::findRoots` function. Before this fix, two instructions with `unrerollable instructions` comment above are marked as `IL_All` at the end of the `LoopReroll::DAGRootTracker::collectUsedInstructions` function, as well as instructions with `extra simple arithmetic operations` comment and `loop-increment and latch` comment. It is incorrect because `IL_All` means that the instruction should be executed in all iterations of the rerolled loop but the `store` instruction should not. This fix rejects instructions which may have side effects and don't belong to def-use chains of any root instructions and reductions. See https://bugs.llvm.org/show_bug.cgi?id=47627 for more information. (cherry picked from commit d9a9c99)
- Loading branch information
1 parent
471a386
commit ac593de
Showing
2 changed files
with
274 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,268 @@ | ||
; RUN: opt -S -loop-reroll %s | FileCheck %s | ||
target triple = "aarch64--linux-gnu" | ||
|
||
define void @rerollable1([2 x i32]* nocapture %a) { | ||
entry: | ||
br label %loop | ||
|
||
loop: | ||
|
||
; CHECK-LABEL: loop: | ||
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv | ||
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv | ||
; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4 | ||
; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4 | ||
|
||
; base instruction | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
|
||
; NO unrerollable instructions | ||
|
||
; extra simple arithmetic operations, used by root instructions | ||
%plus20 = add nuw nsw i64 %iv, 20 | ||
%plus10 = add nuw nsw i64 %iv, 10 | ||
|
||
; root instruction 0 | ||
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 | ||
%value0 = load i32, i32* %ldptr0, align 4 | ||
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 | ||
store i32 %value0, i32* %stptr0, align 4 | ||
|
||
; root instruction 1 | ||
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 | ||
%value1 = load i32, i32* %ldptr1, align 4 | ||
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 | ||
store i32 %value1, i32* %stptr1, align 4 | ||
|
||
; loop-increment | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
|
||
; latch | ||
%exitcond = icmp eq i64 %iv.next, 5 | ||
br i1 %exitcond, label %exit, label %loop | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
define void @unrerollable1([2 x i32]* nocapture %a) { | ||
entry: | ||
br label %loop | ||
|
||
loop: | ||
|
||
; CHECK-LABEL: loop: | ||
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 | ||
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4 | ||
|
||
; base instruction | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
|
||
; unrerollable instructions using %iv | ||
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 | ||
store i32 999, i32* %stptrx, align 4 | ||
|
||
; extra simple arithmetic operations, used by root instructions | ||
%plus20 = add nuw nsw i64 %iv, 20 | ||
%plus10 = add nuw nsw i64 %iv, 10 | ||
|
||
; root instruction 0 | ||
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 | ||
%value0 = load i32, i32* %ldptr0, align 4 | ||
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 | ||
store i32 %value0, i32* %stptr0, align 4 | ||
|
||
; root instruction 1 | ||
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 | ||
%value1 = load i32, i32* %ldptr1, align 4 | ||
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 | ||
store i32 %value1, i32* %stptr1, align 4 | ||
|
||
; loop-increment | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
|
||
; latch | ||
%exitcond = icmp eq i64 %iv.next, 5 | ||
br i1 %exitcond, label %exit, label %loop | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
define void @unrerollable2([2 x i32]* nocapture %a) { | ||
entry: | ||
br label %loop | ||
|
||
loop: | ||
|
||
; CHECK-LABEL: loop: | ||
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 | ||
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0 | ||
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4 | ||
|
||
; base instruction | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
|
||
; loop-increment | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
|
||
; unrerollable instructions using %iv.next | ||
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0 | ||
store i32 999, i32* %stptrx, align 4 | ||
|
||
; extra simple arithmetic operations, used by root instructions | ||
%plus20 = add nuw nsw i64 %iv, 20 | ||
%plus10 = add nuw nsw i64 %iv, 10 | ||
|
||
; root instruction 0 | ||
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 | ||
%value0 = load i32, i32* %ldptr0, align 4 | ||
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 | ||
store i32 %value0, i32* %stptr0, align 4 | ||
|
||
; root instruction 1 | ||
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 | ||
%value1 = load i32, i32* %ldptr1, align 4 | ||
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 | ||
store i32 %value1, i32* %stptr1, align 4 | ||
|
||
; latch | ||
%exitcond = icmp eq i64 %iv.next, 5 | ||
br i1 %exitcond, label %exit, label %loop | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
define dso_local void @rerollable2() { | ||
entry: | ||
br label %loop | ||
|
||
loop: | ||
|
||
; CHECK-LABEL: loop: | ||
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] | ||
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}} | ||
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}} | ||
|
||
; induction variable | ||
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] | ||
|
||
; scale instruction | ||
%iv.mul3 = mul nuw nsw i32 %iv, 3 | ||
|
||
; extra simple arithmetic operations, used by root instructions | ||
%iv.scaled = add nuw nsw i32 %iv.mul3, 20 | ||
|
||
; NO unrerollable instructions | ||
|
||
; root set 1 | ||
|
||
; base instruction | ||
%iv.scaled.div5 = udiv i32 %iv.scaled, 5 | ||
tail call void @bar(i32 %iv.scaled.div5) | ||
; root instruction 0 | ||
%iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 | ||
%iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 | ||
tail call void @bar(i32 %iv.scaled.add1.div5) | ||
; root instruction 2 | ||
%iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 | ||
%iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 | ||
tail call void @bar(i32 %iv.scaled.add2.div5) | ||
|
||
; root set 2 | ||
|
||
; base instruction | ||
%iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 | ||
%iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 | ||
tail call void @bar(i32 %iv.scaled.add4.div5) | ||
; root instruction 0 | ||
%iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 | ||
%iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 | ||
tail call void @bar(i32 %iv.scaled.add5.div5) | ||
; root instruction 2 | ||
%iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 | ||
%iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 | ||
tail call void @bar(i32 %iv.scaled.add6.div5) | ||
|
||
; loop-increment | ||
%iv.next = add nuw nsw i32 %iv, 1 | ||
|
||
; latch | ||
%cmp = icmp ult i32 %iv.next, 3 | ||
br i1 %cmp, label %loop, label %exit | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
define dso_local void @unrerollable3() { | ||
entry: | ||
br label %loop | ||
|
||
loop: | ||
|
||
; CHECK-LABEL: loop: | ||
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] | ||
; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3 | ||
; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20 | ||
; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7 | ||
; CHECK-NEXT: tail call void @bar(i32 %iv.mul7) | ||
|
||
; induction variable | ||
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] | ||
|
||
; scale instruction | ||
%iv.mul3 = mul nuw nsw i32 %iv, 3 | ||
|
||
; extra simple arithmetic operations, used by root instructions | ||
%iv.scaled = add nuw nsw i32 %iv.mul3, 20 | ||
|
||
; unrerollable instructions using %iv | ||
%iv.mul7 = mul nuw nsw i32 %iv, 7 | ||
tail call void @bar(i32 %iv.mul7) | ||
|
||
; root set 1 | ||
|
||
; base instruction | ||
%iv.scaled.div5 = udiv i32 %iv.scaled, 5 | ||
tail call void @bar(i32 %iv.scaled.div5) | ||
; root instruction 0 | ||
%iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 | ||
%iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 | ||
tail call void @bar(i32 %iv.scaled.add1.div5) | ||
; root instruction 2 | ||
%iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 | ||
%iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 | ||
tail call void @bar(i32 %iv.scaled.add2.div5) | ||
|
||
; root set 2 | ||
|
||
; base instruction | ||
%iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 | ||
%iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 | ||
tail call void @bar(i32 %iv.scaled.add4.div5) | ||
; root instruction 0 | ||
%iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 | ||
%iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 | ||
tail call void @bar(i32 %iv.scaled.add5.div5) | ||
; root instruction 2 | ||
%iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 | ||
%iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 | ||
tail call void @bar(i32 %iv.scaled.add6.div5) | ||
|
||
; loop-increment | ||
%iv.next = add nuw nsw i32 %iv, 1 | ||
|
||
; latch | ||
%cmp = icmp ult i32 %iv.next, 3 | ||
br i1 %cmp, label %loop, label %exit | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
declare dso_local void @bar(i32) |