-
Notifications
You must be signed in to change notification settings - Fork 12.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Fix case of 0 dynamic alloc when stack probing #74806
Conversation
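@llvm/pr-subscribers-github-workflow @llvm/pr-subscribers-backend-aarch64 Author: Oskar Wirga (oskarwirga) Changes: This is my first attempt at fixing a subtle bug which has manifested as part of stack probing for AArch64. If the dynamic allocation size is 0, then we will still probe the current `sp` value despite not decrementing `sp`! This results in overwriting stack data, in my case the stack canary. The fix here is to create a pretest condition which checks if `sp == TargetReg`. If this is the case, we skip past the whole loop and probe; if not, we dynamically probe as normal. Full diff: https://github.com/llvm/llvm-project/pull/74806.diff 5 Files Affected: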
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 93b8295f4f3ef..5ab05125a65cf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9469,6 +9469,7 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
+
MachineBasicBlock::iterator
AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
Register TargetReg, bool FrameSetup) const {
@@ -9482,6 +9483,9 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
DebugLoc DL = MBB.findDebugLoc(MBBI);
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *PreTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, PreTestMBB);
MachineBasicBlock *LoopTestMBB =
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, LoopTestMBB);
@@ -9490,9 +9494,27 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
MF.insert(MBBInsertPoint, LoopBodyMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, ExitMBB);
+ MachineBasicBlock *AfterLoopExitMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, AfterLoopExitMBB);
MachineInstr::MIFlag Flags =
FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
+ // PreTest:
+ // Compare SP and TargetReg
+ BuildMI(*PreTestMBB, PreTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(Flags);
+
+ // B.EQ AfterLoopExit
+ BuildMI(*PreTestMBB, PreTestMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::EQ)
+ .addMBB(AfterLoopExitMBB)
+ .setMIFlags(Flags);
+
// LoopTest:
// SUB SP, SP, #ProbeSize
emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
@@ -9506,7 +9528,7 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
.setMIFlags(Flags);
- // B.<Cond> LoopExit
+ // B.LE LoopExit
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::LE)
.addMBB(ExitMBB)
@@ -9539,22 +9561,32 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
.addImm(0)
.setMIFlags(Flags);
- ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ // B AfterLoopExit
+ BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::B))
+ .addMBB(AfterLoopExitMBB);
+
+ AfterLoopExitMBB->splice(AfterLoopExitMBB->end(), &MBB, std::next(MBBI),
+ MBB.end());
+ AfterLoopExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ PreTestMBB->addSuccessor(LoopTestMBB);
+ PreTestMBB->addSuccessor(AfterLoopExitMBB);
LoopTestMBB->addSuccessor(ExitMBB);
LoopTestMBB->addSuccessor(LoopBodyMBB);
LoopBodyMBB->addSuccessor(LoopTestMBB);
- MBB.addSuccessor(LoopTestMBB);
+ ExitMBB->addSuccessor(AfterLoopExitMBB);
+ MBB.addSuccessor(PreTestMBB);
// Update liveins.
if (MF.getRegInfo().reservedRegsFrozen()) {
+ recomputeLiveIns(*PreTestMBB);
recomputeLiveIns(*LoopTestMBB);
recomputeLiveIns(*LoopBodyMBB);
recomputeLiveIns(*ExitMBB);
+ recomputeLiveIns(*AfterLoopExitMBB);
}
- return ExitMBB->begin();
+ return AfterLoopExitMBB->begin();
}
#define GET_INSTRINFO_HELPERS
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-64k.ll b/llvm/test/CodeGen/AArch64/stack-probing-64k.ll
index 945c271d37500..d844ea8b3010e 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-64k.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-64k.ll
@@ -302,6 +302,8 @@ define void @static_16_align_131072(ptr %out) #0 {
; CHECK-NEXT: sub x9, sp, #31, lsl #12 // =126976
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and x9, x9, #0xfffffffffffe0000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB9_4
; CHECK-NEXT: .LBB9_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
@@ -314,6 +316,7 @@ define void @static_16_align_131072(ptr %out) #0 {
; CHECK-NEXT: .LBB9_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB9_4: // %entry
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index d247ed1b59977..7938771daf99a 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -19,6 +19,8 @@ define void @dynamic(i64 %size, ptr %out) #0 {
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB0_4
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -29,6 +31,7 @@ define void @dynamic(i64 %size, ptr %out) #0 {
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -59,10 +62,12 @@ define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; CHECK-NEXT: str xzr, [sp, #-64]!
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x10, x29, #64
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: str x10, [x1]
+; CHECK-NEXT: sub x10, x29, #64
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: str x10, [x1]
+; CHECK-NEXT: b.eq .LBB1_4
; CHECK-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -73,6 +78,7 @@ define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; CHECK-NEXT: .LBB1_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -108,11 +114,13 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB2_4
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -123,6 +131,7 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: .LBB2_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
@@ -156,6 +165,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4064
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB3_4
; CHECK-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x9
@@ -165,23 +176,27 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
-; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB3_8
+; CHECK-NEXT: .LBB3_5: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
-; CHECK-NEXT: b.le .LBB3_6
-; CHECK-NEXT: // %bb.5: // in Loop: Header=BB3_4 Depth=1
+; CHECK-NEXT: b.le .LBB3_7
+; CHECK-NEXT: // %bb.6: // in Loop: Header=BB3_5 Depth=1
; CHECK-NEXT: str xzr, [sp]
-; CHECK-NEXT: b .LBB3_4
-; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT: b .LBB3_5
+; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB3_8:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
@@ -212,6 +227,8 @@ define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB4_4
; CHECK-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: cmp sp, x8
@@ -222,6 +239,7 @@ define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536
; CHECK-NEXT: .LBB4_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -254,6 +272,8 @@ define void @no_reserved_call_frame(i64 %n) #0 {
; CHECK-NEXT: add x9, x9, #15
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: cmp sp, x0
+; CHECK-NEXT: b.eq .LBB5_4
; CHECK-NEXT: .LBB5_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -266,6 +286,7 @@ define void @no_reserved_call_frame(i64 %n) #0 {
; CHECK-NEXT: .LBB5_3: // %entry
; CHECK-NEXT: mov sp, x0
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB5_4: // %entry
; CHECK-NEXT: sub sp, sp, #1104
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: bl callee_stack_args
@@ -331,10 +352,12 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" {
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: mov x10, #15 // =0xf
-; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: madd x9, x0, x9, x10
+; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB7_4
; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -345,6 +368,7 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" {
; CHECK-NEXT: .LBB7_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB7_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll
index 4dad104e66f20..867c74001a356 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll
@@ -104,6 +104,8 @@ define void @sve_17_vector(ptr %out) #0 {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl x9, sp, #-17
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 136 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB3_4
; CHECK-NEXT: .LBB3_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -116,6 +118,7 @@ define void @sve_17_vector(ptr %out) #0 {
; CHECK-NEXT: .LBB3_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB3_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: addvl sp, sp, #17
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -340,6 +343,8 @@ define void @sve_16v_1p_csr(<vscale x 4 x float> %a) #0 {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl x9, sp, #-17
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 136 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB9_4
; CHECK-NEXT: .LBB9_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -352,6 +357,7 @@ define void @sve_16v_1p_csr(<vscale x 4 x float> %a) #0 {
; CHECK-NEXT: .LBB9_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB9_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill
@@ -456,6 +462,8 @@ define void @sve_1_vector_4096_arr(ptr %out) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x79, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 12304 + 256 * VG
; CHECK-NEXT: addvl x9, x9, #-32
; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x79, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 12304 + 512 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB11_4
; CHECK-NEXT: .LBB11_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -468,6 +476,7 @@ define void @sve_1_vector_4096_arr(ptr %out) #0 {
; CHECK-NEXT: .LBB11_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB11_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: addvl sp, sp, #31
; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x8f, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x88, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 12304 + 264 * VG
@@ -505,6 +514,8 @@ define void @sve_1_vector_16_arr_align_8192(ptr %out) #0 {
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: addvl x9, x9, #-1
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB12_4
; CHECK-NEXT: .LBB12_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -517,6 +528,7 @@ define void @sve_1_vector_16_arr_align_8192(ptr %out) #0 {
; CHECK-NEXT: .LBB12_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB12_4: // %entry
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -605,6 +617,8 @@ define void @sve_1028_64k_guard(ptr %out) #0 "stack-probe-size"="65536" {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 2048 * VG
; CHECK-NEXT: addvl x9, x9, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 2056 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB14_4
; CHECK-NEXT: .LBB14_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
@@ -617,6 +631,7 @@ define void @sve_1028_64k_guard(ptr %out) #0 "stack-probe-size"="65536" {
; CHECK-NEXT: .LBB14_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB14_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: addvl sp, sp, #31
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1808 * VG
diff --git a/llvm/test/CodeGen/AArch64/stack-probing.ll b/llvm/test/CodeGen/AArch64/stack-probing.ll
index 5c5d9321a56e5..4e54d938fd10a 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing.ll
@@ -389,6 +389,8 @@ define void @static_16_align_8192(ptr %out) #0 {
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB13_4
; CHECK-NEXT: .LBB13_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -401,6 +403,7 @@ define void @static_16_align_8192(ptr %out) #0 {
; CHECK-NEXT: .LBB13_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB13_4: // %entry
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
206635a
to
6624c44
Compare
Thanks for finding that! How about just changing the final |
I believe we discussed something like this in an earlier revision of your PR; I like it better. Do we need to worry about optimizations removing it? Should we change other probes to use it?
6624c44
to
24ff111
Compare
24ff111
to
93510a8
Compare
I accidentally closed #74806. If the dynamic allocation size is 0, then we will still probe the current sp value despite not decrementing sp! This results in overwriting stack data, in my case the stack canary. The fix here is just to load the value of [sp] into xzr, which is essentially a no-op but still performs a read/probe of the new page.
This is my first attempt at fixing a subtle bug which has manifested as part of stack probing for AArch64. If the dynamic allocation size is 0, then we will still probe the current `sp` value despite not decrementing `sp`! This results in overwriting stack data, in my case the stack canary. The fix here is to create a pretest condition which checks if `sp == TargetReg`. If this is the case, we skip past the whole loop and probe; if not, we dynamically probe as normal. This case adds 2 extra instructions. I spent some time trying to think of a more optimal solution, but I couldn't find something as compressed as the current loop.