AArch64: collect stack-probe pseudo-instructions before expanding them.

inlineStackProbe() now first gathers every PROBED_STACKALLOC and
PROBED_STACKALLOC_VAR instruction of the block into a vector and only then
expands and erases each one, instead of walking the block with an iterator
while the expansion may be creating new blocks. As a consequence
inlineStackProbeFixed() returns void, and the single-argument
inlineStackProbeFixed() overload and inlineStackProbeVar() are removed (their
bodies are folded into inlineStackProbe()).

Adds a MIR regression test for the crash that occurred when the probing
instruction to replace was the last instruction in its block.

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c219f79b96ea01..53e0a581762d4a 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4276,7 +4276,7 @@ AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
   return ExitMBB->begin();
 }
 
-MachineBasicBlock::iterator AArch64FrameLowering::inlineStackProbeFixed(
+void AArch64FrameLowering::inlineStackProbeFixed(
     MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
     StackOffset CFAOffset) const {
   MachineBasicBlock *MBB = MBBI->getParent();
@@ -4353,54 +4353,35 @@ MachineBasicBlock::iterator AArch64FrameLowering::inlineStackProbeFixed(
           .setMIFlags(MachineInstr::FrameSetup);
     }
   }
-
-  MachineBasicBlock::iterator Next = std::next(MBBI);
-  return Next;
-}
-
-MachineBasicBlock::iterator AArch64FrameLowering::inlineStackProbeFixed(
-    MachineBasicBlock::iterator MBBI) const {
-
-  Register ScratchReg = MBBI->getOperand(0).getReg();
-  int64_t FrameSize = MBBI->getOperand(1).getImm();
-  StackOffset CFAOffset = StackOffset::get(MBBI->getOperand(2).getImm(),
-                                           MBBI->getOperand(3).getImm());
-
-  MachineBasicBlock::iterator NextInst =
-      inlineStackProbeFixed(MBBI, ScratchReg, FrameSize, CFAOffset);
-
-  MBBI->eraseFromParent();
-  return NextInst;
-}
-
-MachineBasicBlock::iterator AArch64FrameLowering::inlineStackProbeVar(
-    MachineBasicBlock::iterator MBBI) const {
-  MachineBasicBlock &MBB = *MBBI->getParent();
-  MachineFunction &MF = *MBB.getParent();
-  const AArch64InstrInfo *TII =
-      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
-
-  DebugLoc DL = MBB.findDebugLoc(MBBI);
-  Register TargetReg = MBBI->getOperand(0).getReg();
-
-  MachineBasicBlock::iterator NextInst =
-      TII->probedStackAlloc(MBBI, TargetReg, true);
-
-  MBBI->eraseFromParent();
-  return NextInst;
 }
 
 void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
                                             MachineBasicBlock &MBB) const {
-  for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
-    if (MBBI->getOpcode() == AArch64::PROBED_STACKALLOC) {
-      MBBI = inlineStackProbeFixed(MBBI);
-      E = MBBI->getParent()->end();
-    } else if (MBBI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR) {
-      MBBI = inlineStackProbeVar(MBBI);
-      E = MBBI->getParent()->end();
+  // Get the instructions that need to be replaced. We emit at most two of
+  // these. Remember them in order to avoid complications coming from the need
+  // to traverse the block while potentially creating more blocks.
+  SmallVector<MachineInstr *, 4> ToReplace;
+  for (MachineInstr &MI : MBB)
+    if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
+        MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
+      ToReplace.push_back(&MI);
+
+  for (MachineInstr *MI : ToReplace) {
+    if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
+      Register ScratchReg = MI->getOperand(0).getReg();
+      int64_t FrameSize = MI->getOperand(1).getImm();
+      StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
+                                               MI->getOperand(3).getImm());
+      inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
+                            CFAOffset);
     } else {
-      ++MBBI;
+      assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
+             "Stack probe pseudo-instruction expected");
+      const AArch64InstrInfo *TII =
+          MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
+      Register TargetReg = MI->getOperand(0).getReg();
+      (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
     }
+    MI->eraseFromParent();
   }
 }
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index f0c3106cb7017b..941af03a78b738 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -164,15 +164,9 @@ class AArch64FrameLowering : public TargetFrameLowering {
   void inlineStackProbe(MachineFunction &MF,
                         MachineBasicBlock &PrologueMBB) const override;
 
-  MachineBasicBlock::iterator
-  inlineStackProbeFixed(MachineBasicBlock::iterator MBBI, Register ScratchReg,
-                        int64_t FrameSize, StackOffset CFAOffset) const;
-
-  MachineBasicBlock::iterator
-  inlineStackProbeFixed(MachineBasicBlock::iterator MBBI) const;
-
-  MachineBasicBlock::iterator
-  inlineStackProbeVar(MachineBasicBlock::iterator MBBI) const;
+  void inlineStackProbeFixed(MachineBasicBlock::iterator MBBI,
+                             Register ScratchReg, int64_t FrameSize,
+                             StackOffset CFAOffset) const;
 
   MachineBasicBlock::iterator
   inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI,
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir b/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir
new file mode 100644
index 00000000000000..6c8ec7e4c4fa92
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir
@@ -0,0 +1,146 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -run-pass=prologepilog %s -o - | FileCheck %s
+# Regression test for a crash when the probing instruction
+# to replace is last in the block.
+--- |
+  source_filename = "tt.ll"
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-linux"
+
+  declare i1 @g(ptr)
+
+  define void @f(ptr %out) #0 {
+  entry:
+    %p = alloca i32, i32 50000, align 4
+    br label %loop
+
+  loop:                                             ; preds = %loop, %entry
+    %c = call i1 @g(ptr %p)
+    br i1 %c, label %loop, label %exit
+
+  exit:                                             ; preds = %loop
+    ret void
+  }
+
+  attributes #0 = { uwtable "frame-pointer"="none" "probe-stack"="inline-asm" "target-features"="+sve" }
+
+...
+---
+name:            f
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: true
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  200000
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: p, type: default, offset: 0, size: 200000, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -200000, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: f
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $lr, $fp
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w30, -8
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w29, -16
+  ; CHECK-NEXT:   $x9 = frame-setup SUBXri $sp, 48, 12
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa $w9, 196624
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.entry:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $x9
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 1, 12
+  ; CHECK-NEXT:   frame-setup STRXui $xzr, $sp, 0
+  ; CHECK-NEXT:   $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+  ; CHECK-NEXT:   frame-setup Bcc 1, %bb.3, implicit $nzcv
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_register $wsp
+  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 3392, 0
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 200016
+  ; CHECK-NEXT:   frame-setup STRXui $xzr, $sp, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.loop:
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $x0 = ADDXri $sp, 0, 0
+  ; CHECK-NEXT:   BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $w0
+  ; CHECK-NEXT:   TBNZW killed renamable $w0, 0, %bb.1
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.exit:
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 48, 12
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 3408
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 3392, 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w30
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w29
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+
+
+  bb.1.loop:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $x0 = ADDXri %stack.0.p, 0, 0
+    BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $w0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    TBNZW killed renamable $w0, 0, %bb.1
+    B %bb.2
+
+  bb.2.exit:
+    RET_ReallyLR
+
+...