From 61dab3c14d4bb6dd42e3dec640175620b3551eb3 Mon Sep 17 00:00:00 2001 From: Victor Ding Date: Mon, 16 Jul 2018 17:06:07 -0400 Subject: [PATCH] Remove deprecated outlined prolog/epilog logic on X86 Outlined prolog/epilog logic has long been deprecated on X86; remove it. Signed-off-by: Victor Ding --- runtime/compiler/runtime/Runtime.cpp | 20 -- .../compiler/x/codegen/X86PrivateLinkage.cpp | 305 +++++------------- runtime/compiler/x/runtime/X86Codert.asm | 170 ---------- 3 files changed, 72 insertions(+), 423 deletions(-) diff --git a/runtime/compiler/runtime/Runtime.cpp b/runtime/compiler/runtime/Runtime.cpp index 0c9997322c2..385eefc8334 100644 --- a/runtime/compiler/runtime/Runtime.cpp +++ b/runtime/compiler/runtime/Runtime.cpp @@ -338,16 +338,6 @@ JIT_HELPER(interpreterSyncXMM0DStaticGlue); JIT_HELPER(methodHandleJ2IGlue); JIT_HELPER(methodHandleJ2I_unwrapper); -JIT_HELPER(outlinedPrologue_0preserved); -JIT_HELPER(outlinedPrologue_1preserved); -JIT_HELPER(outlinedPrologue_2preserved); -JIT_HELPER(outlinedPrologue_3preserved); -JIT_HELPER(outlinedPrologue_4preserved); -JIT_HELPER(outlinedPrologue_5preserved); -JIT_HELPER(outlinedPrologue_6preserved); -JIT_HELPER(outlinedPrologue_7preserved); -JIT_HELPER(outlinedPrologue_8preserved); - // -------------------------------------------------------------------------------- // IA32 // -------------------------------------------------------------------------------- @@ -1191,16 +1181,6 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP) SET(TR_methodHandleJ2IGlue, (void *)methodHandleJ2IGlue, TR_Helper); SET(TR_methodHandleJ2I_unwrapper, (void *)methodHandleJ2I_unwrapper, TR_Helper); - SET(TR_outlinedPrologue_0preserved, (void *)outlinedPrologue_0preserved, TR_Helper); - SET(TR_outlinedPrologue_1preserved, (void *)outlinedPrologue_1preserved, TR_Helper); - SET(TR_outlinedPrologue_2preserved, (void *)outlinedPrologue_2preserved, TR_Helper); - SET(TR_outlinedPrologue_3preserved, (void 
*)outlinedPrologue_3preserved, TR_Helper); - SET(TR_outlinedPrologue_4preserved, (void *)outlinedPrologue_4preserved, TR_Helper); - SET(TR_outlinedPrologue_5preserved, (void *)outlinedPrologue_5preserved, TR_Helper); - SET(TR_outlinedPrologue_6preserved, (void *)outlinedPrologue_6preserved, TR_Helper); - SET(TR_outlinedPrologue_7preserved, (void *)outlinedPrologue_7preserved, TR_Helper); - SET(TR_outlinedPrologue_8preserved, (void *)outlinedPrologue_8preserved, TR_Helper); - #else // AMD64 // -------------------------------- IA32 ------------------------------------ diff --git a/runtime/compiler/x/codegen/X86PrivateLinkage.cpp b/runtime/compiler/x/codegen/X86PrivateLinkage.cpp index b080aff8146..2dbc2f3869e 100644 --- a/runtime/compiler/x/codegen/X86PrivateLinkage.cpp +++ b/runtime/compiler/x/codegen/X86PrivateLinkage.cpp @@ -620,9 +620,6 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor) TR_DebugFrameSegmentInfo *debugFrameSlotInfo=NULL; #endif - // Decide whether to use outlined prologue - // - bool disableOutlinedPrologue = !comp()->getOption(TR_EnableOutlinedPrologues); bool trace = comp()->getOption(TR_TraceCG); TR::RealRegister *espReal = machine()->getX86RealRegister(TR::RealRegister::esp); @@ -726,18 +723,6 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor) _properties.getRetAddressWidth()); } - if (!disableOutlinedPrologue && comp()->getOptLevel() >= hot) - { - // If we're spending lots of time in this method, chances are its - // prologues don't matter because we spend so much time running the body, - // so there's no benefit to the extra path length that outlined prologues add. 
- // - if (trace) - traceMsg(comp(), "OUTLINED PROLOGUES: Disable because opt level is %s\n", comp()->getHotnessName()); - disableOutlinedPrologue = true; - cursor = cg()->generateDebugCounter(cursor, TR::DebugCounter::debugCounterName(comp(), "cg.prologues:refusedToOutline/optLevel/%s",comp()->getHotnessName()), 1, TR::DebugCounter::Undetermined); - } - uint32_t numLocals = localSize >> getProperties().getParmSlotShift(); uint32_t numRegsPreservedOOL = preservedRegsSize >> getProperties().getParmSlotShift(); uint32_t numPreservesOmitted = 0; @@ -766,114 +751,13 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor) } } - bool outlinedPrologueWillPreserveRegisters = true; - if (!disableOutlinedPrologue && (preservationMask & (preservationMask+1)) != 0) - { - // Rather than try to communicate which regs to preserve, we'll preserve them inline. - // - if (trace) - traceMsg(comp(), "OUTLINED PROLOGUES: Disable because we're not preserving the first N preserved registers (mask=%x)\n", preservationMask); - disableOutlinedPrologue = true; - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:refusedToOutline/discontiguousPreserves", 1, TR::DebugCounter::Undetermined); - } - - static char *outlinedPrologueSlotThresholdStr = feGetEnv("TR_outlinedPrologueSlotThreshold"); - static int32_t outlinedPrologueSlotThreshold = outlinedPrologueSlotThresholdStr? 
atoi(outlinedPrologueSlotThresholdStr) : 0; - if (!disableOutlinedPrologue && numRegsPreservedOOL < outlinedPrologueSlotThreshold) - { - if (trace) - traceMsg(comp(), "OUTLINED PROLOGUES: Disable because extra path length is not worthwhile for %d preserved regs (threshold=%d)\n", numRegsPreservedOOL, outlinedPrologueSlotThreshold); - disableOutlinedPrologue = true; - cursor = cg()->generateDebugCounter(cursor, TR::DebugCounter::debugCounterName(comp(), "cg.prologues:refusedToOutline/belowSlotThreshold/numPreserved=%d", numRegsPreservedOOL), 1, TR::DebugCounter::Undetermined); - } - // Here we conservatively assume there is a call in this method that will require space for its return address const int32_t peakSize = localSize + preservedRegsSize + outgoingArgSize + _properties.getPointerSize(); - if (!disableOutlinedPrologue && (peakSize - localSize - preservedRegsSize >= STACKCHECKBUFFER)) - { - // Too much stack space required beyond what outlined prologue will check for. - // The outlined overflow check is unsuitable. - // - if (trace) - traceMsg(comp(), "OUTLINED PROLOGUES: Disable because outlined overflow check is unsuitable for frames requiring %d bytes below the preserved registers\n", peakSize - localSize - preservedRegsSize); - disableOutlinedPrologue = true; - cursor = cg()->generateDebugCounter(cursor, TR::DebugCounter::debugCounterName(comp(), "cg.prologues:refusedToOutline/hugeOutgoingArgArea/numBytes=%d", outgoingArgSize), 1, TR::DebugCounter::Undetermined); - } - - if (!disableOutlinedPrologue && comp()->getOption(TR_FullSpeedDebug)) - { - // We haven't done the work to ensure FSD is compatible with outlined prologues. 
- // - if (trace) - traceMsg(comp(), "OUTLINED PROLOGUES: Not supported in FSD\n"); - disableOutlinedPrologue = true; - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:refusedToOutline/FSD", 1, TR::DebugCounter::Undetermined); - } - - if (!disableOutlinedPrologue && (comp()->getOption(TR_PaintAllocatedFrameSlotsDead) || comp()->getOption(TR_PaintAllocatedFrameSlotsFauxObject))) - { - // Painting has to occur between buying the frame and saving preserved - // regs. Since both of these are done by the outlined-prologue helper, - // there's no correct place to do it if we're using the helper. - // - if (trace) - traceMsg(comp(), "OUTLINED PROLOGUES: Not supported with frame slot painting\n"); - disableOutlinedPrologue = true; - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:refusedToOutline/slotPainting", 1, TR::DebugCounter::Undetermined); - } - - bool doOverflowCheck = !comp()->isDLT(); TR::Instruction *stackOverflowInstruction = NULL; -#ifdef TR_TARGET_64BIT - if (!disableOutlinedPrologue) - { - if (performTransformation(comp(), "O^O OUTLINED PROLOGUES: Outline with %d autos and %d regs preserved out of line\n", numLocals, numRegsPreservedOOL)) - { - // Ok, we're committed to outlining the prologue now - // - doOverflowCheck = false; // Outlined prologue does the overflow check - cg()->setPushPreservedRegisters(); // Outlined prologue doesn't allocate outgoing arg area, so it's acting like it's pushing preserved regs - uint32_t omittedPreservesSize = numPreservesOmitted * TR::Compiler->om.sizeofReferenceAddress(); - allocSize = localSize + omittedPreservesSize; // Stack pointer bump is done out of line, but it behaves as though it buys this much - - // If the stack overflow check fails, the outlined prologue code will dork the - // return address to point here - // - stackOverflowInstruction = cursor; - stackOverflowInstruction->setNeedsGCMap(); // Outlined prologue will set the return address to point here when calling jitStackOverflow - - // 
Outlined prologue takes a descriptor argument in r8. - // - // NOTE! Outlined prologue code knows the length of this instruction. - // If you modify it, fix the outlined prologue helper to match. - // - TR::RealRegister *descriptorReg = machine()->getX86RealRegister(TR::RealRegister::r8); // Must use r8 since we can do a stack overflow check and restart, and the overflow helper uses rdi - TR::RealRegister *spReg = machine()->getX86RealRegister(TR::RealRegister::esp); - intptrj_t descriptor = -(intptrj_t)(allocSize); - if (descriptor == 0) - { - cursor = generateRegRegInstruction(cursor, MOVRegReg(), descriptorReg, spReg, cg()); - } - else - { - cursor = generateRegMemInstruction(cursor, LEARegMem(), descriptorReg, - generateX86MemoryReference(spReg, -(intptrj_t)(allocSize), cg()), cg()); - } - } - else - { - disableOutlinedPrologue = true; - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:refusedToOutline/performTransformation", 1, TR::DebugCounter::Undetermined); - if (trace) - traceMsg(comp(), "OUTLINED PROLOGUES: Disabled by performTransformation\n"); - } - } -#endif - // Small: entire stack usage fits in STACKCHECKBUFFER, so if sp is within // the soft limit before buying the frame, then the whole frame will fit // within the hard limit. @@ -892,10 +776,9 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor) if (trace) { - traceMsg(comp(), "\nFrame size: %c%c%c locals=%d frame=%d peak=%d%s\n", + traceMsg(comp(), "\nFrame size: %c%c%c locals=%d frame=%d peak=%d\n", frameIsSmall? 'S':'-', frameIsMedium? 'M':'-', frameIsLarge? 'L':'-', - localSize, cg()->getFrameSizeInBytes(), peakSize, - disableOutlinedPrologue? 
"" : " using outlined prologue"); + localSize, cg()->getFrameSizeInBytes(), peakSize); } #if defined(DEBUG) @@ -965,7 +848,6 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor) if (frameIsLarge && doOverflowCheck) { TR_ASSERT(minInstructionSize <= 5, "Can't guarantee LEA instruction will be at least %d bytes", minInstructionSize); - TR_ASSERT(disableOutlinedPrologue, "Frame using outlined prologue should never qualify as large"); // For large frames, there are no shortcuts. Explicitly compute the // maximum extent of the stack pointer and make sure there's enough @@ -978,7 +860,7 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor) minInstructionSize = 0; // The LEA satisfies the constraint } - doAllocateFrameSpeculatively = frameIsMedium && disableOutlinedPrologue; + doAllocateFrameSpeculatively = frameIsMedium; if (doAllocateFrameSpeculatively) { @@ -1045,133 +927,90 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor) bodySymbol->setProloguePushSlots(preservedRegsSize / properties.getPointerSize()); - if (disableOutlinedPrologue) + // + // Inline prologue logic + // + TR::Instruction *outlineablePortionStart = cursor; + + // Allocate the stack frame + // + if (allocSize == 0) { - // - // Inline prologue logic - // - TR::Instruction *outlineablePortionStart = cursor; + // No need to do anything + } + else if (!doAllocateFrameSpeculatively) + { + TR_ASSERT(minInstructionSize <= 5, "Can't guarantee SUB instruction will be at least %d bytes", minInstructionSize); + const TR_X86OpCodes subOp = (allocSize <= 127 && getMinimumFirstInstructionSize() <= 3)? SUBRegImms() : SUBRegImm4(); + cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, subOp, espReal, allocSize, cg()); + } - // Allocate the stack frame + //Support to paint allocated frame slots. 
+ // + if (( comp()->getOption(TR_PaintAllocatedFrameSlotsDead) || comp()->getOption(TR_PaintAllocatedFrameSlotsFauxObject) ) && allocSize!=0) + { + uint32_t paintValue32 = 0; + uint64_t paintValue64 = 0; + + TR::RealRegister *paintReg = NULL; + TR::RealRegister *frameSlotIndexReg = machine()->getX86RealRegister(TR::RealRegister::edi); + uint32_t paintBound = 0; + uint32_t paintSlotsOffset = 0; + uint32_t paintSize = allocSize-sizeof(uintptrj_t); + + //Paint the slots with deadf00d // - if (allocSize == 0) + if (comp()->getOption(TR_PaintAllocatedFrameSlotsDead)) { - // No need to do anything + if (TR::Compiler->target.is64Bit()) + paintValue64 = (uint64_t)CONSTANT64(0xdeadf00ddeadf00d); + else + paintValue32 = 0xdeadf00d; } - else if (!doAllocateFrameSpeculatively) + //Paint stack slots with a arbitrary object aligned address. + // + else { - TR_ASSERT(minInstructionSize <= 5, "Can't guarantee SUB instruction will be at least %d bytes", minInstructionSize); - const TR_X86OpCodes subOp = (allocSize <= 127 && getMinimumFirstInstructionSize() <= 3)? SUBRegImms() : SUBRegImm4(); - cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, subOp, espReal, allocSize, cg()); + if (TR::Compiler->target.is64Bit()) + { + paintValue64 = ((uintptrj_t) ((uintptrj_t)comp()->getOptions()->getHeapBase() + (uintptrj_t) 4096)); + } + else + { + paintValue32 = ((uintptrj_t) ((uintptrj_t)comp()->getOptions()->getHeapBase() + (uintptrj_t) 4096)); + } } - //Support to paint allocated frame slots. 
- // - if (( comp()->getOption(TR_PaintAllocatedFrameSlotsDead) || comp()->getOption(TR_PaintAllocatedFrameSlotsFauxObject) ) && allocSize!=0) - { - uint32_t paintValue32 = 0; - uint64_t paintValue64 = 0; - - TR::RealRegister *paintReg = NULL; - TR::RealRegister *frameSlotIndexReg = machine()->getX86RealRegister(TR::RealRegister::edi); - uint32_t paintBound = 0; - uint32_t paintSlotsOffset = 0; - uint32_t paintSize = allocSize-sizeof(uintptrj_t); - - //Paint the slots with deadf00d - // - if (comp()->getOption(TR_PaintAllocatedFrameSlotsDead)) - { - if (TR::Compiler->target.is64Bit()) - paintValue64 = (uint64_t)CONSTANT64(0xdeadf00ddeadf00d); - else - paintValue32 = 0xdeadf00d; - } - //Paint stack slots with a arbitrary object aligned address. - // - else - { - if (TR::Compiler->target.is64Bit()) - { - paintValue64 = ((uintptrj_t) ((uintptrj_t)comp()->getOptions()->getHeapBase() + (uintptrj_t) 4096)); - } - else - { - paintValue32 = ((uintptrj_t) ((uintptrj_t)comp()->getOptions()->getHeapBase() + (uintptrj_t) 4096)); - } - } - - TR::LabelSymbol *startLabel = generateLabelSymbol(cg()); - - //Load the 64 bit paint value into a paint reg. + TR::LabelSymbol *startLabel = generateLabelSymbol(cg()); + + //Load the 64 bit paint value into a paint reg. #ifdef TR_TARGET_64BIT - paintReg = machine()->getX86RealRegister(TR::RealRegister::r8); - cursor = new (trHeapMemory()) TR::AMD64RegImm64Instruction(cursor, MOV8RegImm64, paintReg, paintValue64, cg()); + paintReg = machine()->getX86RealRegister(TR::RealRegister::r8); + cursor = new (trHeapMemory()) TR::AMD64RegImm64Instruction(cursor, MOV8RegImm64, paintReg, paintValue64, cg()); #endif - //Perform the paint. 
- // - cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, MOVRegImm4(), frameSlotIndexReg, paintSize, cg()); - cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, LABEL, startLabel, cg()); - if (TR::Compiler->target.is64Bit()) - cursor = new (trHeapMemory()) TR::X86MemRegInstruction(cursor, S8MemReg, generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintReg, cg()); - else - cursor = new (trHeapMemory()) TR::X86MemImmInstruction(cursor, SMemImm4(), generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintValue32, cg()); - cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, SUBRegImms(), frameSlotIndexReg, sizeof(intptr_t),cg()); - cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, CMPRegImm4(), frameSlotIndexReg, paintBound, cg()); - cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, JGE4, startLabel,cg()); - } - - // Save preserved regs - // - cursor = savePreservedRegisters(cursor); - - // Insert some counters - // - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#preserved", preservedRegsSize >> getProperties().getParmSlotShift(), TR::DebugCounter::Expensive); - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:inline", 1, TR::DebugCounter::Expensive); - } - else - { - // Outlined prologue logic + //Perform the paint. 
// - static const TR_RuntimeHelper outlinedPrologueHelpers[] = - { - TR_outlinedPrologue_0preserved, - TR_outlinedPrologue_1preserved, - TR_outlinedPrologue_2preserved, - TR_outlinedPrologue_3preserved, - TR_outlinedPrologue_4preserved, - TR_outlinedPrologue_5preserved, - TR_outlinedPrologue_6preserved, - TR_outlinedPrologue_7preserved, - TR_outlinedPrologue_8preserved, - }; - - cursor = generateHelperCallInstruction(cursor, outlinedPrologueHelpers[numRegsPreservedOOL], cg()); - - int32_t numSlotsPushedOutOfLime = numRegsPreservedOOL; - cursor = new (cg()->trHeapMemory()) TR::X86VFPCallCleanupInstruction(cursor, allocSize + numSlotsPushedOutOfLime * getProperties().getParmSlotSize(), cg()); - - if (outlinedPrologueWillPreserveRegisters) - { - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:outlined", 1, TR::DebugCounter::Undetermined); - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:outlined:#preservesOOL", numRegsPreservedOOL, TR::DebugCounter::Undetermined); - cursor = cg()->generateDebugCounter(cursor, "cg.prologues:outlined:#preservesOmitted", numPreservesOmitted, TR::DebugCounter::Undetermined); - } + cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, MOVRegImm4(), frameSlotIndexReg, paintSize, cg()); + cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, LABEL, startLabel, cg()); + if (TR::Compiler->target.is64Bit()) + cursor = new (trHeapMemory()) TR::X86MemRegInstruction(cursor, S8MemReg, generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintReg, cg()); else - { - cursor = cg()->generateDebugCounter(cursor, TR::DebugCounter::debugCounterName(comp(), "cg.prologues:outlined/inlinePushes/numPreserved=%d", numRegsPreservedOOL), 1, TR::DebugCounter::Undetermined); - cursor = savePreservedRegisters(cursor); // Have to do it inline - } - if (atlas && atlas->getInternalPointerMap()) - { - // These are a hassle to disable - //cursor = cg()->generateDebugCounter(cursor, 
"cg.prologues:outlined:withInternalPointers", 1, TR::DebugCounter::Moderate); - //cursor = cg()->generateDebugCounter(cursor, "cg.prologues:outlined:withInternalPointers:#initializedSlots", numLocals, TR::DebugCounter::Moderate); - } + cursor = new (trHeapMemory()) TR::X86MemImmInstruction(cursor, SMemImm4(), generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintValue32, cg()); + cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, SUBRegImms(), frameSlotIndexReg, sizeof(intptr_t),cg()); + cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, CMPRegImm4(), frameSlotIndexReg, paintBound, cg()); + cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, JGE4, startLabel,cg()); } + // Save preserved regs + // + cursor = savePreservedRegisters(cursor); + + // Insert some counters + // + cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#preserved", preservedRegsSize >> getProperties().getParmSlotShift(), TR::DebugCounter::Expensive); + cursor = cg()->generateDebugCounter(cursor, "cg.prologues:inline", 1, TR::DebugCounter::Expensive); + // Initialize any local pointers that could otherwise confuse the GC. 
// TR::RealRegister *framePointer = machine()->getX86RealRegister(TR::RealRegister::vfp); diff --git a/runtime/compiler/x/runtime/X86Codert.asm b/runtime/compiler/x/runtime/X86Codert.asm index 1917a571041..285b3891096 100644 --- a/runtime/compiler/x/runtime/X86Codert.asm +++ b/runtime/compiler/x/runtime/X86Codert.asm @@ -158,179 +158,9 @@ endif public jitFPHelpersBegin public jitFPHelpersEnd - public outlinedPrologue_0preserved - public outlinedPrologue_1preserved - public outlinedPrologue_2preserved - public outlinedPrologue_3preserved - public outlinedPrologue_4preserved - public outlinedPrologue_5preserved - public outlinedPrologue_6preserved - public outlinedPrologue_7preserved - public outlinedPrologue_8preserved - public outlinedEpilogue - - ExternHelper jitStackOverflow - align 16 jitFPHelpersBegin: - -ifdef TR_HOST_64BIT - - align 16 -outlinedPrologue_0preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - push [r8] ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_1preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_2preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push r9 - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_3preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push r10 - 
push r9 - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_4preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push r11 - push r10 - push r9 - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_5preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push r12 - push r11 - push r10 - push r9 - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_6preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push r13 - push r12 - push r11 - push r10 - push r9 - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_7preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push r14 - push r13 - push r12 - push r11 - push r10 - push r9 - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch prediction - - align 16 -outlinedPrologue_8preserved: - cmp r8, [rbp + J9TR_VMThread_stackOverflowMark] - jbe outlinedStackOverflow ; Overflow check - xchg r8, rsp ; Buy the autos - mov r8, [r8] ; Save prologue return address - push r15 - push r14 - push r13 - push r12 - push r11 - push r10 - push r9 - push rbx - push r8 ; Re-push return address - ret ; Use ret to get proper return address branch 
prediction - -outlinedStackOverflow: - lea rdi, [rsp+8] ; r8's location includes space for the return address; if we bump rdi too, then RA space will be negated when we subtract them - sub rdi, r8 ; Negative of displacement in LEA instruction - je outlinedStackOverflow3 ; No displacement field at all - cmp rdi, 128 ; rdi contains the negative of the displacement field, which can accomodate -128 - jle outlinedStackOverflow2 ; Check whether 8-bit displacement field suffices -longLEA: - sub qword ptr [rsp], 3 ; Displacement field in the LEA will actually be 4 bytes instead of 1 -outlinedStackOverflow2: - sub qword ptr [rsp], 2 ; It's not a mov r8, rsp; it's a lea r8, [rsp+disp8] which is 2 bytes longer -outlinedStackOverflow3: - sub qword ptr [rsp], 8 ; Back up return address to re-execute the mov r8,rsp (3 bytes) - ; and call (5 bytes) to outlined prologue after stack overflow check returns - add rdi, 64 ; Conservatively assume we need 64 bytes for preserved regs - ; Note r8 doesn't matter at this point, because when we re-execute the prologue, it will get the proper value - jmp jitStackOverflow ; Will return to "call outlinedPrologue" instruction in prologue - -else ; !TR_HOST_64BIT - -outlinedPrologue_0preserved: -outlinedPrologue_1preserved: -outlinedPrologue_2preserved: -outlinedPrologue_3preserved: -outlinedPrologue_4preserved: -outlinedPrologue_5preserved: -outlinedPrologue_6preserved: -outlinedPrologue_7preserved: -outlinedPrologue_8preserved: - int 3 ; Not yet used - -endif - -outlinedEpilogue proc - int 3 ; Not yet used -outlinedEpilogue endp - - - ifdef TR_HOST_32BIT ; _doubleToInt