Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized prolog/epilog for private linkage on X86 #2460

Merged
merged 1 commit into from
Jul 26, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 12 additions & 66 deletions runtime/compiler/x/codegen/X86PrivateLinkage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,17 +673,13 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor)
// Compute frame size
//
// allocSize: bytes to be subtracted from the stack pointer when allocating the frame
// frameSize: total bytes of stack the prologue consumes
// peakSize: maximum bytes of stack this method might consume before encountering another stack check
//
const int32_t localSize = _properties.getOffsetToFirstLocal() - bodySymbol->getLocalMappingCursor();
TR_ASSERT(localSize >= 0, "assertion failure");

// Note that the return address doesn't appear here because it is allocated by the call instruction
//
int32_t allocSize = localSize + preservedRegsSize
+ ( _properties.getReservesOutgoingArgsInPrologue()? outgoingArgSize : 0 );

{
int32_t frameSize = localSize + preservedRegsSize + ( _properties.getReservesOutgoingArgsInPrologue()? outgoingArgSize : 0 );
uint32_t stackSize = frameSize + _properties.getRetAddressWidth();
Expand All @@ -698,7 +694,7 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor)
_properties.getOutgoingArgAlignment(),
_properties.getRetAddressWidth());
}

auto allocSize = cg()->getFrameSizeInBytes();

// Here we conservatively assume there is a call in this method that will require space for its return address
const int32_t peakSize = allocSize + _properties.getPointerSize();
Expand Down Expand Up @@ -1001,19 +997,6 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor)
}
}

int32_t allocateSize = 0;
// Add paddings
if (cg()->getStackFramePaddingSizeInBytes())
{
allocateSize += cg()->getStackFramePaddingSizeInBytes();
}

if (allocateSize)
{
const TR_X86OpCodes subOp = (allocateSize <= 127)? SUBRegImms() : SUBRegImm4();
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, subOp, espReal, allocateSize, cg());
}

#if defined(DEBUG)
debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),
-localSize - preservedRegsSize - outgoingArgSize,
Expand Down Expand Up @@ -1049,19 +1032,9 @@ void TR::X86PrivateLinkage::createPrologue(TR::Instruction *cursor)
// Used for shrinkwrapping: reports whether the frame needs a deallocation.
//
// Returns true only when both conditions hold:
//   - this linkage does not always dedicate a frame pointer register, and
//   - the code generator reports a frame size of zero bytes.
// Returns false otherwise.
//
bool TR::X86PrivateLinkage::needsFrameDeallocation()
   {
   return !_properties.getAlwaysDedicateFramePointerRegister() && cg()->getFrameSizeInBytes() == 0;
   }

TR::Instruction *TR::X86PrivateLinkage::deallocateFrameIfNeeded(TR::Instruction *cursor, int32_t size)
Expand All @@ -1072,62 +1045,35 @@ TR::Instruction *TR::X86PrivateLinkage::deallocateFrameIfNeeded(TR::Instruction

// Emits the method epilogue after `cursor`: a debug counter, the restore of the
// preserved registers, and the stack frame deallocation. If the instruction
// following the epilogue is a RETImm2, its immediate is set to the number of
// parameter slots shifted by the linkage's parameter slot shift.
//
// NOTE(review): this span reads like a rendered diff in which removed and added
// lines are interleaved: espReal is declared twice in the same scope, and the
// frame-pointer restore appears both via `new (trHeapMemory())` and via the
// generate*Instruction helpers. Confirm against the real file before treating
// it as compilable code.
void TR::X86PrivateLinkage::createEpilogue(TR::Instruction *cursor)
{
TR::RealRegister *espReal = machine()->getX86RealRegister(TR::RealRegister::esp);
// NOTE(review): framePointer, noReg and localSize are not read anywhere in this block.
TR::RealRegister *framePointer = machine()->getX86RealRegister(TR::RealRegister::vfp);
const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();

const int32_t localSize = _properties.getOffsetToFirstLocal() - bodySymbol->getLocalMappingCursor();
// Frame size with the padding subtracted; the padding is deallocated on its own below.
int32_t allocSize = cg()->getFrameSizeInBytes() - cg()->getStackFramePaddingSizeInBytes();
// NOTE(review): second declaration of espReal in the same scope (see the note above the function).
TR::RealRegister* espReal = machine()->getX86RealRegister(TR::RealRegister::esp);

cursor = cg()->generateDebugCounter(cursor, "cg.epilogues", 1, TR::DebugCounter::Expensive);

int32_t deallocateSize = 0;
// Deallocate padding
if (cg()->getStackFramePaddingSizeInBytes())
{
deallocateSize += cg()->getStackFramePaddingSizeInBytes();
if (comp()->getOption(TR_TraceCG))
{
traceMsg(comp(), "Bytes of stack frame padding to be deallocated %d\n", cg()->getStackFramePaddingSizeInBytes());
}
}

if (deallocateSize)
{
// Values up to 127 use ADDRegImms, larger ones ADDRegImm4
// (presumably the 8-bit vs 32-bit immediate encodings -- TODO confirm).
TR_X86OpCodes op = (deallocateSize <= 127) ? ADDRegImms() : ADDRegImm4();
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, op, espReal, deallocateSize, cg());
}

// Restore preserved regs
//
cursor = restorePreservedRegisters(cursor);

// Deallocate the stack frame
//
// NOTE(review): needsDeallocationForShrinkWrapping is assigned below but never read in this block.
bool needsDeallocationForShrinkWrapping = false;
if (_properties.getAlwaysDedicateFramePointerRegister())
{
// Restore stack pointer from frame pointer
//
cursor = new (trHeapMemory()) TR::X86RegRegInstruction(cursor, MOVRegReg(), espReal, machine()->getX86RealRegister(_properties.getFramePointerRegister()), cg());
cursor = new (trHeapMemory()) TR::X86RegInstruction(cursor, POPReg, machine()->getX86RealRegister(_properties.getFramePointerRegister()), cg());
}
else if (allocSize == 0)
{
// No need to do anything
if (cg()->getShrinkWrappingDone())
needsDeallocationForShrinkWrapping = true;
// NOTE(review): as written, this branch still emits a MOV esp <- frame pointer
// and a POP of the frame pointer, which contradicts the comment above.
// These two lines look like the helper-based form of the restore in the
// first branch -- verify.
cursor = generateRegRegInstruction(cursor, MOVRegReg(), espReal, machine()->getX86RealRegister(_properties.getFramePointerRegister()), cg());
cursor = generateRegInstruction(cursor, POPReg, machine()->getX86RealRegister(_properties.getFramePointerRegister()), cg());
}
else
{
// Adds allocSize back to the stack pointer ...
TR_X86OpCodes op = (allocSize <= 127) ? ADDRegImms() : ADDRegImm4();
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, op, espReal, allocSize, cg());
// ... and then, as written, also adds the full frame size when it is non-zero.
// NOTE(review): two consecutive ADDs on the stack pointer here look like
// alternative variants of the same step rather than intended behavior -- verify.
auto frameSize = cg()->getFrameSizeInBytes();
if (frameSize != 0)
{
cursor = generateRegImmInstruction(cursor, (frameSize <= 127) ? ADDRegImms() : ADDRegImm4(), espReal, frameSize, cg());
}
}

// If the next instruction is a RETImm2, set its immediate to the parameter
// slot count shifted by the parameter slot shift.
if (cursor->getNext()->getOpCodeValue() == RETImm2)
{
// NOTE(review): the two statements below set the same immediate, once through
// bodySymbol and once through comp()->getJittedMethodSymbol(), which is the
// value bodySymbol was initialized from.
toIA32ImmInstruction(cursor->getNext())->setSourceImmediate(bodySymbol->getNumParameterSlots() << getProperties().getParmSlotShift());
toIA32ImmInstruction(cursor->getNext())->setSourceImmediate(comp()->getJittedMethodSymbol()->getNumParameterSlots() << getProperties().getParmSlotShift());
}
}

Expand Down