Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle branch/jump in branch delay slots more accurately #15957

Merged
merged 4 commits into from
Sep 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 84 additions & 40 deletions Core/MIPS/ARM/ArmCompBranch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
#define LOOPOPTIMIZATION 0

// We can disable nice delay slots.
// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false;
#define CONDITIONAL_NICE_DELAYSLOT ;

using namespace MIPSAnalyst;
Expand All @@ -76,9 +76,13 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;

BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;

bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 rsImm = (s32)gpr.GetImm(rs);
Expand Down Expand Up @@ -112,22 +116,20 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
return;
}

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);

u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
else
FlushAll();

const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8;
const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);

// We might be able to flip the condition (EQ/NEQ are easy.)
Expand Down Expand Up @@ -156,23 +158,34 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)

ArmGen::FixupBranch ptr;
if (!likely) {
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
} else {
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}

if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}

// Take the branch
WriteExit(targetAddr, js.nextExit++);

SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(notTakenTarget, js.nextExit++);
}

js.compiling = false;
Expand All @@ -189,9 +202,13 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;

BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;

bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
Expand Down Expand Up @@ -231,11 +248,9 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
return;
}

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);

u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (andLink)
Expand All @@ -245,10 +260,10 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
else
FlushAll();

const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8;
const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);

gpr.MapReg(rs);
Expand All @@ -260,7 +275,7 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
Expand All @@ -270,15 +285,26 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}

if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}

// Take the branch
WriteExit(targetAddr, js.nextExit++);

SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(notTakenTarget, js.nextExit++);
}
js.compiling = false;
}
Expand Down Expand Up @@ -335,11 +361,12 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)

js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);

gpr.MapReg(MIPS_REG_FPCOND);
Expand All @@ -348,7 +375,7 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
Expand All @@ -358,15 +385,26 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}

if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}

// Take the branch
WriteExit(targetAddr, js.nextExit++);

SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
js.compiling = false;
}

Expand Down Expand Up @@ -394,19 +432,16 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);

BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp);
branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)

js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC());

int imm3 = (op >> 18) & 7;

Expand All @@ -417,7 +452,7 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
js.inDelaySlot = true;
if (!likely)
{
if (!delaySlotIsNice && !delaySlotIsBranch)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
Expand All @@ -427,18 +462,27 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
{
FlushAll();
ptr = B_CC(cc);
if (!delaySlotIsBranch)
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
js.inDelaySlot = false;

if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}

// Take the branch
WriteExit(targetAddr, js.nextExit++);

SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);
WriteExit(notTakenTarget, js.nextExit++);
WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
js.compiling = false;
}

Expand Down
Loading