From d08ee44cf462211d84d88cfd4e54f4b7c676b2bc Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 3 Sep 2022 19:05:31 -0700 Subject: [PATCH 1/4] irjit: Handle branch/jump in branch delay slots. See #15952 for more detail. --- Core/MIPS/IR/IRCompBranch.cpp | 99 +++++++++++++++++++++++++++++------ 1 file changed, 83 insertions(+), 16 deletions(-) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 79d170031a0a..a8bbdc8f9c9b 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -54,6 +54,27 @@ using namespace MIPSAnalyst; namespace MIPSComp { +// This seems to be the same for all branch types. +static u32 ResolveNotTakenTarget(u32 pc, const MIPSOpcode &op, bool likely, const MIPSOpcode &delaySlotOp, const MIPSInfo &delaySlotInfo) { + u32 notTakenTarget = pc + 8; + if ((delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0) { + // If a branch has a j/jr/jal/jalr as a delay slot, that is run if the branch is not taken. + // TODO: Technically, in the likely case, we should somehow suppress andLink on this exit. + bool isJump = (delaySlotInfo & IS_JUMP) != 0; + // If the delay slot is a branch, likely skips it. + if (isJump || !likely) + notTakenTarget -= 4; + + // For a branch (not a jump), it actually should try the delay slot and take its target potentially. + // This is similar to the VFPU case and has not been seen, so just report it. + if (!isJump && SignExtend16ToU32(delaySlotOp) != SignExtend16ToU32(op) - 1) + ERROR_LOG_REPORT(JIT, "Branch in branch delay slot at %08x with different target", pc); + if (isJump && likely && (delaySlotInfo & (OUT_RA | OUT_RD)) != 0) + ERROR_LOG_REPORT(JIT, "Jump in likely branch delay slot with link at %08x", pc); + } + return notTakenTarget; +} + void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); @@ -65,7 +86,9 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { u32 targetAddr = GetCompilerPC() + offset + 4; MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); + bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); // Often, div/divu are followed by a likely "break" if the divisor was zero. @@ -91,7 +114,9 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { } } - if (!likely) + u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); + + if (!likely && !delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; @@ -99,10 +124,18 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { js.downcountAmount = 0; FlushAll(); - ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs); + ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), lhs, rhs); // This makes the block "impure" :( - if (likely) + if (likely && !delaySlotIsBranch) CompileDelaySlot(); + if (delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((delaySlotInfo & OUT_RA) != 0) + ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); + if ((delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + } FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); @@ -122,7 +155,9 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, u32 targetAddr = GetCompilerPC() + offset + 4; MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); + bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); MIPSGPReg lhs = rs; @@ -133,7 +168,9 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, if (andLink) ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8); - if (!likely) + u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); + + if (!likely && !delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; @@ -141,9 +178,18 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, js.downcountAmount = 0; FlushAll(); - ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs); - if (likely) + ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), lhs); + if (likely && !delaySlotIsBranch) CompileDelaySlot(); + if (delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((delaySlotInfo & OUT_RA) != 0) + ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); + if ((delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + } + // Taken FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); @@ -199,20 +245,35 @@ void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; + MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); + bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; + ir.Write(IROp::FpCondToReg, IRTEMP_LHS); - if (!likely) + if (!likely && !delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount)); js.downcountAmount = 0; + u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); + FlushAll(); // Not taken - ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_LHS, 0); + ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0); // Taken - if (likely) + if (likely && !delaySlotIsBranch) CompileDelaySlot(); + if (delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((delaySlotInfo & OUT_RA) != 0) + ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); + if ((delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + } + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); @@ -243,33 +304,39 @@ void IRFrontend::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 targetAddr = GetCompilerPC() + offset + 4; MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC); // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. - bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp); - if (!likely) + bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; + if (!likely && !delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount)); js.downcountAmount = 0; - if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) - ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC()); + u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); int imm3 = (op >> 18) & 7; - u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8); - ir.Write(IROp::AndConst, IRTEMP_LHS, IRTEMP_LHS, ir.AddConstant(1 << imm3)); FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0); - if (likely) + if (likely && !delaySlotIsBranch) CompileDelaySlot(); + if (delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((delaySlotInfo & OUT_RA) != 0) + ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); + if ((delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + } // Taken FlushAll(); From bac36df453d3a80d93a01eae6806ff085a2e8559 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 3 Sep 2022 19:58:46 -0700 Subject: [PATCH 2/4] x86jit: Refactor and fix jump in branch delay slot. This seems cleaner, instead of the duplication of lines for each branch type. --- Core/MIPS/IR/IRCompBranch.cpp | 122 +++++++++++------------------- Core/MIPS/JitCommon/JitCommon.cpp | 31 +++++++- Core/MIPS/JitCommon/JitCommon.h | 17 +++++ Core/MIPS/x86/CompBranch.cpp | 118 ++++++++++++++++++----------- Core/MIPS/x86/Jit.h | 4 +- 5 files changed, 165 insertions(+), 127 deletions(-) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index a8bbdc8f9c9b..c21e2d6b31fa 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -54,27 +54,6 @@ using namespace MIPSAnalyst; namespace MIPSComp { -// This seems to be the same for all branch types. -static u32 ResolveNotTakenTarget(u32 pc, const MIPSOpcode &op, bool likely, const MIPSOpcode &delaySlotOp, const MIPSInfo &delaySlotInfo) { - u32 notTakenTarget = pc + 8; - if ((delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0) { - // If a branch has a j/jr/jal/jalr as a delay slot, that is run if the branch is not taken. - // TODO: Technically, in the likely case, we should somehow suppress andLink on this exit. - bool isJump = (delaySlotInfo & IS_JUMP) != 0; - // If the delay slot is a branch, likely skips it. - if (isJump || !likely) - notTakenTarget -= 4; - - // For a branch (not a jump), it actually should try the delay slot and take its target potentially. - // This is similar to the VFPU case and has not been seen, so just report it. - if (!isJump && SignExtend16ToU32(delaySlotOp) != SignExtend16ToU32(op) - 1) - ERROR_LOG_REPORT(JIT, "Branch in branch delay slot at %08x with different target", pc); - if (isJump && likely && (delaySlotInfo & (OUT_RA | OUT_RD)) != 0) - ERROR_LOG_REPORT(JIT, "Jump in likely branch delay slot with link at %08x", pc); - } - return notTakenTarget; -} - void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); @@ -85,17 +64,16 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs); + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); // Often, div/divu are followed by a likely "break" if the divisor was zero. // Stalling is not really useful for us, so we optimize this out. - if (likely && offset == 4 && MIPS_IS_BREAK(delaySlotOp)) { + if (likely && offset == 4 && MIPS_IS_BREAK(branchInfo.delaySlotOp)) { // Okay, let's not actually branch at all. We're done here. - EatInstruction(delaySlotOp); + EatInstruction(branchInfo.delaySlotOp); // Let's not double-count the downcount, though. js.downcountAmount--; return; @@ -103,7 +81,7 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { MIPSGPReg lhs = rs; MIPSGPReg rhs = rt; - if (!delaySlotIsNice && !likely) { // if likely, we don't need this + if (!branchInfo.delaySlotIsNice && !likely) { // if likely, we don't need this if (rs != 0) { ir.Write(IROp::Mov, IRTEMP_LHS, rs); lhs = (MIPSGPReg)IRTEMP_LHS; @@ -114,9 +92,7 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { } } - u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); - - if (!likely && !delaySlotIsBranch) + if (!likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; @@ -124,17 +100,17 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { js.downcountAmount = 0; FlushAll(); - ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), lhs, rhs); + ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), lhs, rhs); // This makes the block "impure" :( - if (likely && !delaySlotIsBranch) + if (likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); - if (delaySlotIsBranch) { + if (branchInfo.delaySlotIsBranch) { // We still link when the branch is taken (targetAddr case.) // Remember, it's from the perspective of the delay slot, so +12. - if ((delaySlotInfo & OUT_RA) != 0) + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); - if ((delaySlotInfo & OUT_RD) != 0) - ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); } FlushAll(); @@ -154,23 +130,20 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs); + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); MIPSGPReg lhs = rs; - if (!delaySlotIsNice) { // if likely, we don't need this + if (!branchInfo.delaySlotIsNice) { // if likely, we don't need this ir.Write(IROp::Mov, IRTEMP_LHS, rs); lhs = (MIPSGPReg)IRTEMP_LHS; } if (andLink) ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8); - u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); - - if (!likely && !delaySlotIsBranch) + if (!likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; @@ -178,16 +151,16 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, js.downcountAmount = 0; FlushAll(); - ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), lhs); - if (likely && !delaySlotIsBranch) + ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), lhs); + if (likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); - if (delaySlotIsBranch) { + if (branchInfo.delaySlotIsBranch) { // We still link when the branch is taken (targetAddr case.) // Remember, it's from the perspective of the delay slot, so +12. - if ((delaySlotInfo & OUT_RA) != 0) + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); - if ((delaySlotInfo & OUT_RD) != 0) - ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); } // Taken @@ -245,33 +218,29 @@ void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); - bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); ir.Write(IROp::FpCondToReg, IRTEMP_LHS); - if (!likely && !delaySlotIsBranch) + if (!likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount)); js.downcountAmount = 0; - u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); - FlushAll(); // Not taken - ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0); + ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), IRTEMP_LHS, 0); // Taken - if (likely && !delaySlotIsBranch) + if (likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); - if (delaySlotIsBranch) { + if (branchInfo.delaySlotIsBranch) { // We still link when the branch is taken (targetAddr case.) // Remember, it's from the perspective of the delay slot, so +12. - if ((delaySlotInfo & OUT_RA) != 0) + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); - if ((delaySlotInfo & OUT_RD) != 0) - ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); } FlushAll(); @@ -303,39 +272,36 @@ void IRFrontend::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC); // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. - bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; - if (!likely && !delaySlotIsBranch) + if (!likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount)); js.downcountAmount = 0; - u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo); - int imm3 = (op >> 18) & 7; ir.Write(IROp::AndConst, IRTEMP_LHS, IRTEMP_LHS, ir.AddConstant(1 << imm3)); FlushAll(); - ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0); + ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), IRTEMP_LHS, 0); - if (likely && !delaySlotIsBranch) + if (likely && !branchInfo.delaySlotIsBranch) CompileDelaySlot(); - if (delaySlotIsBranch) { + if (branchInfo.delaySlotIsBranch) { // We still link when the branch is taken (targetAddr case.) // Remember, it's from the perspective of the delay slot, so +12. - if ((delaySlotInfo & OUT_RA) != 0) + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12); - if ((delaySlotInfo & OUT_RD) != 0) - ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); } // Taken diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index 6e5e1e4ee4b7..f85e7df74143 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -22,6 +22,7 @@ #include "ext/disarm.h" #include "ext/udis86/udis86.h" +#include "Common/LogReporting.h" #include "Common/StringUtils.h" #include "Common/Serialize/Serializer.h" #include "Common/Serialize/SerializeFuncs.h" @@ -29,9 +30,11 @@ #include "Core/Util/DisArm64.h" #include "Core/Config.h" +#include "Core/MIPS/IR/IRJit.h" #include "Core/MIPS/JitCommon/JitCommon.h" #include "Core/MIPS/JitCommon/JitState.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSTables.h" #if PPSSPP_ARCH(ARM) #include "../ARM/ArmJit.h" @@ -67,6 +70,32 @@ namespace MIPSComp { } } + BranchInfo::BranchInfo(u32 pc, MIPSOpcode o, MIPSOpcode delayO, bool al, bool l) + : compilerPC(pc), op(o), delaySlotOp(delayO), likely(l), andLink(al) { + delaySlotInfo = MIPSGetInfo(delaySlotOp).value; + delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0; + } + + u32 ResolveNotTakenTarget(const BranchInfo &branchInfo) { + u32 notTakenTarget = branchInfo.compilerPC + 8; + if ((branchInfo.delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0) { + // If a branch has a j/jr/jal/jalr as a delay slot, that is run if the branch is not taken. + // TODO: Technically, in the likely case, we should somehow suppress andLink on this exit. + bool isJump = (branchInfo.delaySlotInfo & IS_JUMP) != 0; + // If the delay slot is a branch, likely skips it. + if (isJump || !branchInfo.likely) + notTakenTarget -= 4; + + // For a branch (not a jump), it actually should try the delay slot and take its target potentially. + // This is similar to the VFPU case and has not been seen, so just report it. + if (!isJump && SignExtend16ToU32(branchInfo.delaySlotOp) != SignExtend16ToU32(branchInfo.op) - 1) + ERROR_LOG_REPORT(JIT, "Branch in branch delay slot at %08x with different target", branchInfo.compilerPC); + if (isJump && branchInfo.likely && (branchInfo.delaySlotInfo & (OUT_RA | OUT_RD)) != 0) + ERROR_LOG_REPORT(JIT, "Jump in likely branch delay slot with link at %08x", branchInfo.compilerPC); + } + return notTakenTarget; +} + JitInterface *CreateNativeJit(MIPSState *mipsState) { #if PPSSPP_ARCH(ARM) return new MIPSComp::ArmJit(mipsState); diff --git a/Core/MIPS/JitCommon/JitCommon.h b/Core/MIPS/JitCommon/JitCommon.h index 87eee83a9fe7..f1015911541a 100644 --- a/Core/MIPS/JitCommon/JitCommon.h +++ b/Core/MIPS/JitCommon/JitCommon.h @@ -153,6 +153,23 @@ namespace MIPSComp { typedef void (MIPSFrontendInterface::*MIPSCompileFunc)(MIPSOpcode opcode); typedef int (MIPSFrontendInterface::*MIPSReplaceFunc)(); + struct BranchInfo { + BranchInfo(u32 pc, MIPSOpcode op, MIPSOpcode delaySlotOp, bool andLink, bool likely); + + u32 compilerPC; + MIPSOpcode op; + MIPSOpcode delaySlotOp; + u64 delaySlotInfo; + bool likely; + bool andLink; + // Update manually if it's not always nice (rs/rt, rs/zero, etc.) + bool delaySlotIsNice = true; + bool delaySlotIsBranch; + }; + + // This seems to be the same for all branch types. + u32 ResolveNotTakenTarget(const BranchInfo &branchInfo); + extern JitInterface *jit; extern std::recursive_mutex jitLock; diff --git a/Core/MIPS/x86/CompBranch.cpp b/Core/MIPS/x86/CompBranch.cpp index 9bd167d2e229..276c04995588 100644 --- a/Core/MIPS/x86/CompBranch.cpp +++ b/Core/MIPS/x86/CompBranch.cpp @@ -63,7 +63,7 @@ using namespace MIPSAnalyst; #define DO_CONDITIONAL_LOG 0 // We can also disable nice delay slots. -// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false; +// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false; #define CONDITIONAL_NICE_DELAYSLOT ; #if DO_CONDITIONAL_LOG @@ -197,22 +197,22 @@ bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) { return targetAddr > GetCompilerPC(); } -void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) { - if (andLink) +void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo) { + if (branchInfo.andLink) gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); // We may want to try to continue along this branch a little while, to reduce reg flushing. - bool predictTakeBranch = PredictTakeBranch(targetAddr, likely); - if (CanContinueBranch(predictTakeBranch ? targetAddr : notTakenAddr)) + bool predictTakeBranch = PredictTakeBranch(targetAddr, branchInfo.likely); + if (!branchInfo.delaySlotIsBranch && CanContinueBranch(predictTakeBranch ? targetAddr : notTakenAddr)) { if (predictTakeBranch) cc = FlipCCFlag(cc); Gen::FixupBranch ptr; RegCacheState state; - if (!likely) + if (!branchInfo.likely) { - if (!delaySlotIsNice) + if (!branchInfo.delaySlotIsNice) CompileDelaySlot(DELAYSLOT_SAFE); ptr = J_CC(cc, true); GetStateAndFlushAll(state); @@ -243,7 +243,7 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del CONDITIONAL_LOG_EXIT(targetAddr); // Don't forget to run the delay slot if likely. - if (likely) + if (branchInfo.likely) CompileDelaySlot(DELAYSLOT_NICE); AddContinuedBlock(targetAddr); @@ -272,9 +272,9 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del else { Gen::FixupBranch ptr; - if (!likely) + if (!branchInfo.likely) { - if (!delaySlotIsNice) + if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); else FlushAll(); @@ -284,7 +284,19 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del { FlushAll(); ptr = J_CC(cc, true); - CompileDelaySlot(DELAYSLOT_FLUSH); + if (!branchInfo.delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + // Handle the linkage of a delay slot, even when we're taking the branch. + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); } // Take the branch @@ -299,14 +311,25 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del } } -void Jit::CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) { +void Jit::CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo) { // Continuing is handled in the imm branch case... TODO: move it here? - if (andLink) + if (branchInfo.andLink) gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); - if (taken || !likely) + if (branchInfo.delaySlotIsBranch) { + if (taken) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + } + FlushAll(); + } else if (taken || !branchInfo.likely) { CompileDelaySlot(DELAYSLOT_FLUSH); - else + } else { FlushAll(); + } const u32 destAddr = taken ? targetAddr : notTakenAddr; CONDITIONAL_LOG_EXIT(destAddr); @@ -326,9 +349,13 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely) MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs); + CONDITIONAL_NICE_DELAYSLOT; + bool immBranch = false; bool immBranchTaken = false; - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) { // The cc flags are opposites: when NOT to take the branch. bool immBranchNotTaken; s32 rsImm = (s32)gpr.GetImm(rs); @@ -364,16 +391,14 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely) return; } - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); - CONDITIONAL_NICE_DELAYSLOT; + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); if (immBranch) - CompBranchExit(immBranchTaken, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, false); + CompBranchExit(immBranchTaken, targetAddr, notTakenTarget, branchInfo); else { - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) @@ -387,7 +412,7 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely) CMP(32, gpr.R(rs), gpr.R(rt)); } - CompBranchExits(cc, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, false); + CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo); } } @@ -402,9 +427,14 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs); + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + CONDITIONAL_NICE_DELAYSLOT; + bool immBranch = false; bool immBranchTaken = false; - if (gpr.IsImm(rs)) { + if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) { // The cc flags are opposites: when NOT to take the branch. bool immBranchNotTaken; s32 imm = (s32)gpr.GetImm(rs); @@ -446,22 +476,20 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li return; } - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); - CONDITIONAL_NICE_DELAYSLOT; + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); if (immBranch) - CompBranchExit(immBranchTaken, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, andLink); + CompBranchExit(immBranchTaken, targetAddr, notTakenTarget, branchInfo); else { - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); gpr.MapReg(rs, true, false); CMP(32, gpr.R(rs), Imm32(0)); - CompBranchExits(cc, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, andLink); + CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo); } } @@ -518,17 +546,18 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely) int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp); + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp); + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); CONDITIONAL_NICE_DELAYSLOT; - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); gpr.KillImmediate(MIPS_REG_FPCOND, true, false); TEST(32, gpr.R(MIPS_REG_FPCOND), Imm32(1)); - CompBranchExits(cc, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, false); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); + CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo); } @@ -559,19 +588,16 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely) int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. - bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp); - bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp); + branchInfo.delaySlotIsNice = !branchInfo.delaySlotIsBranch && IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp); + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + CONDITIONAL_NICE_DELAYSLOT; - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); - if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) - ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", GetCompilerPC(), (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1); // THE CONDITION int imm3 = (op >> 18) & 7; @@ -579,8 +605,8 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely) gpr.KillImmediate(MIPS_REG_VFPUCC, true, false); TEST(32, gpr.R(MIPS_REG_VFPUCC), Imm32(1 << imm3)); - u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8); - CompBranchExits(cc, targetAddr, notTakenTarget, delaySlotIsNice, likely, false); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); + CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo); } diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 4cfd12bdf2e0..0c96188ee484 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -230,8 +230,8 @@ class Jit : public Gen::XCodeBlock, public JitInterface, public MIPSFrontendInte } void CompITypeMemUnpairedLR(MIPSOpcode op, bool isStore); void CompITypeMemUnpairedLRInner(MIPSOpcode op, Gen::X64Reg shiftReg); - void CompBranchExits(Gen::CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink); - void CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink); + void CompBranchExits(Gen::CCFlags cc, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo); + void CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo); static Gen::CCFlags FlipCCFlag(Gen::CCFlags flag); static Gen::CCFlags SwapCCFlag(Gen::CCFlags flag); From f75dadd1d619733aa2d98ccaf4f85fa436e4c4ae Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 3 Sep 2022 21:04:54 -0700 Subject: [PATCH 3/4] arm64jit: Handle branch/jump in branch delay slots. --- Core/MIPS/ARM/ArmCompBranch.cpp | 124 +++++++++++++++++--------- Core/MIPS/ARM64/Arm64CompBranch.cpp | 130 +++++++++++++++++++--------- Core/MIPS/x86/CompBranch.cpp | 9 +- 3 files changed, 176 insertions(+), 87 deletions(-) diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index ec3d180be931..ee6a96a62007 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -55,7 +55,7 @@ #define LOOPOPTIMIZATION 0 // We can disable nice delay slots. -// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false; +// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false; #define CONDITIONAL_NICE_DELAYSLOT ; using namespace MIPSAnalyst; @@ -76,9 +76,13 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs); + CONDITIONAL_NICE_DELAYSLOT; + bool immBranch = false; bool immBranchTaken = false; - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) { // The cc flags are opposites: when NOT to take the branch. bool immBranchNotTaken; s32 rsImm = (s32)gpr.GetImm(rs); @@ -112,11 +116,9 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) return; } - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); - CONDITIONAL_NICE_DELAYSLOT; + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); if (immBranch) { // Continuing is handled above, this is just static jumping. if (immBranchTaken || !likely) @@ -124,10 +126,10 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) else FlushAll(); - const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8; + const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget; WriteExit(destAddr, js.nextExit++); } else { - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); // We might be able to flip the condition (EQ/NEQ are easy.) @@ -156,7 +158,7 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) ArmGen::FixupBranch ptr; if (!likely) { - if (!delaySlotIsNice) + if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); else FlushAll(); @@ -164,7 +166,18 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) } else { FlushAll(); ptr = B_CC(cc); - CompileDelaySlot(DELAYSLOT_FLUSH); + if (!branchInfo.delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); } // Take the branch @@ -172,7 +185,7 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) SetJumpTarget(ptr); // Not taken - WriteExit(GetCompilerPC() + 8, js.nextExit++); + WriteExit(notTakenTarget, js.nextExit++); } js.compiling = false; @@ -189,9 +202,13 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs); + CONDITIONAL_NICE_DELAYSLOT; + bool immBranch = false; bool immBranchTaken = false; - if (gpr.IsImm(rs)) { + if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) { // The cc flags are opposites: when NOT to take the branch. bool immBranchNotTaken; s32 imm = (s32)gpr.GetImm(rs); @@ -231,11 +248,9 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like return; } - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); - CONDITIONAL_NICE_DELAYSLOT; + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); if (immBranch) { // Continuing is handled above, this is just static jumping. if (andLink) @@ -245,10 +260,10 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like else FlushAll(); - const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8; + const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget; WriteExit(destAddr, js.nextExit++); } else { - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); gpr.MapReg(rs); @@ -260,7 +275,7 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like ArmGen::FixupBranch ptr; if (!likely) { - if (!delaySlotIsNice) + if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); else FlushAll(); @@ -270,7 +285,18 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like { FlushAll(); ptr = B_CC(cc); - CompileDelaySlot(DELAYSLOT_FLUSH); + if (!branchInfo.delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); } // Take the branch @@ -278,7 +304,7 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like SetJumpTarget(ptr); // Not taken - WriteExit(GetCompilerPC() + 8, js.nextExit++); + WriteExit(notTakenTarget, js.nextExit++); } js.compiling = false; } @@ -335,11 +361,12 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp); + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp); CONDITIONAL_NICE_DELAYSLOT; - if (!likely && delaySlotIsNice) + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); gpr.MapReg(MIPS_REG_FPCOND); @@ -348,7 +375,7 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) ArmGen::FixupBranch ptr; if (!likely) { - if (!delaySlotIsNice) + if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); else FlushAll(); @@ -358,7 +385,18 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) { FlushAll(); ptr = B_CC(cc); - CompileDelaySlot(DELAYSLOT_FLUSH); + if (!branchInfo.delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); } // Take the branch @@ -366,7 +404,7 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) SetJumpTarget(ptr); // Not taken - WriteExit(GetCompilerPC() + 8, js.nextExit++); + WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++); js.compiling = false; } @@ -394,19 +432,16 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. - bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp); - bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp); + branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp); CONDITIONAL_NICE_DELAYSLOT; - if (!likely && delaySlotIsNice) + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + if (!likely && branchInfo.delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); - if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) - ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC()); int imm3 = (op >> 18) & 7; @@ -417,7 +452,7 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) js.inDelaySlot = true; if (!likely) { - if (!delaySlotIsNice && !delaySlotIsBranch) + if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); else FlushAll(); @@ -427,18 +462,27 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) { FlushAll(); ptr = B_CC(cc); - if (!delaySlotIsBranch) + if (!branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_FLUSH); } js.inDelaySlot = false; + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); + } + // Take the branch WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken - u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8); - WriteExit(notTakenTarget, js.nextExit++); + WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++); js.compiling = false; } diff --git a/Core/MIPS/ARM64/Arm64CompBranch.cpp b/Core/MIPS/ARM64/Arm64CompBranch.cpp index 026cf539e3b7..26713ecce2dd 100644 --- a/Core/MIPS/ARM64/Arm64CompBranch.cpp +++ b/Core/MIPS/ARM64/Arm64CompBranch.cpp @@ -55,7 +55,7 @@ #define LOOPOPTIMIZATION 0 // We can disable nice delay slots. -// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false; +// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false; #define CONDITIONAL_NICE_DELAYSLOT ; using namespace MIPSAnalyst; @@ -76,9 +76,13 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs); + CONDITIONAL_NICE_DELAYSLOT; + bool immBranch = false; bool immBranchTaken = false; - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) { // The cc flags are opposites: when NOT to take the branch. bool immBranchNotTaken; s32 rsImm = (s32)gpr.GetImm(rs); @@ -112,11 +116,9 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) return; } - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); - CONDITIONAL_NICE_DELAYSLOT; + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); if (immBranch) { // Continuing is handled above, this is just static jumping. if (immBranchTaken || !likely) @@ -124,10 +126,10 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) else FlushAll(); - const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8; + const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget; WriteExit(destAddr, js.nextExit++); } else { - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); // We might be able to flip the condition (EQ/NEQ are easy.) @@ -136,7 +138,7 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) const bool rtIsZero = gpr.IsImm(rt) && gpr.GetImm(rt) == 0; Arm64Gen::FixupBranch ptr; - if ((likely || delaySlotIsNice) && (rsIsZero || rtIsZero) && canFlip) { + if ((likely || branchInfo.delaySlotIsNice) && (rsIsZero || rtIsZero) && canFlip) { // Special case, we can just use CBZ/CBNZ directly. MIPSGPReg r = rsIsZero ? rt : rs; gpr.MapReg(r); @@ -169,7 +171,7 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) } if (!likely) { - if (!delaySlotIsNice) + if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); else FlushAll(); @@ -180,17 +182,27 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) } } - if (likely) { + if (likely && !branchInfo.delaySlotIsBranch) { // Only executed when taking the branch. CompileDelaySlot(DELAYSLOT_FLUSH); } + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); + } + // Take the branch WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken - WriteExit(GetCompilerPC() + 8, js.nextExit++); + WriteExit(notTakenTarget, js.nextExit++); } js.compiling = false; @@ -207,9 +219,13 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li MIPSGPReg rs = _RS; u32 targetAddr = GetCompilerPC() + offset + 4; + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs); + CONDITIONAL_NICE_DELAYSLOT; + bool immBranch = false; bool immBranchTaken = false; - if (gpr.IsImm(rs)) { + if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) { // The cc flags are opposites: when NOT to take the branch. bool immBranchNotTaken; s32 imm = (s32)gpr.GetImm(rs); @@ -249,11 +265,9 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li return; } - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); - CONDITIONAL_NICE_DELAYSLOT; + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + u32 notTakenTarget = ResolveNotTakenTarget(branchInfo); if (immBranch) { // Continuing is handled above, this is just static jumping. if (andLink) @@ -263,10 +277,10 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li else FlushAll(); - const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8; + const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget; WriteExit(destAddr, js.nextExit++); } else { - if (!likely && delaySlotIsNice) + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); gpr.MapReg(rs); @@ -278,7 +292,7 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li Arm64Gen::FixupBranch ptr; if (!likely) { - if (!delaySlotIsNice) + if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); else FlushAll(); @@ -288,7 +302,18 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li { FlushAll(); ptr = B(cc); - CompileDelaySlot(DELAYSLOT_FLUSH); + if (!branchInfo.delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); } // Take the branch @@ -296,7 +321,7 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li SetJumpTarget(ptr); // Not taken - WriteExit(GetCompilerPC() + 8, js.nextExit++); + WriteExit(notTakenTarget, js.nextExit++); } js.compiling = false; } @@ -352,16 +377,17 @@ void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) { int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp); + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); + branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp); CONDITIONAL_NICE_DELAYSLOT; - if (!likely && delaySlotIsNice) + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); gpr.MapReg(MIPS_REG_FPCOND); Arm64Gen::FixupBranch ptr; - if (likely || delaySlotIsNice) { + if (likely || branchInfo.delaySlotIsNice) { // FlushAll() won't actually change the reg. ARM64Reg ar = gpr.R(MIPS_REG_FPCOND); FlushAll(); @@ -372,20 +398,31 @@ void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) { } } else { TSTI2R(gpr.R(MIPS_REG_FPCOND), 1, SCRATCH1); - CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + if (!branchInfo.delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); ptr = B(cc); } - if (likely) { + if (likely && !branchInfo.delaySlotIsBranch) { CompileDelaySlot(DELAYSLOT_FLUSH); } + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); + } + // Take the branch WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken - WriteExit(GetCompilerPC() + 8, js.nextExit++); + WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++); js.compiling = false; } @@ -410,25 +447,22 @@ void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) { int offset = TARGET16; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp); - + BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. - bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp); - bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp); + branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp); CONDITIONAL_NICE_DELAYSLOT; - if (!likely && delaySlotIsNice) + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); + if (!likely && branchInfo.delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); - if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) - ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC()); int imm3 = (op >> 18) & 7; gpr.MapReg(MIPS_REG_VFPUCC); Arm64Gen::FixupBranch ptr; - if (likely || delaySlotIsNice || delaySlotIsBranch) { + if (likely || branchInfo.delaySlotIsNice || branchInfo.delaySlotIsBranch) { // FlushAll() won't actually change the reg. ARM64Reg ar = gpr.R(MIPS_REG_VFPUCC); FlushAll(); @@ -439,21 +473,31 @@ void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) { } } else { TSTI2R(gpr.R(MIPS_REG_VFPUCC), 1ULL << imm3, SCRATCH1); - CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + if (!branchInfo.delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); ptr = B(cc); } - if (likely && !delaySlotIsBranch) { + if (likely && !branchInfo.delaySlotIsBranch) { CompileDelaySlot(DELAYSLOT_FLUSH); } + if (branchInfo.delaySlotIsBranch) { + // We still link when the branch is taken (targetAddr case.) + // Remember, it's from the perspective of the delay slot, so +12. + if ((branchInfo.delaySlotInfo & OUT_RA) != 0) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12); + if ((branchInfo.delaySlotInfo & OUT_RD) != 0) + gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12); + FlushAll(); + } + // Take the branch WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken - u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8); - WriteExit(notTakenTarget, js.nextExit++); + WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++); js.compiling = false; } diff --git a/Core/MIPS/x86/CompBranch.cpp b/Core/MIPS/x86/CompBranch.cpp index 276c04995588..99a73c5b608f 100644 --- a/Core/MIPS/x86/CompBranch.cpp +++ b/Core/MIPS/x86/CompBranch.cpp @@ -548,8 +548,9 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely) BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely); branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp); - js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); CONDITIONAL_NICE_DELAYSLOT; + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); @@ -592,10 +593,10 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely) // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. - branchInfo.delaySlotIsNice = !branchInfo.delaySlotIsBranch && IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp); - js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); - + branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp); CONDITIONAL_NICE_DELAYSLOT; + + js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp); if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_NICE); From fc50860f55fbc3f5cd26f6d09c25f9e39385505f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 3 Sep 2022 21:05:58 -0700 Subject: [PATCH 4/4] headless: Fix startup on latest Android versions. Otherwise it tries to use deprecated ashmem and chokes. --- Core/System.cpp | 1 + headless/Headless.cpp | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Core/System.cpp b/Core/System.cpp index dc0764f86e4a..29b1765fb06a 100644 --- a/Core/System.cpp +++ b/Core/System.cpp @@ -295,6 +295,7 @@ bool CPU_Init(std::string *errorString) { HLEPlugins::Init(); if (!Memory::Init()) { // We're screwed. + *errorString = "Memory init failed"; return false; } mipsr4k.Reset(); diff --git a/headless/Headless.cpp b/headless/Headless.cpp index cad0c9fb20bb..0be0518e8bbe 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -95,7 +95,11 @@ void NativeResized() { } std::string System_GetProperty(SystemProperty prop) { return ""; } std::vector System_GetPropertyStringVec(SystemProperty prop) { return std::vector(); } -int System_GetPropertyInt(SystemProperty prop) { return -1; } +int System_GetPropertyInt(SystemProperty prop) { + if (prop == SYSPROP_SYSTEMVERSION) + return 31; + return -1; +} float System_GetPropertyFloat(SystemProperty prop) { return -1.0f; } bool System_GetPropertyBool(SystemProperty prop) { switch (prop) {