From d399c4a4708b4022ae159388098c09d693c01968 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 6 May 2016 23:45:37 +0200 Subject: [PATCH 01/77] Initial commit for IRJit --- Core/Config.cpp | 1 + Core/Config.h | 1 + Core/Core.vcxproj | 12 + Core/Core.vcxproj.filters | 39 +++ Core/CoreParameter.h | 1 + Core/MIPS/IR/IRAsm.cpp | 28 ++ Core/MIPS/IR/IRCompALU.cpp | 420 ++++++++++++++++++++++++++++++ Core/MIPS/IR/IRCompBranch.cpp | 363 ++++++++++++++++++++++++++ Core/MIPS/IR/IRCompFPU.cpp | 226 ++++++++++++++++ Core/MIPS/IR/IRCompLoadStore.cpp | 162 ++++++++++++ Core/MIPS/IR/IRCompVFPU.cpp | 326 +++++++++++++++++++++++ Core/MIPS/IR/IRInst.cpp | 316 ++++++++++++++++++++++ Core/MIPS/IR/IRInst.h | 260 ++++++++++++++++++ Core/MIPS/IR/IRJit.cpp | 333 +++++++++++++++++++++++ Core/MIPS/IR/IRJit.h | 276 ++++++++++++++++++++ Core/MIPS/IR/IRRegCache.cpp | 46 ++++ Core/MIPS/IR/IRRegCache.h | 43 +++ Core/MIPS/JitCommon/JitCommon.cpp | 16 +- Core/MIPS/MIPS.h | 6 +- 19 files changed, 2869 insertions(+), 6 deletions(-) create mode 100644 Core/MIPS/IR/IRAsm.cpp create mode 100644 Core/MIPS/IR/IRCompALU.cpp create mode 100644 Core/MIPS/IR/IRCompBranch.cpp create mode 100644 Core/MIPS/IR/IRCompFPU.cpp create mode 100644 Core/MIPS/IR/IRCompLoadStore.cpp create mode 100644 Core/MIPS/IR/IRCompVFPU.cpp create mode 100644 Core/MIPS/IR/IRInst.cpp create mode 100644 Core/MIPS/IR/IRInst.h create mode 100644 Core/MIPS/IR/IRJit.cpp create mode 100644 Core/MIPS/IR/IRJit.h create mode 100644 Core/MIPS/IR/IRRegCache.cpp create mode 100644 Core/MIPS/IR/IRRegCache.h diff --git a/Core/Config.cpp b/Core/Config.cpp index a5b4bb79d557..6faa97a0759b 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -354,6 +354,7 @@ static bool DefaultSasThread() { static ConfigSetting cpuSettings[] = { ReportedConfigSetting("Jit", &g_Config.bJit, &DefaultJit, true, true), + ReportedConfigSetting("CPUCore", &g_Config.bJit, &DefaultJit, true, true), ReportedConfigSetting("SeparateCPUThread", &g_Config.bSeparateCPUThread, false, true, true), ReportedConfigSetting("SeparateSASThread", &g_Config.bSeparateSASThread, &DefaultSasThread, true, true), ReportedConfigSetting("SeparateIOThread", &g_Config.bSeparateIOThread, true, true, true), diff --git a/Core/Config.h b/Core/Config.h index b2cfc1351e45..825091bd0cc6 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -120,6 +120,7 @@ struct Config { bool bIgnoreBadMemAccess; bool bFastMemory; bool bJit; + int iCpuCore; bool bCheckForNewVersion; bool bForceLagSync; bool bFuncReplacements; diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index 83552556d0e1..af4b459fb1b9 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -181,6 +181,15 @@ + + + + + + + + + @@ -507,6 +516,9 @@ + + + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index d4ba376f94db..99af2a2696fe 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -67,6 +67,9 @@ {67687dba-8313-4442-b4eb-4be8c4867b65} + + {119ac973-e457-4025-9e1e-4fb34022ae23} + @@ -634,6 +637,33 @@ Core + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + @@ -1179,6 +1209,15 @@ Core + + MIPS\IR + + + MIPS\IR + + + MIPS\IR + diff --git a/Core/CoreParameter.h b/Core/CoreParameter.h index ac33c91f5827..1517b50c03c1 100644 --- a/Core/CoreParameter.h +++ b/Core/CoreParameter.h @@ -24,6 +24,7 @@ enum CPUCore { CPU_INTERPRETER, CPU_JIT, + CPU_IRJIT, }; enum GPUCore { diff --git a/Core/MIPS/IR/IRAsm.cpp b/Core/MIPS/IR/IRAsm.cpp new file mode 100644 index 
000000000000..f5d9c7ad3157 --- /dev/null +++ b/Core/MIPS/IR/IRAsm.cpp @@ -0,0 +1,28 @@ +// Copyright (c) 2015- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "base/logging.h" + +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/System.h" +#include "Core/CoreTiming.h" +#include "Common/MemoryUtil.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/JitCommon/JitCommon.h" + + diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp new file mode 100644 index 000000000000..67059e371e5e --- /dev/null +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -0,0 +1,420 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRRegCache.h" +#include "Common/CPUDetect.h" + +using namespace MIPSAnalyst; + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. 
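// Editor's illustration (not part of this patch): how the field macros above slice a raw
// MIPS opcode word, assuming the standard layout op(31:26) rs(25:21) rt(20:16) imm(15:0).
// "addiu $t0, $sp, 16" assembles to 0x27A80010, so Comp_IType below would see:
static_assert((0x27A80010u >> 26) == 9, "opcode 9 -> addiu, Comp_IType case 9");
static_assert(((0x27A80010u >> 21) & 0x1F) == 29, "_RS -> $sp (reg 29)");
static_assert(((0x27A80010u >> 16) & 0x1F) == 8, "_RT -> $t0 (reg 8)");
static_assert((short)(0x27A80010u & 0xFFFF) == 16, "_IMM16 -> +16, lowered to IROp::AddConst");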
+ +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp { + +void IRJit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, IROp OP) { + if (gpr.IsImm(rs)) { + switch (OP) { + case IROp::AddConst: gpr.SetImm(rt, rs + uimm); break; + case IROp::SubConst: gpr.SetImm(rt, rs - uimm); break; + case IROp::AndConst: gpr.SetImm(rt, rs & uimm); break; + case IROp::OrConst: gpr.SetImm(rt, rs | uimm); break; + case IROp::XorConst: gpr.SetImm(rt, rs ^ uimm); break; + } + } else { + gpr.MapDirtyIn(rt, rs); + ir.Write(OP, rt, ir.AddConstant(uimm)); + } +} + +void IRJit::Comp_IType(MIPSOpcode op) { + CONDITIONAL_DISABLE; + s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension + u32 uimm = op & 0xFFFF; + u32 suimm = (u32)(s32)simm; + + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + + // noop, won't write to ZERO. + if (rt == 0) + return; + + switch (op >> 26) { + case 8: // same as addiu? + case 9: // R(rt) = R(rs) + simm; break; //addiu + // Special-case for small adjustments of pointerified registers. Commonly for SP but happens for others. + if (simm >= 0) { + CompImmLogic(rs, rt, simm, IROp::AddConst); + } else if (simm < 0) { + CompImmLogic(rs, rt, -simm, IROp::SubConst); + } + break; + + case 12: CompImmLogic(rs, rt, uimm, IROp::AndConst); break; + case 13: CompImmLogic(rs, rt, uimm, IROp::OrConst); break; + case 14: CompImmLogic(rs, rt, uimm, IROp::XorConst); break; + + case 10: // R(rt) = (s32)R(rs) < simm; break; //slti + if (gpr.IsImm(rs)) { + gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm ? 1 : 0); + break; + } + gpr.MapDirtyIn(rt, rs); + // Grab the sign bit (< 0) as 1/0. Slightly faster than a shift. + ir.Write(IROp::Slt, rt, rs, ir.AddConstant(simm)); + break; + + case 11: // R(rt) = R(rs) < suimm; break; //sltiu + if (gpr.IsImm(rs)) { + gpr.SetImm(rt, gpr.GetImm(rs) < suimm ? 1 : 0); + break; + } + gpr.MapDirtyIn(rt, rs); + ir.Write(IROp::SltU, rt, rs, ir.AddConstant(suimm)); + break; + + case 15: // R(rt) = uimm << 16; //lui + gpr.SetImm(rt, uimm << 16); + break; + + default: + Comp_Generic(op); + break; + } +} + +void IRJit::Comp_RType2(MIPSOpcode op) { + CONDITIONAL_DISABLE; + + MIPSGPReg rs = _RS; + MIPSGPReg rd = _RD; + + // Don't change $zr. + if (rd == 0) + return; + + switch (op & 63) { + case 22: //clz + gpr.MapDirtyIn(rd, rs); + ir.Write(IROp::Clz, rd, rs); + break; + case 23: //clo + gpr.MapDirtyIn(rd, rs); + ir.Write(IROp::Not, IRTEMP_0, rs); + ir.Write(IROp::Clz, rd, IRTEMP_0); + break; + default: + DISABLE; + } +} + +void IRJit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp constOp, bool symmetric) { + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + switch (op) { + case IROp::Add: gpr.SetImm(rd, gpr.GetImm(rs) + gpr.GetImm(rt)); break; + case IROp::Sub: gpr.SetImm(rd, gpr.GetImm(rs) - gpr.GetImm(rt)); break; + case IROp::And: gpr.SetImm(rd, gpr.GetImm(rs) & gpr.GetImm(rt)); break; + case IROp::Or: gpr.SetImm(rd, gpr.GetImm(rs) | gpr.GetImm(rt)); break; + case IROp::Xor: gpr.SetImm(rd, gpr.GetImm(rs) ^ gpr.GetImm(rt)); break; + } + return; + } + + if (gpr.IsImm(rt) || (gpr.IsImm(rs) && symmetric)) { + MIPSGPReg lhs = gpr.IsImm(rs) ? rt : rs; + MIPSGPReg rhs = gpr.IsImm(rs) ? rs : rt; + u32 rhsImm = gpr.GetImm(rhs); + gpr.MapDirtyIn(rd, lhs); + ir.Write(constOp, rd, lhs, ir.AddConstant(rhsImm)); + // If rd is rhs, we may have lost it in the MapDirtyIn(). lhs was kept. + // This means the rhsImm value was never flushed to rhs, and would be garbage. 
+ if (rd == rhs) { + // Luckily, it was just an imm. + gpr.SetImm(rhs, rhsImm); + } + } + + // Can't do the RSB optimization on ARM64 - no RSB! + + // Generic solution. If it's an imm, better to flush at this point. + gpr.MapDirtyInIn(rd, rs, rt); + ir.Write(op, rd, rs, rt); +} + +void IRJit::Comp_RType3(MIPSOpcode op) { + CONDITIONAL_DISABLE; + + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + MIPSGPReg rd = _RD; + + // noop, won't write to ZERO. + if (rd == 0) + return; + + switch (op & 63) { + case 10: //if (!R(rt)) R(rd) = R(rs); break; //movz + gpr.MapDirtyInIn(rd, rt, rs); + ir.Write(IROp::MovZ, rd, rt, rs); + break; + case 11:// if (R(rt)) R(rd) = R(rs); break; //movn + gpr.MapDirtyInIn(rd, rt, rs); + ir.Write(IROp::MovNZ, rd, rt, rs); + break; + + case 32: //R(rd) = R(rs) + R(rt); break; //add + case 33: //R(rd) = R(rs) + R(rt); break; //addu + CompType3(rd, rs, rt, IROp::Add, IROp::AddConst, true); + break; + + case 34: //R(rd) = R(rs) - R(rt); break; //sub + case 35: //R(rd) = R(rs) - R(rt); break; //subu + CompType3(rd, rs, rt, IROp::Sub, IROp::SubConst, false); + break; + + case 36: //R(rd) = R(rs) & R(rt); break; //and + CompType3(rd, rs, rt, IROp::And, IROp::AndConst, true); + break; + case 37: //R(rd) = R(rs) | R(rt); break; //or + CompType3(rd, rs, rt, IROp::Or, IROp::OrConst, true); + break; + case 38: //R(rd) = R(rs) ^ R(rt); break; //xor/eor + CompType3(rd, rs, rt, IROp::Xor, IROp::XorConst, true); + break; + + case 39: // R(rd) = ~(R(rs) | R(rt)); break; //nor + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + gpr.SetImm(rd, ~(gpr.GetImm(rs) | gpr.GetImm(rt))); + } + + ir.Write(IROp::Or, IRTEMP_0, rs, rt); + ir.Write(IROp::Not, rd, IRTEMP_0); + break; + + case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt)); + } else { + gpr.MapDirtyInIn(rd, rt, rs); + ir.Write(IROp::Slt, rd, rs, rt); + } + break; + + case 43: //R(rd) = R(rs) < R(rt); break; //sltu + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt)); + } else { + gpr.MapDirtyInIn(rd, rt, rs); + ir.Write(IROp::SltU, rd, rs, rt); + } + break; + + case 44: //R(rd) = max(R(rs), R(rt); break; //max + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + gpr.SetImm(rd, std::max(gpr.GetImm(rs), gpr.GetImm(rt))); + break; + } + gpr.MapDirtyInIn(rd, rs, rt); + ir.Write(IROp::Max, rd, rs, rt); + break; + + case 45: //R(rd) = min(R(rs), R(rt)); break; //min + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + gpr.SetImm(rd, std::min(gpr.GetImm(rs), gpr.GetImm(rt))); + break; + } + gpr.MapDirtyInIn(rd, rs, rt); + ir.Write(IROp::Min, rd, rs, rt); + break; + + default: + Comp_Generic(op); + break; + } +} + +void IRJit::CompShiftImm(MIPSOpcode op, IROp shiftOpConst, int sa) { + MIPSGPReg rd = _RD; + MIPSGPReg rt = _RT; + if (gpr.IsImm(rt)) { + switch (shiftOpConst) { + case IROp::ShlImm: + gpr.SetImm(rd, gpr.GetImm(rt) << sa); + break; + case IROp::ShrImm: + gpr.SetImm(rd, gpr.GetImm(rt) >> sa); + break; + case IROp::SarImm: + gpr.SetImm(rd, (int)gpr.GetImm(rt) >> sa); + break; + case IROp::RorImm: + gpr.SetImm(rd, (gpr.GetImm(rt) >> sa) | (gpr.GetImm(rt) << (32 - sa))); + break; + default: + DISABLE; + } + } else { + gpr.MapDirtyIn(rd, rt); + ir.Write(shiftOpConst, rd, rt, sa); + } +} + +void IRJit::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpConst) { + MIPSGPReg rd = _RD; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + if (gpr.IsImm(rs)) { + int sa = gpr.GetImm(rs) & 0x1F; + CompShiftImm(op, shiftOpConst, sa); + return; + 
} + gpr.MapDirtyInIn(rd, rs, rt); + // Not sure if ARM64 wraps like this so let's do it for it. (TODO: According to the ARM ARM, it will indeed mask for us so this is not necessary) + // ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG); + ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(31)); + ir.Write(shiftOp, rd, rt, IRTEMP_0); +} + +void IRJit::Comp_ShiftType(MIPSOpcode op) { + CONDITIONAL_DISABLE; + MIPSGPReg rs = _RS; + MIPSGPReg rd = _RD; + int fd = _FD; + int sa = _SA; + + // noop, won't write to ZERO. + if (rd == 0) + return; + + // WARNING : ROTR + switch (op & 0x3f) { + case 0: CompShiftImm(op, IROp::Shl, sa); break; //sll + case 2: CompShiftImm(op, rs == 1 ? IROp::Ror : IROp::Shr, sa); break; //srl + case 3: CompShiftImm(op, IROp::Sar, sa); break; //sra + case 4: CompShiftVar(op, IROp::Shl, IROp::ShlImm); break; //sllv + case 6: CompShiftVar(op, (fd == 1 ? IROp::Ror : IROp::Shr), (fd == 1 ? IROp::RorImm : IROp::ShrImm)); break; //srlv + case 7: CompShiftVar(op, IROp::Sar, IROp::SarImm); break; //srav + default: + DISABLE; + break; + } +} + +void IRJit::Comp_Special3(MIPSOpcode op) { + DISABLE; +} + +void IRJit::Comp_Allegrex(MIPSOpcode op) { + CONDITIONAL_DISABLE; + MIPSGPReg rt = _RT; + MIPSGPReg rd = _RD; + // Don't change $zr. + if (rd == 0) + return; + + switch ((op >> 6) & 31) { + case 16: // seb // R(rd) = (u32)(s32)(s8)(u8)R(rt); + if (gpr.IsImm(rt)) { + gpr.SetImm(rd, (s32)(s8)(u8)gpr.GetImm(rt)); + return; + } + gpr.MapDirtyIn(rd, rt); + ir.Write(IROp::Ext8to32, rd, rt); + break; + + case 24: // seh + if (gpr.IsImm(rt)) { + gpr.SetImm(rd, (s32)(s16)(u16)gpr.GetImm(rt)); + return; + } + gpr.MapDirtyIn(rd, rt); + ir.Write(IROp::Ext16to32, rd, rt); + break; + + case 20: //bitrev + default: + Comp_Generic(op); + return; + } +} + +void IRJit::Comp_Allegrex2(MIPSOpcode op) { + CONDITIONAL_DISABLE; + MIPSGPReg rt = _RT; + MIPSGPReg rd = _RD; + // Don't change $zr. + if (rd == 0) + return; + + switch (op & 0x3ff) { + case 0xA0: //wsbh + if (gpr.IsImm(rt)) { + gpr.SetImm(rd, ((gpr.GetImm(rt) & 0xFF00FF00) >> 8) | ((gpr.GetImm(rt) & 0x00FF00FF) << 8)); + } else { + gpr.MapDirtyIn(rd, rt); + ir.Write(IROp::BSwap16, rd, rt); + } + break; + case 0xE0: //wsbw + if (gpr.IsImm(rt)) { + gpr.SetImm(rd, swap32(gpr.GetImm(rt))); + } else { + gpr.MapDirtyIn(rd, rt); + ir.Write(IROp::BSwap16, rd, rt); + } + break; + default: + Comp_Generic(op); + break; + } +} + +void IRJit::Comp_MulDivType(MIPSOpcode op) { + CONDITIONAL_DISABLE; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + MIPSGPReg rd = _RD; + + // Note that in all cases below, LO is actually mapped to HI:LO. + // That is, the host reg is 64 bits and has HI at the top. + // HI is not mappable. + + DISABLE; +} + +} diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp new file mode 100644 index 000000000000..16c7245b82dc --- /dev/null +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -0,0 +1,363 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "profiler/profiler.h" + +#include "Core/Reporting.h" +#include "Core/Config.h" +#include "Core/MemMap.h" +#include "Core/HLE/HLE.h" +#include "Core/HLE/HLETables.h" + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSTables.h" + +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRRegCache.h" +#include "Core/MIPS/JitCommon/JitBlockCache.h" + +#include "Common/Arm64Emitter.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +#define LOOPOPTIMIZATION 0 + +using namespace MIPSAnalyst; + +namespace MIPSComp +{ + using namespace Arm64Gen; + +void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) +{ + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); + return; + } + int offset = _IMM16 << 2; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + u32 targetAddr = GetCompilerPC() + offset + 4; + + MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); + + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + + MIPSGPReg lhs = rs; + MIPSGPReg rhs = rt; + if (!delaySlotIsNice) { + ir.Write(IROp::Mov, IRTEMP_0, rs); + ir.Write(IROp::Mov, IRTEMP_1, rt); + lhs = (MIPSGPReg)IRTEMP_0; + rhs = (MIPSGPReg)IRTEMP_1; + } + + if (!likely) + CompileDelaySlot(); + + gpr.MapInIn(lhs, rhs); + ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs); + // This makes the block "impure" :( + if (likely) + CompileDelaySlot(); + + ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); + + js.compiling = false; +} + +void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); + return; + } + int offset = _IMM16 << 2; + MIPSGPReg rs = _RS; + u32 targetAddr = GetCompilerPC() + offset + 4; + + MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + + if (!likely && delaySlotIsNice) + CompileDelaySlot(); + int lhs = rs; + gpr.MapIn(rs); + if (!delaySlotIsNice) { + ir.Write(IROp::Mov, IRTEMP_0, rs); + lhs = IRTEMP_0; + } + ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs); + if (likely) { + CompileDelaySlot(); + } + // Taken + ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); + js.compiling = false; +} + +void IRJit::Comp_RelBranch(MIPSOpcode op) { + // The CC flags here should be opposite of the actual branch becuase they skip the branching action. 
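	// Editor's sketch (not from the patch): for instance, "beq rs, rt, target" takes the
	// IRComparison::NotEqual path below, so BranchRSRTComp() above roughly emits:
	//   Downcount         <block downcount>
	//   <delay slot>
	//   ExitToConstIfNeq  const(PC + 8), rs, rt   ; not taken -> continue after the slot
	//   ExitToConst       const(target)           ; taken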
+ switch (op >> 26) { + case 4: BranchRSRTComp(op, IRComparison::NotEqual, false); break;//beq + case 5: BranchRSRTComp(op, IRComparison::Equal, false); break;//bne + + case 6: BranchRSZeroComp(op, IRComparison::Greater, false, false); break;//blez + case 7: BranchRSZeroComp(op, IRComparison::LessEqual, false, false); break;//bgtz + + case 20: BranchRSRTComp(op, IRComparison::NotEqual, true); break;//beql + case 21: BranchRSRTComp(op, IRComparison::Equal, true); break;//bnel + + case 22: BranchRSZeroComp(op, IRComparison::Greater, false, true); break;//blezl + case 23: BranchRSZeroComp(op, IRComparison::LessEqual, false, true); break;//bgtzl + + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } +} + +void IRJit::Comp_RelBranchRI(MIPSOpcode op) { + switch ((op >> 16) & 0x1F) { + case 0: BranchRSZeroComp(op, IRComparison::GreaterEqual, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz + case 1: BranchRSZeroComp(op, IRComparison::Less, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez + case 2: BranchRSZeroComp(op, IRComparison::GreaterEqual, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl + case 3: BranchRSZeroComp(op, IRComparison::Less, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl + case 16: BranchRSZeroComp(op, IRComparison::GreaterEqual, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal + case 17: BranchRSZeroComp(op, IRComparison::Less, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal + case 18: BranchRSZeroComp(op, IRComparison::GreaterEqual, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall + case 19: BranchRSZeroComp(op, IRComparison::Less, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } +} + +// If likely is set, discard the branch slot if NOT taken. +void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); + return; + } + int offset = _IMM16 << 2; + u32 targetAddr = GetCompilerPC() + offset + 4; + + MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + ir.Write(IROp::FpCondToReg, IRTEMP_0); + if (!likely) + CompileDelaySlot(); + + FlushAll(); + // Not taken + ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_0, 0); + // Taken + if (likely) + CompileDelaySlot(); + ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); + js.compiling = false; +} + +void IRJit::Comp_FPUBranch(MIPSOpcode op) { + switch((op >> 16) & 0x1f) { + case 0: BranchFPFlag(op, IRComparison::NotEqual, false); break; // bc1f + case 1: BranchFPFlag(op, IRComparison::Equal, false); break; // bc1t + case 2: BranchFPFlag(op, IRComparison::NotEqual, true); break; // bc1fl + case 3: BranchFPFlag(op, IRComparison::Equal, true); break; // bc1tl + default: + _dbg_assert_msg_(CPU, 0, "Trying to interpret instruction that can't be interpreted"); + break; + } +} + +// If likely is set, discard the branch slot if NOT taken. 
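// "Likely" variants (beql, bnel, bc1fl, ...) execute the delay slot only when the branch
// is taken, which is why the handlers here compile the slot after the not-taken exit when
// 'likely' is set, and before it otherwise.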
+void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); + return; + } + int offset = _IMM16 << 2; + u32 targetAddr = GetCompilerPC() + offset + 4; + + MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + + ir.Write(IROp::VfpCondToReg, IRTEMP_0); + + // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) + // The behavior is undefined - the CPU may take the second branch even if the first one passes. + // However, it does consistently try each branch, which these games seem to expect. + bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp); + if (!likely) + CompileDelaySlot(); + + if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) + ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC()); + + int imm3 = (op >> 18) & 7; + + u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8); + + ir.Write(IROp::AndConst, IRTEMP_0, IRTEMP_0, ir.AddConstant(imm3)); + ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_0, 0); + + if (likely) + CompileDelaySlot(); + + // Taken + ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); + js.compiling = false; +} + +void IRJit::Comp_VBranch(MIPSOpcode op) { + switch ((op >> 16) & 3) { + case 0: BranchVFPUFlag(op, IRComparison::NotEqual, false); break; // bvf + case 1: BranchVFPUFlag(op, IRComparison::Equal, false); break; // bvt + case 2: BranchVFPUFlag(op, IRComparison::NotEqual, true); break; // bvfl + case 3: BranchVFPUFlag(op, IRComparison::Equal, true); break; // bvtl + } +} + +void IRJit::Comp_Jump(MIPSOpcode op) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); + return; + } + + u32 off = _IMM26 << 2; + u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off; + + // Might be a stubbed address or something? + if (!Memory::IsValidAddress(targetAddr)) { + if (js.nextExit == 0) { + ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr); + } else { + js.compiling = false; + } + // TODO: Mark this block dirty or something? May be indication it will be changed by imports. 
+ return; + } + + switch (op >> 26) { + case 2: //j + CompileDelaySlot(); + FlushAll(); + ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); + break; + + case 3: //jal + if (ReplaceJalTo(targetAddr)) + return; + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); + CompileDelaySlot(); + FlushAll(); + ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); + break; + + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } + js.compiling = false; +} + +void IRJit::Comp_JumpReg(MIPSOpcode op) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); + return; + } + MIPSGPReg rs = _RS; + MIPSGPReg rd = _RD; + bool andLink = (op & 0x3f) == 9 && rd != MIPS_REG_ZERO; + + MIPSOpcode delaySlotOp = GetOffsetInstruction(1); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + if (andLink && rs == rd) + delaySlotIsNice = false; + + int destReg; + if (IsSyscall(delaySlotOp)) { + gpr.MapDirty(rs); + ir.Write(IROp::SetPC, 0, rs); + if (andLink) + gpr.SetImm(rd, GetCompilerPC() + 8); + CompileDelaySlot(); + return; // Syscall (delay slot) wrote exit code. + } else if (delaySlotIsNice) { + if (andLink) + gpr.SetImm(rd, GetCompilerPC() + 8); + CompileDelaySlot(); + gpr.MapDirty(rs); + destReg = rs; // Safe because FlushAll doesn't change any regs + FlushAll(); + } else { + // Bad delay slot. + gpr.MapDirty(rs); + ir.Write(IROp::Mov, IRTEMP_0, rs); + destReg = IRTEMP_0; + if (andLink) + gpr.SetImm(rd, GetCompilerPC() + 8); + CompileDelaySlot(); + FlushAll(); + } + + switch (op & 0x3f) + { + case 8: //jr + break; + case 9: //jalr + break; + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } + + ir.Write(IROp::ExitToReg, ir.AddConstant(js.downcountAmount), rs, 0); + js.compiling = false; +} + +void IRJit::Comp_Syscall(MIPSOpcode op) { + // If we're in a delay slot, this is off by one. + const int offset = js.inDelaySlot ? -1 : 0; + RestoreRoundingMode(); + js.downcountAmount = -offset; + + FlushAll(); + + ir.Write(IROp::Syscall, 0, ir.AddConstant(op.encoding)); + + ApplyRoundingMode(); + js.compiling = false; +} + +void IRJit::Comp_Break(MIPSOpcode op) +{ + Comp_Generic(op); + js.compiling = false; +} + +} // namespace Mipscomp diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp new file mode 100644 index 000000000000..00a8ec63991c --- /dev/null +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -0,0 +1,226 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
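// This file lowers the COP1 (FPU) side of the instruction set: three-operand arithmetic
// (Comp_FPU3op), compares that feed the FP condition bit (Comp_FPUComp), conversions and
// rounding (Comp_FPU2op), and GPR<->FPR transfers (Comp_mxc1). FPU loads and stores
// (Comp_FPULS) still fall back to the interpreter.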
+ +#include "Core/Config.h" +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSTables.h" + +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRRegCache.h" +#include "Common/CPUDetect.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + + +// FPCR interesting bits: +// 24: FZ (flush-to-zero) +// 23:22: RMode (0 = nearest, 1 = +inf, 2 = -inf, 3 = zero) +// not much else is interesting for us, but should be preserved. +// To access: MRS Xt, FPCR ; MSR FPCR, Xt + + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp { + +void IRJit::Comp_FPU3op(MIPSOpcode op) { + CONDITIONAL_DISABLE; + + int ft = _FT; + int fs = _FS; + int fd = _FD; + + switch (op & 0x3f) { + case 0: ir.Write(IROp::FAdd, fd, fs, ft); break; //F(fd) = F(fs) + F(ft); //add + case 1: ir.Write(IROp::FSub, fd, fs, ft); break; //F(fd) = F(fs) - F(ft); //sub + case 2: ir.Write(IROp::FMul, fd, fs, ft); break; //F(fd) = F(fs) * F(ft); //mul + case 3: ir.Write(IROp::FDiv, fd, fs, ft); break; //F(fd) = F(fs) / F(ft); //div + default: + DISABLE; + return; + } +} + +void IRJit::Comp_FPULS(MIPSOpcode op) { + DISABLE; +} + +void IRJit::Comp_FPUComp(MIPSOpcode op) { + CONDITIONAL_DISABLE; + + int opc = op & 0xF; + if (opc >= 8) opc -= 8; // alias + if (opc == 0) { // f, sf (signalling false) + gpr.SetImm(MIPS_REG_FPCOND, 0); + return; + } + + int fs = _FS; + int ft = _FT; + + IROp irOp; + switch (opc) { + case 1: // un, ngle (unordered) + irOp = IROp::FCmpUnordered; + break; + case 2: // eq, seq (equal, ordered) + irOp = IROp::FCmpEqual; + break; + case 3: // ueq, ngl (equal, unordered) + irOp = IROp::FCmpEqualUnordered; + return; + case 4: // olt, lt (less than, ordered) + irOp = IROp::FCmpLessOrdered; + break; + case 5: // ult, nge (less than, unordered) + irOp = IROp::FCmpLessUnordered; + break; + case 6: // ole, le (less equal, ordered) + irOp = IROp::FCmpLessEqualOrdered; + break; + case 7: // ule, ngt (less equal, unordered) + irOp = IROp::FCmpLessEqualUnordered; + break; + default: + Comp_Generic(op); + return; + } + ir.Write(irOp, fs, ft); +} + +void IRJit::Comp_FPU2op(MIPSOpcode op) { + CONDITIONAL_DISABLE; + int fs = _FS; + int fd = _FD; + + switch (op & 0x3f) { + case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt + ir.Write(IROp::FSqrt, fd, fs); + break; + case 5: //F(fd) = fabsf(F(fs)); break; //abs + ir.Write(IROp::FAbs, fd, fs); + break; + case 6: //F(fd) = F(fs); break; //mov + ir.Write(IROp::FMov, fd, fs); + break; + case 7: //F(fd) = -F(fs); break; //neg + ir.Write(IROp::FNeg, fd, fs); + break; + + case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s + { + ir.Write(IROp::FRound, fd, fs); + break; + } + + case 13: //FsI(fd) = Rto0(F(fs))); break; //trunc.w.s + { + ir.Write(IROp::FTrunc, fd, fs); + break; + } + + case 14://FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s + { + ir.Write(IROp::FCeil, fd, fs); + break; + } + case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s + { + 
ir.Write(IROp::FFloor, fd, fs); + break; + } + + case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w + ir.Write(IROp::FCvtSW, fd, fs); + break; + + case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s + ir.Write(IROp::FCvtWS, fd, fs); + break; + + default: + DISABLE; + } +} + +void IRJit::Comp_mxc1(MIPSOpcode op) +{ + CONDITIONAL_DISABLE; + + int fs = _FS; + MIPSGPReg rt = _RT; + + switch ((op >> 21) & 0x1f) { + case 0: // R(rt) = FI(fs); break; //mfc1 + if (rt == MIPS_REG_ZERO) { + return; + } + gpr.MapDirty(rt); + ir.Write(IROp::FMovToGPR, rt, fs); + return; + + case 2: //cfc1 + if (rt == MIPS_REG_ZERO) { + return; + } + if (fs == 31) { + DISABLE; + } else if (fs == 0) { + gpr.SetImm(rt, MIPSState::FCR0_VALUE); + } else { + // Unsupported regs are always 0. + gpr.SetImm(rt, 0); + } + return; + + case 4: //FI(fs) = R(rt); break; //mtc1 + gpr.MapDirty(rt); + ir.Write(IROp::FMovFromGPR, fs, rt); + return; + + case 6: //ctc1 + if (fs == 31) { + // Set rounding mode + DISABLE; + } else { + Comp_Generic(op); + } + return; + default: + DISABLE; + break; + } +} + +} // namespace MIPSComp diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp new file mode 100644 index 000000000000..53ea1f866fe9 --- /dev/null +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -0,0 +1,162 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +// Optimization ideas: +// +// It's common to see sequences of stores writing or reading to a contiguous set of +// addresses in function prologues/epilogues: +// sw s5, 104(sp) +// sw s4, 100(sp) +// sw s3, 96(sp) +// sw s2, 92(sp) +// sw s1, 88(sp) +// sw s0, 84(sp) +// sw ra, 108(sp) +// mov s4, a0 +// mov s3, a1 +// ... +// Such sequences could easily be detected and turned into nice contiguous +// sequences of ARM stores instead of the current 3 instructions per sw/lw. +// +// Also, if we kept track of the likely register content of a cached register, +// (pointer or data), we could avoid many BIC instructions. + + +#include "Core/MemMap.h" +#include "Core/Config.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRRegCache.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. 
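// Editor's sketch (not in the patch): the contiguous-store detection suggested in the
// optimization notes at the top of this file could start out like this - scan forward
// while we keep seeing "sw <reg>, imm($sp)" with offsets descending by one word, as in
// the prologue example above. Standard field layout op(31:26) rs(25:21) imm(15:0) assumed.
static int CountContiguousSPStores(const u32 *ops, int maxCount) {
	int count = 0;
	int expectedOffset = 0;
	for (int i = 0; i < maxCount; i++) {
		const u32 op = ops[i];
		if ((op >> 26) != 43)            // opcode 43 = sw
			break;
		if (((op >> 21) & 0x1F) != 29)   // base register must be $sp (reg 29)
			break;
		const int offset = (short)(op & 0xFFFF);
		if (count > 0 && offset != expectedOffset)
			break;
		expectedOffset = offset - 4;     // next store expected one word lower
		count++;
	}
	return count;
}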
+ +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp { + void IRJit::Comp_ITypeMemLR(MIPSOpcode op, bool load) { + DISABLE; + } + + void IRJit::Comp_ITypeMem(MIPSOpcode op) { + CONDITIONAL_DISABLE; + + int offset = (signed short)(op & 0xFFFF); + bool load = false; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + int o = op >> 26; + if (((op >> 29) & 1) == 0 && rt == MIPS_REG_ZERO) { + // Don't load anything into $zr + return; + } + + u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF; + int addrReg = IRTEMP_0; + switch (o) { + // Load + case 35: + ir.Write(IROp::Load32, rt, rs, ir.AddConstant(offset)); + break; + case 37: + ir.Write(IROp::Load16, rt, rs, ir.AddConstant(offset)); + break; + case 33: + ir.Write(IROp::Load16Ext, rt, rs, ir.AddConstant(offset)); + break; + case 36: + ir.Write(IROp::Load8, rt, rs, ir.AddConstant(offset)); + break; + case 32: + ir.Write(IROp::Load8Ext, rt, rs, ir.AddConstant(offset)); + break; + // Store + case 43: + ir.Write(IROp::Store32, rt, rs, ir.AddConstant(offset)); + break; + case 41: + ir.Write(IROp::Store16, rt, rs, ir.AddConstant(offset)); + break; + case 40: + ir.Write(IROp::Store8, rt, rs, ir.AddConstant(offset)); + break; + + case 34: //lwl + case 38: //lwr + load = true; + case 42: //swl + case 46: //swr + DISABLE; + break; + default: + Comp_Generic(op); + return; + } + } + + void IRJit::Comp_Cache(MIPSOpcode op) { +// int imm = (s16)(op & 0xFFFF); +// int rs = _RS; +// int addr = R(rs) + imm; + int func = (op >> 16) & 0x1F; + + // It appears that a cache line is 0x40 (64) bytes, loops in games + // issue the cache instruction at that interval. + + // These codes might be PSP-specific, they don't match regular MIPS cache codes very well + switch (func) { + // Icache + case 8: + // Invalidate the instruction cache at this address + DISABLE; + break; + // Dcache + case 24: + // "Create Dirty Exclusive" - for avoiding a cacheline fill before writing to it. + // Will cause garbage on the real machine so we just ignore it, the app will overwrite the cacheline. + break; + case 25: // Hit Invalidate - zaps the line if present in cache. Should not writeback???? scary. + // No need to do anything. + break; + case 27: // D-cube. Hit Writeback Invalidate. Tony Hawk Underground 2 + break; + case 30: // GTA LCS, a lot. Fill (prefetch). Tony Hawk Underground 2 + break; + + default: + DISABLE; + break; + } + } +} diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp new file mode 100644 index 000000000000..d7b807fe6347 --- /dev/null +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -0,0 +1,326 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
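// VFPU lowering. For now only the prefix bookkeeping (Comp_VPFX, ApplyPrefixST,
// ApplyPrefixD, GetVectorRegsPrefixD) is sketched in (the register work is still
// commented out), and every vector instruction below falls back to the interpreter
// via DISABLE.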
+ +#include +#include "math/math_util.h" + +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSTables.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Common/CPUDetect.h" +#include "Core/Config.h" +#include "Core/Reporting.h" + +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRRegCache.h" + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +namespace MIPSComp { + + void IRJit::Comp_VPFX(MIPSOpcode op) { + CONDITIONAL_DISABLE; + int data = op & 0xFFFFF; + int regnum = (op >> 24) & 3; + switch (regnum) { + case 0: // S + js.prefixS = data; + js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + case 1: // T + js.prefixT = data; + js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + case 2: // D + js.prefixD = data; + js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + default: + ERROR_LOG(CPU, "VPFX - bad regnum %i : data=%08x", regnum, data); + break; + } + } + + void IRJit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { + if (prefix == 0xE4) + return; + + int n = GetNumVectorElements(sz); + u8 origV[4]; + static const float constantArray[8] = { 0.f, 1.f, 2.f, 0.5f, 3.f, 1.f / 3.f, 0.25f, 1.f / 6.f }; + + for (int i = 0; i < n; i++) + origV[i] = vregs[i]; + + for (int i = 0; i < n; i++) { + int regnum = (prefix >> (i * 2)) & 3; + int abs = (prefix >> (8 + i)) & 1; + int negate = (prefix >> (16 + i)) & 1; + int constants = (prefix >> (12 + i)) & 1; + + // Unchanged, hurray. + if (!constants && regnum == i && !abs && !negate) + continue; + + /* + // This puts the value into a temp reg, so we won't write the modified value back. + vregs[i] = fpr.GetTempV(); + if (!constants) { + fpr.MapDirtyInV(vregs[i], origV[regnum]); + fpr.SpillLockV(vregs[i]); + + // Prefix may say "z, z, z, z" but if this is a pair, we force to x. + // TODO: But some ops seem to use const 0 instead? + if (regnum >= n) { + WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, GetCompilerPC(), MIPSDisasmAt(GetCompilerPC())); + regnum = 0; + } + + if (abs) { + fp.FABS(fpr.V(vregs[i]), fpr.V(origV[regnum])); + if (negate) + fp.FNEG(fpr.V(vregs[i]), fpr.V(vregs[i])); + } else { + if (negate) + fp.FNEG(fpr.V(vregs[i]), fpr.V(origV[regnum])); + else + fp.FMOV(fpr.V(vregs[i]), fpr.V(origV[regnum])); + } + } else { + fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT); + fpr.SpillLockV(vregs[i]); + fp.MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs << 2)], SCRATCH1, (bool)negate); + } + */ + } + } + + void IRJit::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); + + GetVectorRegs(regs, sz, vectorReg); + if (js.prefixD == 0) + return; + + int n = GetNumVectorElements(sz); + for (int i = 0; i < n; i++) { + // Hopefully this is rare, we'll just write it into a reg we drop. 
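		// Editor's note on the prefix encodings used here: the S/T prefixes decoded in
		// ApplyPrefixST() pack, for lane i, the source swizzle in bits 2i+1:2i, abs in
		// bit 8+i, "use constant table" in bit 12+i and negate in bit 16+i (0xE4 is the
		// identity swizzle). The D prefix keeps per-lane saturation in bits 2i+1:2i
		// (1 = clamp to [0,1], 3 = clamp to [-1,1], see ApplyPrefixD() below) and the
		// write mask in bits 8..11, which is what the commented-out lines here would
		// honor by redirecting masked lanes to a temp register.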
+ //if (js.VfpuWriteMask(i)) + // regs[i] = fpr.GetTempV(); + } + } + + void IRJit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { + _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); + if (!js.prefixD) + return; + + /* + int n = GetNumVectorElements(sz); + for (int i = 0; i < n; i++) { + if (js.VfpuWriteMask(i)) + continue; + + int sat = (js.prefixD >> (i * 2)) & 3; + if (sat == 1) { + // clamped = x < 0 ? (x > 1 ? 1 : x) : x [0, 1] + fpr.MapRegV(vregs[i], MAP_DIRTY); + + fp.MOVI2F(S0, 0.0f, SCRATCH1); + fp.MOVI2F(S1, 1.0f, SCRATCH1); + fp.FMIN(fpr.V(vregs[i]), fpr.V(vregs[i]), S1); + fp.FMAX(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); + } else if (sat == 3) { + // clamped = x < -1 ? (x > 1 ? 1 : x) : x [-1, 1] + fpr.MapRegV(vregs[i], MAP_DIRTY); + + fp.MOVI2F(S0, -1.0f, SCRATCH1); + fp.MOVI2F(S1, 1.0f, SCRATCH1); + fp.FMIN(fpr.V(vregs[i]), fpr.V(vregs[i]), S1); + fp.FMAX(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); + } + } + */ + } + + void IRJit::Comp_SV(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_SVQ(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VVectorInit(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VIdt(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VMatrixInit(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VHdp(MIPSOpcode op) { + DISABLE; + } + + static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f }; + + void IRJit::Comp_Vhoriz(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VDot(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VecDo3(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VV2Op(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vi2f(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vh2f(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vf2i(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Mftv(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vmfvc(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vmtvc(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vmmov(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VScl(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vmmul(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vmscl(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vtfm(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VCrs(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VDet(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vi2x(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vx2i(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_VCrossQuat(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vcmp(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vcmov(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Viim(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vfim(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vcst(MIPSOpcode op) { + DISABLE; + } + + // Very heavily used by FF:CC. Should be replaced by a fast approximation instead of + // calling the math library. 
+ void IRJit::Comp_VRot(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vsgn(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vocp(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_ColorConv(MIPSOpcode op) { + DISABLE; + } + + void IRJit::Comp_Vbfy(MIPSOpcode op) { + DISABLE; + } +} diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp new file mode 100644 index 000000000000..cfcbe4349747 --- /dev/null +++ b/Core/MIPS/IR/IRInst.cpp @@ -0,0 +1,316 @@ +#include "Core/MIPS/IR/IRInst.h" +#include "Core/MemMap.h" + +IRMeta meta[] = { + { IROp::SetConst, "SetConst", "GC" }, + { IROp::Add, "Add", "GGG" }, + { IROp::Sub, "Sub", "GGG" }, + { IROp::Neg, "Neg", "GG" }, + { IROp::Not, "Not", "GG" }, + { IROp::And, "And", "GGG" }, + { IROp::Or, "Or", "GGG" }, + { IROp::Xor, "Xor", "GGG" }, + { IROp::AddConst, "AddConst", "GGC" }, + { IROp::SubConst, "SubConst", "GGC" }, + { IROp::AndConst, "AndConst", "GGC" }, + { IROp::OrConst, "OrConst", "GGC" }, + { IROp::XorConst, "XorConst", "GGC" }, + { IROp::Shl, "Shl", "GGG" }, + { IROp::Shr, "Shr", "GGG" }, + { IROp::Sar, "Sar", "GGG" }, + { IROp::Ror, "Ror", "GGG" }, + { IROp::ShlImm, "ShlImm", "GGI" }, + { IROp::ShrImm, "ShrImm", "GGI" }, + { IROp::SarImm, "SarImm", "GGI" }, + { IROp::RorImm, "RorImm", "GGI" }, + { IROp::Slt, "Slt","GGC" }, + { IROp::SltConst, "SltConst","GGC" }, + { IROp::SltU, "SltU", "GGC" }, + { IROp::SltUConst, "SltUConst", "GGC" }, + { IROp::Clz, "Clz", "GG" }, + { IROp::MovZ, "MovZ", "GGG" }, + { IROp::MovNZ, "MovNZ", "GGG" }, + { IROp::Max, "Max", "GGG" }, + { IROp::Min, "Min", "GGG" }, + { IROp::BSwap16, "BSwap16", "GG" }, + { IROp::BSwap32, "BSwap32", "GG" }, + { IROp::Mul, "Mul", "_GG" }, + { IROp::Ext8to32, "Ext8to32", "GG" }, + { IROp::Ext16to32, "Ext16to32", "GG" }, + { IROp::FAdd, "FAdd", "FFF" }, + { IROp::FSub, "FSub", "FFF" }, + { IROp::FMul, "FMul", "FFF" }, + { IROp::FDiv, "FDiv", "FFF" }, + { IROp::FMov, "FMov", "FF" }, + { IROp::FSqrt, "FSqrt", "FF" }, + { IROp::FNeg, "FNeg", "FF" }, + { IROp::FAbs, "FAbs", "FF" }, + { IROp::FRound, "FRound", "FF" }, + { IROp::FTrunc, "FTrunc", "FF" }, + { IROp::FCeil, "FCeil", "FF" }, + { IROp::FFloor, "FFloor", "FF" }, + { IROp::FCvtWS, "FCvtWS", "FF" }, + { IROp::FCvtSW, "FCvtSW", "FF" }, + { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, + { IROp::FMovToGPR, "FMovToGPR", "GF" }, + { IROp::FpCondToReg, "FpCondToReg", "G" }, + { IROp::SetCtrlVFPU, "SetCtrlVFPU", "T" }, + { IROp::Interpret, "Interpret", "_C" }, + { IROp::Downcount, "Downcount", "_II" }, + { IROp::Syscall, "Syscall", "_C"}, + { IROp::SetPC, "SetPC", "_C"}, +}; + +const IRMeta *metaIndex[256]; + +void InitIR() { + for (size_t i = 0; i < ARRAY_SIZE(meta); i++) { + metaIndex[(int)meta[i].op] = &meta[i]; + } +} + +u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) { + const IRInst *end = inst + count; + while (inst != end) { + switch (inst->op) { + case IROp::SetConst: + mips->r[inst->dest] = constPool[inst->src1]; + break; + case IROp::Add: + mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; + break; + case IROp::Sub: + mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2]; + break; + case IROp::Neg: + mips->r[inst->dest] = -(s32)mips->r[inst->src1]; + break; + case IROp::Ext8to32: + mips->r[inst->dest] = (s32)(s8)mips->r[inst->src1]; + break; + case IROp::Ext16to32: + mips->r[inst->dest] = (s32)(s16)mips->r[inst->src1]; + break; + + case IROp::Load8: + mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); + 
break; + case IROp::Load8Ext: + mips->r[inst->dest] = (s32)(s8)Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load16: + mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load16Ext: + mips->r[inst->dest] = (s32)(s16)Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load32: + mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + constPool[inst->src2]); + break; + + case IROp::Store8: + Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Store16: + Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Store32: + Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + + case IROp::ShlImm: + mips->r[inst->dest] = mips->r[inst->src1] << inst->src2; + break; + case IROp::ShrImm: + mips->r[inst->dest] = mips->r[inst->src1] >> inst->src2; + break; + case IROp::SarImm: + mips->r[inst->dest] = (s32)mips->r[inst->src1] >> inst->src2; + break; + case IROp::RorImm: + { + u32 x = mips->r[inst->src1]; + int sa = inst->src2; + mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); + } + break; + + case IROp::Shl: + mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31); + break; + case IROp::Shr: + mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31); + break; + case IROp::Sar: + mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31); + break; + case IROp::Ror: + { + u32 x = mips->r[inst->src1]; + int sa = mips->r[inst->src2] & 31; + mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); + } + break; + + case IROp::MovZ: + if (mips->r[inst->src1] == 0) + mips->r[inst->dest] = mips->r[inst->src2]; + break; + case IROp::MovNZ: + if (mips->r[inst->src1] != 0) + mips->r[inst->dest] = mips->r[inst->src2]; + break; + + case IROp::Max: + mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2]; + break; + case IROp::Min: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? 
mips->r[inst->src1] : mips->r[inst->src2]; + break; + + case IROp::BSwap16: + { + u32 x = mips->r[inst->src1]; + mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8); + break; + } + case IROp::BSwap32: + mips->r[inst->dest] = swap32(mips->r[inst->src1]); + break; + + case IROp::FAdd: + mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2]; + break; + case IROp::FSub: + mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2]; + break; + case IROp::FMul: + mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2]; + break; + case IROp::FDiv: + mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2]; + break; + + case IROp::FMov: + mips->f[inst->dest] = mips->f[inst->src1]; + break; + case IROp::FAbs: + mips->f[inst->dest] = fabsf(mips->f[inst->src1]); + break; + case IROp::FSqrt: + mips->f[inst->dest] = sqrtf(mips->f[inst->src1]); + break; + case IROp::FNeg: + mips->f[inst->dest] = -mips->f[inst->src1]; + break; + case IROp::FpCondToReg: + mips->r[inst->dest] = mips->fpcond; + break; + + case IROp::ExitToConst: + return constPool[inst->src1]; + + case IROp::ExitToReg: + return mips->r[inst->src1]; + + case IROp::ExitToConstIfEq: + if (mips->r[inst->src1] == mips->r[inst->src2]) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfNeq: + if (mips->r[inst->src1] != mips->r[inst->src2]) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfGtZ: + if ((s32)mips->r[inst->src1] > 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfGeZ: + if ((s32)mips->r[inst->src1] >= 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfLtZ: + if ((s32)mips->r[inst->src1] < 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfLeZ: + if ((s32)mips->r[inst->src1] <= 0) + return constPool[inst->dest]; + break; + + case IROp::SetPC: + return mips->pc = mips->r[inst->src1]; + + default: + Crash(); + } + inst++; + } + + // If we got here, the block was badly constructed. + // Crash(); + return 0; +} + +void IRWriter::Write(IROp op, u8 dst, u8 src1, u8 src2) { + IRInst inst; + inst.op = op; + inst.dest = dst; + inst.src1 = src1; + inst.src2 = src2; + insts_.push_back(inst); +} + +void IRWriter::WriteSetConstant(u8 dst, u32 value) { + // TODO: Check for the fixed ones first. 
+ Write(IROp::SetConstImm, AddConstant(value)); +} + +int IRWriter::AddConstant(u32 value) { + for (size_t i = 0; i < constPool_.size(); i++) { + if (constPool_[i] == value) + return i; + } + constPool_.push_back(value); + return (int)constPool_.size() - 1; +} + +int IRWriter::AddConstantFloat(float value) { + u32 val; + memcpy(&val, &value, 4); + return AddConstant(val); +} + +void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *constPool) { + switch (type) { + case 'G': + snprintf(buf, bufSize, "r%d", param); + break; + case 'F': + snprintf(buf, bufSize, "r%d", param); + break; + case 'C': + snprintf(buf, bufSize, "%08x", constPool[param]); + break; + default: + snprintf(buf, bufSize, "?"); + break; + } +} + +void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool) { + const IRMeta *meta = metaIndex[(int)inst.op]; + char bufDst[16]; + char bufSrc1[16]; + char bufSrc2[16]; + DisassembleParam(bufDst, sizeof(bufDst) - 2, inst.dest, meta->types[0], constPool); + DisassembleParam(bufSrc1, sizeof(bufSrc1) - 2, inst.dest, meta->types[1], constPool); + DisassembleParam(bufSrc2, sizeof(bufSrc2), inst.dest, meta->types[2], constPool); + if (meta->types[1]) { + strcat(bufDst, ", "); + } + if (meta->types[2]) { + strcat(bufSrc1, ", "); + } + snprintf(buf, bufsize, "%s %s%s%s", meta->name, bufDst, bufSrc1, bufSrc2); +} \ No newline at end of file diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h new file mode 100644 index 000000000000..a1aa75edc7cf --- /dev/null +++ b/Core/MIPS/IR/IRInst.h @@ -0,0 +1,260 @@ +#pragma once + +#include + +#include "Common/CommonTypes.h" +#include "Core/MIPS/MIPS.h" + +// Basic IR +// +// This IR refers implicitly to the MIPS register set and is simple to interpret. +// To do real compiler things with it and do full-function compilation, it probably +// needs to be lifted to a higher IR first, before being lowered onto each target. +// But this gets rid of a lot of MIPS idiosyncrasies that makes it tricky, like +// delay slots, and is very suitable for translation into other IRs. Can of course +// even be directly JIT-ed, but the gains will probably be tiny over our older direct +// MIPS->target JITs. + +enum class IROp : u8 { + SetConst, + SetConstImm, + FSetConst, + + Mov, + + Add, + Sub, + Neg, + Not, + + And, + Or, + Xor, + + AddConst, + SubConst, + + AndConst, + OrConst, + XorConst, + + Shl, + Shr, + Sar, + Ror, + + // The shift is stored directly, not in the const table, so Imm instead of Const + ShlImm, + ShrImm, + SarImm, + RorImm, + + Slt, + SltConst, + SltU, + SltUConst, + + Clz, + + // Conditional moves + MovZ, + MovNZ, + + Max, + Min, + + // Byte swaps. All CPUs have native ones so worth keeping. + BSwap16, // Swaps both the high and low byte pairs. + BSwap32, + + // Hi/Lo semantics preserved. + Mul, + MulU, + Madd, + MaddU, + Msub, + MsubU, + + // These take a constant from the pool as an offset. + // Loads from a constant address can be represented by using r0. 
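	// (For example, "lw rt, imm(rs)" in Comp_ITypeMem() becomes Load32 with dest = rt,
	// src1 = rs and src2 = the constant-pool index holding imm.)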
+ Load8, + Load8Ext, + Load16, + Load16Ext, + Load32, + LoadFloat, + + Store8, + Store16, + Store32, + StoreFloat, + + Ext8to32, + Ext16to32, + + FAdd, + FSub, + FMul, + FDiv, + + FMov, + FSqrt, + FNeg, + FAbs, + + FRound, + FTrunc, + FCeil, + FFloor, + + FCvtWS, + FCvtSW, + + FMovFromGPR, + FMovToGPR, + + FpCondToReg, + VfpCondToReg, + + FCmpUnordered, + FCmpEqual, + FCmpEqualUnordered, + FCmpLessOrdered, + FCmpLessUnordered, + FCmpLessEqualOrdered, + FCmpLessEqualUnordered, + + // Rounding Mode + RestoreRoundingMode, + ApplyRoundingMode, + UpdateRoundingMode, + + SetCtrlVFPU, + + // Fake/System instructions + Interpret, + + // Emit this before you exits. Semantic is to set the downcount + // that will be used at the actual exit. + Downcount, // src1 + (src2<<8) + + // End-of-basic-block. + ExitToConst, // 0, const, downcount + ExitToReg, + ExitToConstIfEq, // const, reg1, reg2 + ExitToConstIfNeq, // const, reg1, reg2 + ExitToConstIfGtZ, // const, reg1, 0 + ExitToConstIfGeZ, // const, reg1, 0 + ExitToConstIfLtZ, // const, reg1, 0 + ExitToConstIfLeZ, // const, reg1, 0 + + ExitToConstIfFpTrue, + ExitToConstIfFpFalse, + + Syscall, + SetPC, // hack to make syscall returns work + Break, +}; + +enum IRComparison { + Greater, + GreaterEqual, + Less, + LessEqual, + Equal, + NotEqual, + Bad, +}; + +// Hm, unused +inline IRComparison Invert(IRComparison comp) { + switch (comp) { + case IRComparison::Equal: return IRComparison::NotEqual; + case IRComparison::NotEqual: return IRComparison::Equal; + case IRComparison::Greater: return IRComparison::LessEqual; + case IRComparison::GreaterEqual: return IRComparison::Less; + case IRComparison::Less: return IRComparison::GreaterEqual; + case IRComparison::LessEqual: return IRComparison::Greater; + default: + return IRComparison::Bad; + } +} + +inline IROp ComparisonToExit(IRComparison comp) { + switch (comp) { + case IRComparison::Equal: return IROp::ExitToConstIfEq; + case IRComparison::NotEqual: return IROp::ExitToConstIfNeq; + case IRComparison::Greater: return IROp::ExitToConstIfGtZ; + case IRComparison::GreaterEqual: return IROp::ExitToConstIfGeZ; + case IRComparison::Less: return IROp::ExitToConstIfLtZ; + case IRComparison::LessEqual: return IROp::ExitToConstIfLeZ; + default: + return IROp::Break; + } +} + +enum { + IRTEMP_0 = 192, + IRTEMP_1, + IRTEMP_2, + IRTEMP_3, + + // Hacky way to get to other state + IRREG_LO = 226, // offset of lo in MIPSState / 4 + IRREG_HI = 227, +}; + +enum class IRParam { + Ignore = '_', + UImm8 = 'U', + Const = 'C', + GPR = 'G', + FPR = 'F', + VPR = 'V', + VCtrl = 'T', +}; + +struct IRMeta { + IROp op; + const char *name; + const char types[4]; // GGG + u32 flags; +}; + +// 32 bits. +struct IRInst { + IROp op; + union { + u8 dest; + u8 src3; + }; + u8 src1; + u8 src2; +}; + +// Returns the new PC. +u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count); + +// Each IR block gets a constant pool. 
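// Illustrative sketch (assumed usage, not verbatim from this patch): judging by
// DoJit() and the Comp_* routines later in this series, a block is emitted into
// an IRWriter and then handed off as flat arrays, roughly like this, where
// rt/rs/imm/targetAddr/block stand in for real values:
//
//   IRWriter ir;
//   ir.Clear();
//   ir.Write(IROp::AddConst, rt, rs, ir.AddConstant(imm));    // rt = rs + imm
//   ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));  // end of block
//   block->SetInstructions(ir.GetInstructions(), ir.GetConstants());
//
// At run time, IRInterpret() walks the stored instructions against the constant
// pool and returns the next PC.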
+class IRWriter { +public: + void Write(IROp op, u8 dst = 0, u8 src1 = 0, u8 src2 = 0); + void WriteSetConstant(u8 dst, u32 value); + + int AddConstant(u32 value); + int AddConstantFloat(float value); + + void Clear() { + insts_.clear(); + constPool_.clear(); + } + + const std::vector &GetInstructions() { return insts_; } + const std::vector &GetConstants() { return constPool_; } + +private: + std::vector insts_; + std::vector constPool_; +}; + +void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp new file mode 100644 index 000000000000..b16706b2d68e --- /dev/null +++ b/Core/MIPS/IR/IRJit.cpp @@ -0,0 +1,333 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "base/logging.h" +#include "profiler/profiler.h" +#include "Common/ChunkFile.h" +#include "Common/CPUDetect.h" +#include "Common/StringUtils.h" + +#include "Core/Reporting.h" +#include "Core/Config.h" +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/Debugger/SymbolMap.h" +#include "Core/MemMap.h" + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSInt.h" +#include "Core/MIPS/MIPSTables.h" +#include "Core/HLE/ReplaceTables.h" +#include "Core/HLE/sceKernelMemory.h" +#include "Core/MIPS/IR/IRRegCache.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/JitCommon/JitCommon.h" + +void DisassembleArm64Print(const u8 *data, int size) { + std::vector lines = DisassembleArm64(data, size); + for (auto s : lines) { + ILOG("%s", s.c_str()); + } + /* + ILOG("+++"); + // A format friendly to Online Disassembler which gets endianness wrong + for (size_t i = 0; i < lines.size(); i++) { + uint32_t opcode = ((const uint32_t *)data)[i]; + ILOG("%d/%d: %08x", (int)(i+1), (int)lines.size(), swap32(opcode)); + } + ILOG("==="); + ILOG("===");*/ +} + +namespace MIPSComp +{ + +IRJit::IRJit(MIPSState *mips) : gpr(), mips_(mips) { + logBlocks = 0; + dontLogBlocks = 0; + js.startDefaultPrefix = mips_->HasDefaultPrefix(); + js.currentRoundingFunc = convertS0ToSCRATCH1[0]; + u32 size = 128 * 1024; + blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); +} + +IRJit::~IRJit() { +} + +void IRJit::DoState(PointerWrap &p) { + auto s = p.Section("Jit", 1, 2); + if (!s) + return; + + p.Do(js.startDefaultPrefix); + if (s >= 2) { + p.Do(js.hasSetRounding); + js.lastSetRounding = 0; + } else { + js.hasSetRounding = 1; + } + + if (p.GetMode() == PointerWrap::MODE_READ) { + js.currentRoundingFunc = convertS0ToSCRATCH1[(mips_->fcr31) & 3]; + } +} + +// This is here so the savestate matches between jit and non-jit. 
+void IRJit::DoDummyState(PointerWrap &p) { + auto s = p.Section("Jit", 1, 2); + if (!s) + return; + + bool dummy = false; + p.Do(dummy); + if (s >= 2) { + dummy = true; + p.Do(dummy); + } +} + +void IRJit::FlushAll() { + FlushPrefixV(); +} + +void IRJit::FlushPrefixV() { + if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) { + ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_SPREFIX, ir.AddConstant(js.prefixS)); + js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) { + ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_TPREFIX, ir.AddConstant(js.prefixT)); + js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) { + ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_DPREFIX, ir.AddConstant(js.prefixD)); + js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY); + } +} + +void IRJit::ClearCache() { + ILOG("ARM64Jit: Clearing the cache!"); + blocks_.Clear(); +} + +void IRJit::InvalidateCache() { + blocks_.Clear(); +} + +void IRJit::InvalidateCacheAt(u32 em_address, int length) { + blocks_.InvalidateICache(em_address, length); +} + +void IRJit::EatInstruction(MIPSOpcode op) { + MIPSInfo info = MIPSGetInfo(op); + if (info & DELAYSLOT) { + ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op."); + } + if (js.inDelaySlot) { + ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot."); + } + + js.numInstructions++; + js.compilerPC += 4; + js.downcountAmount += MIPSGetInstructionCycleEstimate(op); +} + +void IRJit::CompileDelaySlot() { + js.inDelaySlot = true; + MIPSOpcode op = GetOffsetInstruction(1); + MIPSCompileOp(op, this); + js.inDelaySlot = false; +} + +void IRJit::Compile(u32 em_address) { + PROFILE_THIS_SCOPE("jitc"); + + int block_num = blocks_.AllocateBlock(em_address); + IRBlock *b = blocks_.GetBlock(block_num); + DoJit(em_address, b); + + bool cleanSlate = false; + + if (js.hasSetRounding && !js.lastSetRounding) { + WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); + // Won't loop, since hasSetRounding is only ever set to 1. + js.lastSetRounding = js.hasSetRounding; + cleanSlate = true; + } + + // Drat. The VFPU hit an uneaten prefix at the end of a block. + if (js.startDefaultPrefix && js.MayHavePrefix()) { + WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + js.LogPrefix(); + + // Let's try that one more time. We won't get back here because we toggled the value. + js.startDefaultPrefix = false; + // TODO ARM64: This crashes. + //cleanSlate = true; + } + + if (cleanSlate) { + // Our assumptions are all wrong so it's clean-slate time. 
+ ClearCache(); + Compile(em_address); + } +} + +void IRJit::RunLoopUntil(u64 globalticks) { + PROFILE_THIS_SCOPE("jit"); + ((void (*)())enterDispatcher)(); +} + +u32 IRJit::GetCompilerPC() { + return js.compilerPC; +} + +MIPSOpcode IRJit::GetOffsetInstruction(int offset) { + return Memory::Read_Instruction(GetCompilerPC() + 4 * offset); +} + +void IRJit::DoJit(u32 em_address, IRBlock *b) { + js.cancel = false; + js.blockStart = mips_->pc; + js.compilerPC = mips_->pc; + js.lastContinuedPC = 0; + js.initialBlockSize = 0; + js.nextExit = 0; + js.downcountAmount = 0; + js.curBlock = nullptr; + js.compiling = true; + js.inDelaySlot = false; + js.PrefixStart(); + ir.Clear(); + + gpr.Start(&ir); + + int partialFlushOffset = 0; + + js.numInstructions = 0; + while (js.compiling) { + MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC()); + js.downcountAmount += MIPSGetInstructionCycleEstimate(inst); + MIPSCompileOp(inst, this); + js.compilerPC += 4; + js.numInstructions++; + } + + b->SetInstructions(ir.GetInstructions(), ir.GetConstants()); + + char temp[256]; + if (logBlocks > 0 && dontLogBlocks == 0) { + ILOG("=============== mips %d ===============", blocks_.GetNumBlocks()); + for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { + MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true); + ILOG("M: %08x %s", cpc, temp); + } + } + + if (logBlocks > 0 && dontLogBlocks == 0) { + ILOG("=============== IR (%d instructions) ===============", js.numInstructions); + for (int i = 0; i < js.numInstructions; i++) { + char buf[256]; + DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); + ILOG("%s", buf); + } + } + + if (logBlocks > 0) + logBlocks--; + if (dontLogBlocks > 0) + dontLogBlocks--; +} + +bool IRJit::DescribeCodePtr(const u8 *ptr, std::string &name) { + // Used in disassembly viewer. + return false; +} + +void IRJit::Comp_RunBlock(MIPSOpcode op) { + // This shouldn't be necessary, the dispatcher should catch us before we get here. + ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); +} + +void IRJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) { + Crash(); +} + +void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) { + Crash(); +} + +bool IRJit::ReplaceJalTo(u32 dest) { + Crash(); + return false; +} + +void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { + Crash(); +} + +void IRJit::Comp_Generic(MIPSOpcode op) { + ir.Write(IROp::Interpret, ir.AddConstant(op.encoding)); + const MIPSInfo info = MIPSGetInfo(op); + if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) { + // If it does eat them, it'll happen in MIPSCompileOp(). + if ((info & OUT_EAT_PREFIX) == 0) + js.PrefixUnknown(); + } +} + +// Destroys SCRATCH2 +void IRJit::RestoreRoundingMode(bool force) { + // If the game has never set an interesting rounding mode, we can safely skip this. + if (force || js.hasSetRounding) { + ir.Write(IROp::RestoreRoundingMode); + } +} + +// Destroys SCRATCH1 and SCRATCH2 +void IRJit::ApplyRoundingMode(bool force) { + // If the game has never set an interesting rounding mode, we can safely skip this. 
+ if (force || js.hasSetRounding) { + ir.Write(IROp::ApplyRoundingMode); + } +} + +// Destroys SCRATCH1 and SCRATCH2 +void IRJit::UpdateRoundingMode() { + ir.Write(IROp::UpdateRoundingMode); +} + +void IRJit::Comp_DoNothing(MIPSOpcode op) { +} + +int IRJit::Replace_fabsf() { + Crash(); + return 0; +} + +void IRBlockCache::Clear() { + blocks_.clear(); +} + +void IRBlockCache::InvalidateICache(u32 addess, u32 length) { + // TODO +} + +} // namespace MIPSComp \ No newline at end of file diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h new file mode 100644 index 000000000000..686eefe6c274 --- /dev/null +++ b/Core/MIPS/IR/IRJit.h @@ -0,0 +1,276 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "Common/CPUDetect.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/MIPS/JitCommon/JitBlockCache.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/IR/IRRegCache.h" +#include "Core/MIPS/IR/IRInst.h" +#include "Core/MIPS/MIPSVFPUUtils.h" + +#ifndef offsetof +#include "stddef.h" +#endif + +namespace MIPSComp { + +// TODO : Use arena allocators. For now let's just malloc. 
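// Block life cycle, as wired up by IRJit::Compile()/DoJit() elsewhere in this
// patch (a descriptive sketch; block_num/em_address are the patch's own names):
//
//   int block_num = blocks_.AllocateBlock(em_address);
//   IRBlock *b = blocks_.GetBlock(block_num);
//   DoJit(em_address, b);   // emits IR, then calls b->SetInstructions(...)
//
// The flat instruction/constant arrays stored here are exactly what
// IRInterpret() consumes when the block is executed.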
+class IRBlock { +public: + IRBlock() {} + IRBlock(u32 emAddr) : instr_(nullptr), const_(nullptr), origAddr_(emAddr), numInstructions_(0) {} + ~IRBlock() { + delete[] instr_; + delete[] const_; + } + + void SetInstructions(const std::vector &inst, const std::vector &constants) { + instr_ = new IRInst[inst.size()]; + numInstructions_ = (u16)inst.size(); + memcpy(instr_, inst.data(), sizeof(IRInst) * inst.size()); + const_ = new u32[constants.size()]; + numConstants_ = (u16)constants.size(); + memcpy(const_, constants.data(), sizeof(u32) * constants.size()); + } + +private: + IRInst *instr_; + u32 *const_; + u16 numInstructions_; + u16 numConstants_; + u32 origAddr_; +}; + +class IRBlockCache { +public: + void Clear(); + void InvalidateICache(u32 addess, u32 length); + int GetNumBlocks() const { return (int)blocks_.size(); } + int AllocateBlock(int emAddr) { + blocks_.emplace_back(IRBlock(emAddr)); + return (int)blocks_.size() - 1; + } + IRBlock *GetBlock(int i) { + return &blocks_[i]; + } +private: + std::vector blocks_; +}; + +class IRJit : public JitInterface { +public: + IRJit(MIPSState *mips); + virtual ~IRJit(); + + void DoState(PointerWrap &p) override; + void DoDummyState(PointerWrap &p) override; + + const JitOptions &GetJitOptions() { return jo; } + + // Compiled ops should ignore delay slots + // the compiler will take care of them by itself + // OR NOT + void Comp_Generic(MIPSOpcode op) override; + + void RunLoopUntil(u64 globalticks) override; + + void Compile(u32 em_address) override; // Compiles a block at current MIPS PC + void DoJit(u32 em_address, IRBlock *b); + + bool DescribeCodePtr(const u8 *ptr, std::string &name) override; + + void Comp_RunBlock(MIPSOpcode op) override; + void Comp_ReplacementFunc(MIPSOpcode op) override; + + // Ops + void Comp_ITypeMem(MIPSOpcode op) override; + void Comp_Cache(MIPSOpcode op) override; + + void Comp_RelBranch(MIPSOpcode op) override; + void Comp_RelBranchRI(MIPSOpcode op) override; + void Comp_FPUBranch(MIPSOpcode op) override; + void Comp_FPULS(MIPSOpcode op) override; + void Comp_FPUComp(MIPSOpcode op) override; + void Comp_Jump(MIPSOpcode op) override; + void Comp_JumpReg(MIPSOpcode op) override; + void Comp_Syscall(MIPSOpcode op) override; + void Comp_Break(MIPSOpcode op) override; + + void Comp_IType(MIPSOpcode op) override; + void Comp_RType2(MIPSOpcode op) override; + void Comp_RType3(MIPSOpcode op) override; + void Comp_ShiftType(MIPSOpcode op) override; + void Comp_Allegrex(MIPSOpcode op) override; + void Comp_Allegrex2(MIPSOpcode op) override; + void Comp_VBranch(MIPSOpcode op) override; + void Comp_MulDivType(MIPSOpcode op) override; + void Comp_Special3(MIPSOpcode op) override; + + void Comp_FPU3op(MIPSOpcode op) override; + void Comp_FPU2op(MIPSOpcode op) override; + void Comp_mxc1(MIPSOpcode op) override; + + void Comp_DoNothing(MIPSOpcode op) override; + + void Comp_SV(MIPSOpcode op) override; + void Comp_SVQ(MIPSOpcode op) override; + void Comp_VPFX(MIPSOpcode op) override; + void Comp_VVectorInit(MIPSOpcode op) override; + void Comp_VMatrixInit(MIPSOpcode op) override; + void Comp_VDot(MIPSOpcode op) override; + void Comp_VecDo3(MIPSOpcode op) override; + void Comp_VV2Op(MIPSOpcode op) override; + void Comp_Mftv(MIPSOpcode op) override; + void Comp_Vmfvc(MIPSOpcode op) override; + void Comp_Vmtvc(MIPSOpcode op) override; + void Comp_Vmmov(MIPSOpcode op) override; + void Comp_VScl(MIPSOpcode op) override; + void Comp_Vmmul(MIPSOpcode op) override; + void Comp_Vmscl(MIPSOpcode op) override; + void Comp_Vtfm(MIPSOpcode 
op) override; + void Comp_VHdp(MIPSOpcode op) override; + void Comp_VCrs(MIPSOpcode op) override; + void Comp_VDet(MIPSOpcode op) override; + void Comp_Vi2x(MIPSOpcode op) override; + void Comp_Vx2i(MIPSOpcode op) override; + void Comp_Vf2i(MIPSOpcode op) override; + void Comp_Vi2f(MIPSOpcode op) override; + void Comp_Vh2f(MIPSOpcode op) override; + void Comp_Vcst(MIPSOpcode op) override; + void Comp_Vhoriz(MIPSOpcode op) override; + void Comp_VRot(MIPSOpcode op) override; + void Comp_VIdt(MIPSOpcode op) override; + void Comp_Vcmp(MIPSOpcode op) override; + void Comp_Vcmov(MIPSOpcode op) override; + void Comp_Viim(MIPSOpcode op) override; + void Comp_Vfim(MIPSOpcode op) override; + void Comp_VCrossQuat(MIPSOpcode op) override; + void Comp_Vsgn(MIPSOpcode op) override; + void Comp_Vocp(MIPSOpcode op) override; + void Comp_ColorConv(MIPSOpcode op) override; + void Comp_Vbfy(MIPSOpcode op) override; + + int Replace_fabsf(); + + // Not using a regular block cache. + JitBlockCache *GetBlockCache() { return nullptr; } + + void ClearCache(); + void InvalidateCache(); + void InvalidateCacheAt(u32 em_address, int length = 4); + + void EatPrefix() { js.EatPrefix(); } + + const u8 *GetDispatcher() const override { + return dispatcher; + } + + void LinkBlock(u8 *exitPoint, const u8 *checkedEntry) override; + void UnlinkBlock(u8 *checkedEntry, u32 originalAddress) override; + +private: + void FlushAll(); + void FlushPrefixV(); + + u32 GetCompilerPC(); + void CompileDelaySlot(); + void EatInstruction(MIPSOpcode op); + MIPSOpcode GetOffsetInstruction(int offset); + + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void UpdateRoundingMode(); + + bool ReplaceJalTo(u32 dest); + + // Utility compilation functions + void BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely); + void BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely); + void BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely); + void BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely); + + // Utilities to reduce duplicated code + void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, IROp op); + void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp constOp, bool symmetric = false); + void CompShiftImm(MIPSOpcode op, IROp shiftType, int sa); + void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst); + + void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz); + void ApplyPrefixD(const u8 *vregs, VectorSize sz); + void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixS, sz); + } + void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixT, sz); + } + void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); + + // Utils + void Comp_ITypeMemLR(MIPSOpcode op, bool load); + + JitOptions jo; + JitState js; + + IRBlockCache blocks_; + + IRRegCache gpr; + // Arm64RegCacheFPU fpr; + + MIPSState *mips_; + + int dontLogBlocks; + int logBlocks; + + IRWriter ir; + + // where to write branch-likely trampolines + u32 blTrampolines_; + int blTrampolineCount_; + +public: + // Code pointers + const u8 *enterDispatcher; + + const u8 *outerLoop; + const u8 *outerLoopPCInSCRATCH1; + const u8 *dispatcherCheckCoreState; + const u8 *dispatcherPCInSCRATCH1; + const u8 
*dispatcher; + const u8 *dispatcherNoCheck; + + const u8 *breakpointBailout; + + const u8 *saveStaticRegisters; + const u8 *loadStaticRegisters; + + const u8 *restoreRoundingMode; + const u8 *applyRoundingMode; + const u8 *updateRoundingMode; + + // Indexed by FPCR FZ:RN bits for convenience. Uses SCRATCH2. + const u8 *convertS0ToSCRATCH1[8]; +}; + +} // namespace MIPSComp + diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp new file mode 100644 index 000000000000..7a31a463e4e5 --- /dev/null +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -0,0 +1,46 @@ +#include "Core/MIPS/IR/IRRegCache.h" +#include "Core/MIPS/IR/IRInst.h" + +void IRRegCache::Dirty(MIPSGPReg rd) { + if (rd == 0) { + return; + } + if (reg_[rd].isImm) { + ir_->WriteSetConstant(rd, reg_[rd].immVal); + reg_[rd].isImm = false; + } +} + +void IRRegCache::MapIn(MIPSGPReg rd) { + Dirty(rd); +} + +void IRRegCache::MapInIn(MIPSGPReg rs, MIPSGPReg rt) { + Dirty(rs); + Dirty(rt); +} + +void IRRegCache::MapDirty(MIPSGPReg rd) { + Dirty(rd); +} + +void IRRegCache::MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs) { + Dirty(rd); + Dirty(rs); +} + +void IRRegCache::MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt) { + Dirty(rd); + Dirty(rs); + Dirty(rt); +} + +void IRRegCache::Start(IRWriter *ir) { + memset(®_, 0, sizeof(reg_)); + reg_[0].isImm = true; + ir_ = ir; +} + +void IRRegCache::FlushAll() { + +} diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h new file mode 100644 index 000000000000..bf53e2a818f0 --- /dev/null +++ b/Core/MIPS/IR/IRRegCache.h @@ -0,0 +1,43 @@ +#pragma once + +// IRRegCache is only to perform pre-constant folding. This is worth it to get cleaner +// IR. + +#include "Common/CommonTypes.h" +#include "Core/MIPS/MIPS.h" + +enum { + TOTAL_MAPPABLE_MIPSREGS = 256, +}; + +struct RegIR { + bool isImm; + u32 immVal; +}; + +class IRWriter; + +class IRRegCache { +public: + void SetImm(MIPSGPReg r, u32 immVal) { + reg_[r].isImm = true; + reg_[r].immVal = immVal; + } + + bool IsImm(MIPSGPReg r) const { return reg_[r].isImm; } + u32 GetImm(MIPSGPReg r) const { return reg_[r].immVal; } + + void MapIn(MIPSGPReg rd); + void MapInIn(MIPSGPReg rs, MIPSGPReg rt); + void MapDirty(MIPSGPReg rd); + void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs); + void MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt); + + void Start(IRWriter *ir); + void FlushAll(); + +private: + void Dirty(MIPSGPReg rd); + RegIR reg_[TOTAL_MAPPABLE_MIPSREGS]; + IRWriter *ir_; +}; diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index 91ed4c10d625..0b7fc1932244 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -22,9 +22,11 @@ #include "Common/StringUtils.h" #include "Core/Util/DisArm64.h" +#include "Core/Config.h" #include "Core/MIPS/JitCommon/JitCommon.h" #include "Core/MIPS/JitCommon/JitState.h" +#include "Core/MIPS/IR/IRJit.h" #if defined(ARM) #include "../ARM/ArmJit.h" @@ -45,17 +47,21 @@ namespace MIPSComp { } JitInterface *CreateNativeJit(MIPSState *mips) { + if (false && g_Config.iCpuCore == (int)CPUCore::CPU_JIT) { #if defined(ARM) - return new MIPSComp::ArmJit(mips); + return new MIPSComp::ArmJit(mips); #elif defined(ARM64) - return new MIPSComp::Arm64Jit(mips); + return new MIPSComp::IRJit(mips); #elif defined(_M_IX86) || defined(_M_X64) - return new MIPSComp::Jit(mips); + return new MIPSComp::Jit(mips); #elif defined(MIPS) - return new MIPSComp::MipsJit(mips); + return new MIPSComp::MipsJit(mips); #else - return new MIPSComp::FakeJit(mips); + return new 
MIPSComp::FakeJit(mips); #endif + } else if (true || g_Config.iCpuCore == (int)CPUCore::CPU_IRJIT) { + return new MIPSComp::IRJit(mips); + } } } diff --git a/Core/MIPS/MIPS.h b/Core/MIPS/MIPS.h index a24c2f3d8b81..bbc9952c4dc1 100644 --- a/Core/MIPS/MIPS.h +++ b/Core/MIPS/MIPS.h @@ -166,6 +166,10 @@ class MIPSState float v[128]; u32 vi[128]; }; + // Used for temporary variables by IR Interpreter. + // Can be indexed through r[] using indices 192+. + u32 t[16]; + // Temps don't get flushed so we don't reserve space for them. // If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code. u32 vfpuCtrl[16]; @@ -177,7 +181,7 @@ class MIPSState struct { u32 pc; - u32 lo; + u32 lo; // offset 192 + 16 + 16 + 1 + 1 u32 hi; u32 fcr31; //fpu control register From 4acf85aa06ffbfdc54a936993aa4e310b0a75367 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 7 May 2016 17:37:19 +0200 Subject: [PATCH 02/77] It's kind of starting to run --- Core/Core.vcxproj | 2 + Core/Core.vcxproj.filters | 6 ++ Core/MIPS/IR/IRCompALU.cpp | 39 +++++---- Core/MIPS/IR/IRCompBranch.cpp | 42 ++++++--- Core/MIPS/IR/IRCompFPU.cpp | 2 +- Core/MIPS/IR/IRCompLoadStore.cpp | 3 +- Core/MIPS/IR/IRInst.cpp | 146 +++++++++++++++++++++++++++---- Core/MIPS/IR/IRInst.h | 6 +- Core/MIPS/IR/IRJit.cpp | 77 +++++++++++----- Core/MIPS/IR/IRJit.h | 23 ++++- Core/MIPS/IR/IRPassSimplify.cpp | 14 +++ Core/MIPS/IR/IRPassSimplify.h | 5 ++ Core/MIPS/IR/IRRegCache.cpp | 4 +- Core/MIPS/JitCommon/JitCommon.h | 1 + Core/MIPS/x86/Jit.cpp | 10 +++ Core/MIPS/x86/Jit.h | 1 + Core/MemMap.cpp | 8 +- 17 files changed, 309 insertions(+), 80 deletions(-) create mode 100644 Core/MIPS/IR/IRPassSimplify.cpp create mode 100644 Core/MIPS/IR/IRPassSimplify.h diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index af4b459fb1b9..e902adf7332d 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -189,6 +189,7 @@ + @@ -518,6 +519,7 @@ + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 99af2a2696fe..5905d62de115 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -664,6 +664,9 @@ MIPS\IR + + MIPS\IR + @@ -1218,6 +1221,9 @@ MIPS\IR + + MIPS\IR + diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 67059e371e5e..69cf25de5604 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -49,15 +49,15 @@ namespace MIPSComp { void IRJit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, IROp OP) { if (gpr.IsImm(rs)) { switch (OP) { - case IROp::AddConst: gpr.SetImm(rt, rs + uimm); break; - case IROp::SubConst: gpr.SetImm(rt, rs - uimm); break; - case IROp::AndConst: gpr.SetImm(rt, rs & uimm); break; - case IROp::OrConst: gpr.SetImm(rt, rs | uimm); break; - case IROp::XorConst: gpr.SetImm(rt, rs ^ uimm); break; + case IROp::AddConst: gpr.SetImm(rt, gpr.GetImm(rs) + uimm); break; + case IROp::SubConst: gpr.SetImm(rt, gpr.GetImm(rs) - uimm); break; + case IROp::AndConst: gpr.SetImm(rt, gpr.GetImm(rs) & uimm); break; + case IROp::OrConst: gpr.SetImm(rt, gpr.GetImm(rs) | uimm); break; + case IROp::XorConst: gpr.SetImm(rt, gpr.GetImm(rs) ^ uimm); break; } } else { gpr.MapDirtyIn(rt, rs); - ir.Write(OP, rt, ir.AddConstant(uimm)); + ir.Write(OP, rt, rs, ir.AddConstant(uimm)); } } @@ -95,8 +95,7 @@ void IRJit::Comp_IType(MIPSOpcode op) { break; } gpr.MapDirtyIn(rt, rs); - // Grab the sign bit (< 0) as 1/0. Slightly faster than a shift. 
- ir.Write(IROp::Slt, rt, rs, ir.AddConstant(simm)); + ir.Write(IROp::SltConst, rt, rs, ir.AddConstant(simm)); break; case 11: // R(rt) = R(rs) < suimm; break; //sltiu @@ -105,7 +104,7 @@ void IRJit::Comp_IType(MIPSOpcode op) { break; } gpr.MapDirtyIn(rt, rs); - ir.Write(IROp::SltU, rt, rs, ir.AddConstant(suimm)); + ir.Write(IROp::SltUConst, rt, rs, ir.AddConstant(suimm)); break; case 15: // R(rt) = uimm << 16; //lui @@ -167,6 +166,7 @@ void IRJit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp co // Luckily, it was just an imm. gpr.SetImm(rhs, rhsImm); } + return; } // Can't do the RSB optimization on ARM64 - no RSB! @@ -220,10 +220,17 @@ void IRJit::Comp_RType3(MIPSOpcode op) { case 39: // R(rd) = ~(R(rs) | R(rt)); break; //nor if (gpr.IsImm(rs) && gpr.IsImm(rt)) { gpr.SetImm(rd, ~(gpr.GetImm(rs) | gpr.GetImm(rt))); - } - - ir.Write(IROp::Or, IRTEMP_0, rs, rt); - ir.Write(IROp::Not, rd, IRTEMP_0); + } else { + gpr.MapDirtyInIn(rd, rs, rt); + if (rs == 0) { + ir.Write(IROp::Not, rd, rt); + } else if (rt == 0) { + ir.Write(IROp::Not, rd, rs); + } else { + ir.Write(IROp::Or, IRTEMP_0, rs, rt); + ir.Write(IROp::Not, rd, IRTEMP_0); + } + } break; case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt @@ -323,9 +330,9 @@ void IRJit::Comp_ShiftType(MIPSOpcode op) { // WARNING : ROTR switch (op & 0x3f) { - case 0: CompShiftImm(op, IROp::Shl, sa); break; //sll - case 2: CompShiftImm(op, rs == 1 ? IROp::Ror : IROp::Shr, sa); break; //srl - case 3: CompShiftImm(op, IROp::Sar, sa); break; //sra + case 0: CompShiftImm(op, IROp::ShlImm, sa); break; //sll + case 2: CompShiftImm(op, (rs == 1 ? IROp::RorImm : IROp::ShrImm), sa); break; //srl + case 3: CompShiftImm(op, IROp::SarImm, sa); break; //sra case 4: CompShiftVar(op, IROp::Shl, IROp::ShlImm); break; //sllv case 6: CompShiftVar(op, (fd == 1 ? IROp::Ror : IROp::Shr), (fd == 1 ? 
IROp::RorImm : IROp::ShrImm)); break; //srlv case 7: CompShiftVar(op, IROp::Sar, IROp::SarImm); break; //srav diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 16c7245b82dc..7d01d0b685da 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -72,22 +72,28 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) MIPSGPReg lhs = rs; MIPSGPReg rhs = rt; - if (!delaySlotIsNice) { - ir.Write(IROp::Mov, IRTEMP_0, rs); - ir.Write(IROp::Mov, IRTEMP_1, rt); - lhs = (MIPSGPReg)IRTEMP_0; - rhs = (MIPSGPReg)IRTEMP_1; + if (!delaySlotIsNice && !likely) { // if likely, we don't need this + if (rs != 0) { + ir.Write(IROp::Mov, IRTEMP_0, rs); + lhs = (MIPSGPReg)IRTEMP_0; + } + if (rt != 0) { + ir.Write(IROp::Mov, IRTEMP_1, rt); + rhs = (MIPSGPReg)IRTEMP_1; + } } if (!likely) CompileDelaySlot(); gpr.MapInIn(lhs, rhs); + FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs); // This makes the block "impure" :( if (likely) CompileDelaySlot(); + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; @@ -105,19 +111,25 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool MIPSOpcode delaySlotOp = GetOffsetInstruction(1); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + if (!likely && delaySlotIsNice) CompileDelaySlot(); int lhs = rs; gpr.MapIn(rs); - if (!delaySlotIsNice) { + if (!delaySlotIsNice && !likely) { // if likely, we don't need this ir.Write(IROp::Mov, IRTEMP_0, rs); lhs = IRTEMP_0; } + if (andLink) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); + FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs); if (likely) { CompileDelaySlot(); } // Taken + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; } @@ -173,12 +185,15 @@ void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { if (!likely) CompileDelaySlot(); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + FlushAll(); // Not taken ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_0, 0); // Taken if (likely) CompileDelaySlot(); + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; } @@ -208,6 +223,8 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { ir.Write(IROp::VfpCondToReg, IRTEMP_0); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. @@ -223,12 +240,14 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 
4 : 8); ir.Write(IROp::AndConst, IRTEMP_0, IRTEMP_0, ir.AddConstant(imm3)); + FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_0, 0); if (likely) CompileDelaySlot(); // Taken + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; } @@ -251,6 +270,8 @@ void IRJit::Comp_Jump(MIPSOpcode op) { u32 off = _IMM26 << 2; u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off; + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + // Might be a stubbed address or something? if (!Memory::IsValidAddress(targetAddr)) { if (js.nextExit == 0) { @@ -270,8 +291,6 @@ void IRJit::Comp_Jump(MIPSOpcode op) { break; case 3: //jal - if (ReplaceJalTo(targetAddr)) - return; gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); CompileDelaySlot(); FlushAll(); @@ -299,6 +318,8 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { if (andLink && rs == rd) delaySlotIsNice = false; + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int destReg; if (IsSyscall(delaySlotOp)) { gpr.MapDirty(rs); @@ -336,7 +357,7 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { break; } - ir.Write(IROp::ExitToReg, ir.AddConstant(js.downcountAmount), rs, 0); + ir.Write(IROp::ExitToReg, destReg, 0, 0); js.compiling = false; } @@ -354,8 +375,7 @@ void IRJit::Comp_Syscall(MIPSOpcode op) { js.compiling = false; } -void IRJit::Comp_Break(MIPSOpcode op) -{ +void IRJit::Comp_Break(MIPSOpcode op) { Comp_Generic(op); js.compiling = false; } diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 00a8ec63991c..86e8d126e7a0 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -82,7 +82,7 @@ void IRJit::Comp_FPUComp(MIPSOpcode op) { int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias if (opc == 0) { // f, sf (signalling false) - gpr.SetImm(MIPS_REG_FPCOND, 0); + gpr.SetImm((MIPSGPReg)IRREG_FPCOND, 0); return; } diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index 53ea1f866fe9..fb0a143dd8a6 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -82,7 +82,8 @@ namespace MIPSComp { return; } - u32 iaddr = gpr.IsImm(rs) ? 
offset + gpr.GetImm(rs) : 0xFFFFFFFF; + gpr.MapIn(rs); + gpr.MapDirty(rt); int addrReg = IRTEMP_0; switch (o) { // Load diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index cfcbe4349747..1e0cdabf0bb2 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -1,8 +1,13 @@ #include "Core/MIPS/IR/IRInst.h" +#include "Core/MIPS/IR/IRPassSimplify.h" +#include "Core/MIPS/MIPSDebugInterface.h" +#include "Core/MIPS/MIPSTables.h" #include "Core/MemMap.h" +#include "Core/HLE/HLE.h" IRMeta meta[] = { - { IROp::SetConst, "SetConst", "GC" }, + { IROp::SetConst, "SetConst", "GC_" }, + { IROp::Mov, "Mov", "GG" }, { IROp::Add, "Add", "GGG" }, { IROp::Sub, "Sub", "GGG" }, { IROp::Neg, "Neg", "GG" }, @@ -23,9 +28,9 @@ IRMeta meta[] = { { IROp::ShrImm, "ShrImm", "GGI" }, { IROp::SarImm, "SarImm", "GGI" }, { IROp::RorImm, "RorImm", "GGI" }, - { IROp::Slt, "Slt","GGC" }, - { IROp::SltConst, "SltConst","GGC" }, - { IROp::SltU, "SltU", "GGC" }, + { IROp::Slt, "Slt", "GGG" }, + { IROp::SltConst, "SltConst", "GGC" }, + { IROp::SltU, "SltU", "GGG" }, { IROp::SltUConst, "SltUConst", "GGC" }, { IROp::Clz, "Clz", "GG" }, { IROp::MovZ, "MovZ", "GGG" }, @@ -37,6 +42,14 @@ IRMeta meta[] = { { IROp::Mul, "Mul", "_GG" }, { IROp::Ext8to32, "Ext8to32", "GG" }, { IROp::Ext16to32, "Ext16to32", "GG" }, + { IROp::Load8, "Load8", "GGC" }, + { IROp::Load8Ext, "Load8", "GGC" }, + { IROp::Load16, "Load16", "GGC" }, + { IROp::Load16Ext, "Load16Ext", "GGC" }, + { IROp::Load32, "Load32", "GGC" }, + { IROp::Store8, "Store8", "GGC" }, + { IROp::Store16, "Store16", "GGC" }, + { IROp::Store32, "Store32", "GGC" }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, @@ -57,8 +70,16 @@ IRMeta meta[] = { { IROp::SetCtrlVFPU, "SetCtrlVFPU", "T" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, + { IROp::ExitToConst, "Exit", "C" }, + { IROp::ExitToConstIfEq, "ExitIfEq", "CGG" }, + { IROp::ExitToConstIfNeq, "ExitIfNeq", "CGG" }, + { IROp::ExitToConstIfGtZ, "ExitIfGtZ", "CG" }, + { IROp::ExitToConstIfGeZ, "ExitIfGeZ", "CG" }, + { IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG" }, + { IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG" }, + { IROp::ExitToReg, "ExitToReg", "G" }, { IROp::Syscall, "Syscall", "_C"}, - { IROp::SetPC, "SetPC", "_C"}, + { IROp::SetPC, "SetPC", "_G"}, }; const IRMeta *metaIndex[256]; @@ -82,9 +103,39 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::Sub: mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2]; break; + case IROp::And: + mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2]; + break; + case IROp::Or: + mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2]; + break; + case IROp::Xor: + mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2]; + break; + case IROp::Mov: + mips->r[inst->dest] = mips->r[inst->src1]; + break; + case IROp::AddConst: + mips->r[inst->dest] = mips->r[inst->src1] + constPool[inst->src2]; + break; + case IROp::SubConst: + mips->r[inst->dest] = mips->r[inst->src1] - constPool[inst->src2]; + break; + case IROp::AndConst: + mips->r[inst->dest] = mips->r[inst->src1] & constPool[inst->src2]; + break; + case IROp::OrConst: + mips->r[inst->dest] = mips->r[inst->src1] | constPool[inst->src2]; + break; + case IROp::XorConst: + mips->r[inst->dest] = mips->r[inst->src1] ^ constPool[inst->src2]; + break; case IROp::Neg: mips->r[inst->dest] = -(s32)mips->r[inst->src1]; break; + case IROp::Not: + mips->r[inst->dest] = 
~mips->r[inst->src1]; + break; case IROp::Ext8to32: mips->r[inst->dest] = (s32)(s8)mips->r[inst->src1]; break; @@ -152,6 +203,22 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c } break; + case IROp::Slt: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2]; + break; + + case IROp::SltU: + mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2]; + break; + + case IROp::SltConst: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)constPool[inst->src2]; + break; + + case IROp::SltUConst: + mips->r[inst->dest] = mips->r[inst->src1] < constPool[inst->src2]; + break; + case IROp::MovZ: if (mips->r[inst->src1] == 0) mips->r[inst->dest] = mips->r[inst->src2]; @@ -208,10 +275,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; case IROp::ExitToConst: - return constPool[inst->src1]; + return constPool[inst->dest]; case IROp::ExitToReg: - return mips->r[inst->src1]; + return mips->r[inst->dest]; case IROp::ExitToConstIfEq: if (mips->r[inst->src1] == mips->r[inst->src2]) @@ -238,8 +305,28 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c return constPool[inst->dest]; break; + case IROp::Downcount: + mips->downcount -= (inst->src1) | ((inst->src2) << 8); + break; + case IROp::SetPC: - return mips->pc = mips->r[inst->src1]; + mips->pc = mips->r[inst->src1]; + break; + + case IROp::Syscall: + // SetPC was executed before. + { + MIPSOpcode op(constPool[inst->src1]); + CallSyscall(op); + return mips->pc; + } + + case IROp::Interpret: // SLOW fallback. Can be made faster. + { + MIPSOpcode op(constPool[inst->src1]); + MIPSInterpret(op); + break; + } default: Crash(); @@ -262,14 +349,13 @@ void IRWriter::Write(IROp op, u8 dst, u8 src1, u8 src2) { } void IRWriter::WriteSetConstant(u8 dst, u32 value) { - // TODO: Check for the fixed ones first. 
- Write(IROp::SetConstImm, AddConstant(value)); + Write(IROp::SetConst, dst, AddConstant(value)); } int IRWriter::AddConstant(u32 value) { for (size_t i = 0; i < constPool_.size(); i++) { if (constPool_[i] == value) - return i; + return (int)i; } constPool_.push_back(value); return (int)constPool_.size() - 1; @@ -281,10 +367,25 @@ int IRWriter::AddConstantFloat(float value) { return AddConstant(val); } +void IRWriter::Simplify() { + SimplifyInPlace(&insts_[0], insts_.size(), constPool_.data()); +} + +const char *GetGPRName(int r) { + if (r < 32) { + return currentDebugMIPS->GetRegName(0, r); + } + switch (r) { + case IRTEMP_0: return "irtemp0"; + case IRTEMP_1: return "irtemp1"; + default: return "(unk)"; + } +} + void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *constPool) { switch (type) { case 'G': - snprintf(buf, bufSize, "r%d", param); + snprintf(buf, bufSize, "%s", GetGPRName(param)); break; case 'F': snprintf(buf, bufSize, "r%d", param); @@ -292,6 +393,13 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co case 'C': snprintf(buf, bufSize, "%08x", constPool[param]); break; + case 'I': + snprintf(buf, bufSize, "%02x", param); + break; + case '_': + case '\0': + buf[0] = 0; + break; default: snprintf(buf, bufSize, "?"); break; @@ -300,17 +408,21 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool) { const IRMeta *meta = metaIndex[(int)inst.op]; + if (!meta) { + snprintf(buf, bufsize, "Unknown %d", (int)inst.op); + return; + } char bufDst[16]; char bufSrc1[16]; char bufSrc2[16]; DisassembleParam(bufDst, sizeof(bufDst) - 2, inst.dest, meta->types[0], constPool); - DisassembleParam(bufSrc1, sizeof(bufSrc1) - 2, inst.dest, meta->types[1], constPool); - DisassembleParam(bufSrc2, sizeof(bufSrc2), inst.dest, meta->types[2], constPool); - if (meta->types[1]) { + DisassembleParam(bufSrc1, sizeof(bufSrc1) - 2, inst.src1, meta->types[1], constPool); + DisassembleParam(bufSrc2, sizeof(bufSrc2), inst.src2, meta->types[2], constPool); + if (meta->types[1] && meta->types[0] != '_') { strcat(bufDst, ", "); } - if (meta->types[2]) { + if (meta->types[2] && meta->types[1] != '_') { strcat(bufSrc1, ", "); } snprintf(buf, bufsize, "%s %s%s%s", meta->name, bufDst, bufSrc1, bufSrc2); -} \ No newline at end of file +} diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index a1aa75edc7cf..c3cb6021de50 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -17,7 +17,6 @@ enum class IROp : u8 { SetConst, - SetConstImm, FSetConst, Mov, @@ -202,6 +201,8 @@ enum { // Hacky way to get to other state IRREG_LO = 226, // offset of lo in MIPSState / 4 IRREG_HI = 227, + IRREG_FCR31 = 228, + IRREG_FPCOND = 229 }; enum class IRParam { @@ -249,6 +250,8 @@ class IRWriter { constPool_.clear(); } + void Simplify(); + const std::vector &GetInstructions() { return insts_; } const std::vector &GetConstants() { return constPool_; } @@ -258,3 +261,4 @@ class IRWriter { }; void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool); +void InitIR(); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index b16706b2d68e..7fae3255c34f 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -38,32 +38,18 @@ #include "Core/MIPS/IR/IRJit.h" #include "Core/MIPS/JitCommon/JitCommon.h" -void DisassembleArm64Print(const u8 *data, int size) { - std::vector lines = DisassembleArm64(data, size); - for (auto s : 
lines) { - ILOG("%s", s.c_str()); - } - /* - ILOG("+++"); - // A format friendly to Online Disassembler which gets endianness wrong - for (size_t i = 0; i < lines.size(); i++) { - uint32_t opcode = ((const uint32_t *)data)[i]; - ILOG("%d/%d: %08x", (int)(i+1), (int)lines.size(), swap32(opcode)); - } - ILOG("==="); - ILOG("===");*/ -} - namespace MIPSComp { IRJit::IRJit(MIPSState *mips) : gpr(), mips_(mips) { logBlocks = 0; dontLogBlocks = 0; - js.startDefaultPrefix = mips_->HasDefaultPrefix(); + js.startDefaultPrefix = true; js.currentRoundingFunc = convertS0ToSCRATCH1[0]; u32 size = 128 * 1024; blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); + logBlocks = 100; + InitIR(); } IRJit::~IRJit() { @@ -102,7 +88,8 @@ void IRJit::DoDummyState(PointerWrap &p) { } void IRJit::FlushAll() { - FlushPrefixV(); + gpr.FlushAll(); + // FlushPrefixV(); } void IRJit::FlushPrefixV() { @@ -162,6 +149,7 @@ void IRJit::Compile(u32 em_address) { int block_num = blocks_.AllocateBlock(em_address); IRBlock *b = blocks_.GetBlock(block_num); DoJit(em_address, b); + b->Finalize(block_num); // Overwrites the first instruction bool cleanSlate = false; @@ -192,7 +180,35 @@ void IRJit::Compile(u32 em_address) { void IRJit::RunLoopUntil(u64 globalticks) { PROFILE_THIS_SCOPE("jit"); - ((void (*)())enterDispatcher)(); + + // ApplyRoundingMode(true); + // IR Dispatcher + + while (true) { + // RestoreRoundingMode(true); + CoreTiming::Advance(); + // ApplyRoundingMode(true); + if (coreState != 0) { + break; + } + while (mips_->downcount >= 0) { + u32 inst = Memory::ReadUnchecked_U32(mips_->pc); + u32 opcode = inst >> 24; + u32 data = inst & 0xFFFFFF; + if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) { + IRBlock *block = blocks_.GetBlock(data); + ILOG("Run block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); + mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions()); + } else { + // RestoreRoundingMode(true); + ILOG("Compile block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); + Compile(mips_->pc); + // ApplyRoundingMode(true); + } + } + } + + // RestoreRoundingMode(true); } u32 IRJit::GetCompilerPC() { @@ -230,24 +246,28 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { js.numInstructions++; } + ir.Simplify(); + b->SetInstructions(ir.GetInstructions(), ir.GetConstants()); - char temp[256]; if (logBlocks > 0 && dontLogBlocks == 0) { + char temp2[256]; ILOG("=============== mips %d ===============", blocks_.GetNumBlocks()); for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { - MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true); - ILOG("M: %08x %s", cpc, temp); + temp2[0] = 0; + MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp2, true); + ILOG("M: %08x %s", cpc, temp2); } } if (logBlocks > 0 && dontLogBlocks == 0) { ILOG("=============== IR (%d instructions) ===============", js.numInstructions); - for (int i = 0; i < js.numInstructions; i++) { + for (int i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); ILOG("%s", buf); } + ILOG("=============== end ================="); } if (logBlocks > 0) @@ -330,4 +350,15 @@ void IRBlockCache::InvalidateICache(u32 addess, u32 length) { // TODO } +void IRBlock::Finalize(int number) { + origFirstOpcode_= Memory::Read_Opcode_JIT(origAddr_); + MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number); + Memory::Write_Opcode_JIT(origAddr_, 
opcode); +} + +MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) { + IRBlock *b = blocks_.GetBlock(op.encoding & 0xFFFFFF); + return b->GetOriginalFirstOp(); +} + } // namespace MIPSComp \ No newline at end of file diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 686eefe6c274..440e96d28286 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -34,8 +34,18 @@ namespace MIPSComp { // TODO : Use arena allocators. For now let's just malloc. class IRBlock { public: - IRBlock() {} + IRBlock() : instr_(nullptr), const_(nullptr), numInstructions_(0), numConstants_(0), origAddr_(0) {} IRBlock(u32 emAddr) : instr_(nullptr), const_(nullptr), origAddr_(emAddr), numInstructions_(0) {} + IRBlock(IRBlock &&b) { + instr_ = b.instr_; + const_ = b.const_; + numInstructions_ = b.numInstructions_; + numConstants_ = b.numConstants_; + origAddr_ = b.origAddr_; + b.instr_ = nullptr; + b.const_ = nullptr; + } + ~IRBlock() { delete[] instr_; delete[] const_; @@ -50,12 +60,20 @@ class IRBlock { memcpy(const_, constants.data(), sizeof(u32) * constants.size()); } + const IRInst *GetInstructions() const { return instr_; } + const u32 *GetConstants() const { return const_; } + int GetNumInstructions() const { return numInstructions_; } + MIPSOpcode GetOriginalFirstOp() const { return origFirstOpcode_; } + + void Finalize(int number); + private: IRInst *instr_; u32 *const_; u16 numInstructions_; u16 numConstants_; u32 origAddr_; + MIPSOpcode origFirstOpcode_; }; class IRBlockCache { @@ -170,7 +188,8 @@ class IRJit : public JitInterface { int Replace_fabsf(); // Not using a regular block cache. - JitBlockCache *GetBlockCache() { return nullptr; } + JitBlockCache *GetBlockCache() override { return nullptr; } + MIPSOpcode GetOriginalOp(MIPSOpcode op) override; void ClearCache(); void InvalidateCache(); diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp new file mode 100644 index 000000000000..e110b7380874 --- /dev/null +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -0,0 +1,14 @@ +#include "Core/MIPS/IR/IRPassSimplify.h" + +void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool) { + for (int i = 0; i < count; i++) { + switch (inst[i].op) { + case IROp::AddConst: + if (constPool[inst[i].src2] == 0) + inst[i].op = IROp::Mov; + break; + default: + break; + } + } +} \ No newline at end of file diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h new file mode 100644 index 000000000000..c798d89f92b0 --- /dev/null +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -0,0 +1,5 @@ +#pragma once + +#include "Core/MIPS/IR/IRInst.h" + +void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool); diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index 7a31a463e4e5..808370ce6321 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -42,5 +42,7 @@ void IRRegCache::Start(IRWriter *ir) { } void IRRegCache::FlushAll() { - + for (int i = 0; i < TOTAL_MAPPABLE_MIPSREGS; i++) { + Dirty((MIPSGPReg)i); + } } diff --git a/Core/MIPS/JitCommon/JitCommon.h b/Core/MIPS/JitCommon/JitCommon.h index 9c440a6cdbcb..e27707ea3558 100644 --- a/Core/MIPS/JitCommon/JitCommon.h +++ b/Core/MIPS/JitCommon/JitCommon.h @@ -57,6 +57,7 @@ namespace MIPSComp { virtual void Compile(u32 em_address) = 0; virtual void ClearCache() = 0; virtual void EatPrefix() = 0; + virtual MIPSOpcode GetOriginalOp(MIPSOpcode op) = 0; // Block linking. This may need to work differently for whole-function JITs and stuff // like that. 
diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 3ada3ad123e3..c2c01a56f4d4 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -839,4 +839,14 @@ void Jit::CallProtectedFunction(const void *func, const OpArg &arg1, const u32 a void Jit::Comp_DoNothing(MIPSOpcode op) { } +MIPSOpcode Jit::GetOriginalOp(MIPSOpcode op) { + JitBlockCache *bc = GetBlockCache(); + int block_num = bc->GetBlockNumberFromEmuHackOp(op, true); + if (block_num >= 0) { + return bc->GetOriginalFirstOp(block_num); + } else { + return op; + } +} + } // namespace diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 68d160a9589f..a6f44443311a 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -161,6 +161,7 @@ class Jit : public Gen::XCodeBlock, public JitInterface { void UpdateRoundingMode(); JitBlockCache *GetBlockCache() { return &blocks; } + MIPSOpcode GetOriginalOp(MIPSOpcode op) override; void ClearCache(); void InvalidateCache() override; diff --git a/Core/MemMap.cpp b/Core/MemMap.cpp index 557bc461c4a5..68e1f3e5a84f 100644 --- a/Core/MemMap.cpp +++ b/Core/MemMap.cpp @@ -479,13 +479,7 @@ Opcode Read_Opcode_JIT(u32 address) { Opcode inst = Opcode(Read_U32(address)); if (MIPS_IS_RUNBLOCK(inst.encoding) && MIPSComp::jit) { - JitBlockCache *bc = MIPSComp::jit->GetBlockCache(); - int block_num = bc->GetBlockNumberFromEmuHackOp(inst, true); - if (block_num >= 0) { - return bc->GetOriginalFirstOp(block_num); - } else { - return inst; - } + return MIPSComp::jit->GetOriginalOp(inst); } else { return inst; } From 12edfcea5aad7ff0509229154a2bfcb1d080ea5f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 7 May 2016 21:00:30 +0200 Subject: [PATCH 03/77] Enough to run cpu_alu.prx. --- Core/MIPS/IR/IRCompALU.cpp | 56 +++++++++++++++++++++++++++++-- Core/MIPS/IR/IRCompBranch.cpp | 22 +++++++----- Core/MIPS/IR/IRInst.cpp | 19 +++++++++-- Core/MIPS/IR/IRJit.cpp | 26 ++++++++++---- Core/MIPS/IR/IRJit.h | 7 +++- Core/MIPS/JitCommon/JitCommon.cpp | 18 +++++----- Core/MIPS/MIPSTables.cpp | 14 ++++++++ Core/MIPS/x86/Asm.cpp | 2 +- Core/MIPS/x86/Jit.cpp | 3 +- 9 files changed, 133 insertions(+), 34 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 69cf25de5604..7c360082c9ed 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -153,7 +153,7 @@ void IRJit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp co } return; } - + /* if (gpr.IsImm(rt) || (gpr.IsImm(rs) && symmetric)) { MIPSGPReg lhs = gpr.IsImm(rs) ? rt : rs; MIPSGPReg rhs = gpr.IsImm(rs) ? rs : rt; @@ -167,7 +167,7 @@ void IRJit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp co gpr.SetImm(rhs, rhsImm); } return; - } + }*/ // Can't do the RSB optimization on ARM64 - no RSB! @@ -343,7 +343,57 @@ void IRJit::Comp_ShiftType(MIPSOpcode op) { } void IRJit::Comp_Special3(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + + MIPSGPReg rs = _RS; + MIPSGPReg rt = _RT; + + int pos = _POS; + int size = _SIZE + 1; + u32 mask = 0xFFFFFFFFUL >> (32 - size); + + // Don't change $zr. 
+ if (rt == 0) + return; + + switch (op & 0x3f) { + case 0x0: //ext + if (gpr.IsImm(rs)) { + gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask); + return; + } + + gpr.MapDirtyIn(rt, rs); + ir.Write(IROp::Shl, rt, rs); + ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(mask)); + break; + + case 0x4: //ins + { + u32 sourcemask = mask >> pos; + u32 destmask = ~(sourcemask << pos); + if (gpr.IsImm(rs)) { + u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos; + if (gpr.IsImm(rt)) { + gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted); + return; + } + + gpr.MapDirty(rt); + ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask)); + if (inserted != 0) { + ir.Write(IROp::OrConst, rt, rt, inserted); + } + } else { + gpr.MapDirtyIn(rt, rs); + ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask)); + ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask)); + ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos); + ir.Write(IROp::Or, rt, rt, IRTEMP_0); + } + } + break; + } } void IRJit::Comp_Allegrex(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 7d01d0b685da..27fb5ae52d4e 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -72,12 +72,14 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) MIPSGPReg lhs = rs; MIPSGPReg rhs = rt; - if (!delaySlotIsNice && !likely) { // if likely, we don't need this + if (!delaySlotIsNice) { // if likely, we don't need this if (rs != 0) { + gpr.MapIn(rs); ir.Write(IROp::Mov, IRTEMP_0, rs); lhs = (MIPSGPReg)IRTEMP_0; } if (rt != 0) { + gpr.MapIn(rt); ir.Write(IROp::Mov, IRTEMP_1, rt); rhs = (MIPSGPReg)IRTEMP_1; } @@ -113,21 +115,22 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); - if (!likely && delaySlotIsNice) - CompileDelaySlot(); - int lhs = rs; - gpr.MapIn(rs); - if (!delaySlotIsNice && !likely) { // if likely, we don't need this + MIPSGPReg lhs = rs; + if (!delaySlotIsNice) { // if likely, we don't need this ir.Write(IROp::Mov, IRTEMP_0, rs); - lhs = IRTEMP_0; + lhs = (MIPSGPReg)IRTEMP_0; } if (andLink) gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); + + if (!likely) + CompileDelaySlot(); + + gpr.MapIn(lhs); FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs); - if (likely) { + if (likely) CompileDelaySlot(); - } // Taken FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); @@ -327,6 +330,7 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { if (andLink) gpr.SetImm(rd, GetCompilerPC() + 8); CompileDelaySlot(); + // Syscall (the delay slot) does FlushAll. return; // Syscall (delay slot) wrote exit code. 
} else if (delaySlotIsNice) { if (andLink) diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 1e0cdabf0bb2..fc4a07a2ec2d 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -170,13 +170,13 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; case IROp::ShlImm: - mips->r[inst->dest] = mips->r[inst->src1] << inst->src2; + mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; break; case IROp::ShrImm: - mips->r[inst->dest] = mips->r[inst->src1] >> inst->src2; + mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2; break; case IROp::SarImm: - mips->r[inst->dest] = (s32)mips->r[inst->src1] >> inst->src2; + mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2; break; case IROp::RorImm: { @@ -203,6 +203,19 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c } break; + case IROp::Clz: + { + int x = 31; + int count = 0; + int value = mips->r[inst->src1]; + while (x >= 0 && !(value & (1 << x))) { + count++; + x--; + } + mips->r[inst->dest] = count; + break; + } + case IROp::Slt: mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2]; break; diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 7fae3255c34f..d99cebfc3ec8 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -48,7 +48,7 @@ IRJit::IRJit(MIPSState *mips) : gpr(), mips_(mips) { js.currentRoundingFunc = convertS0ToSCRATCH1[0]; u32 size = 128 * 1024; blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); - logBlocks = 100; + logBlocks = 0; InitIR(); } @@ -184,6 +184,12 @@ void IRJit::RunLoopUntil(u64 globalticks) { // ApplyRoundingMode(true); // IR Dispatcher + FILE *f; + int numBlocks = 0; + if (numBlocks) { + f = fopen("E:\\blockir.txt", "w"); + } + while (true) { // RestoreRoundingMode(true); CoreTiming::Advance(); @@ -197,11 +203,18 @@ void IRJit::RunLoopUntil(u64 globalticks) { u32 data = inst & 0xFFFFFF; if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) { IRBlock *block = blocks_.GetBlock(data); - ILOG("Run block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); + if (numBlocks > 0) { + // ILOG("Run block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); + fprintf(f, "BLOCK : %08x v0: %08x v1: %08x a0: %08x s0: %08x s4: %08x\n", mips_->pc, mips_->r[MIPS_REG_V0], mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0], mips_->r[MIPS_REG_S0], mips_->r[MIPS_REG_S4]); + fflush(f); + numBlocks--; + } mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions()); } else { + if (mips_->pc == 0x0880de94) + logBlocks = 10; // RestoreRoundingMode(true); - ILOG("Compile block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); + // ILOG("Compile block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); Compile(mips_->pc); // ApplyRoundingMode(true); } @@ -252,7 +265,7 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { if (logBlocks > 0 && dontLogBlocks == 0) { char temp2[256]; - ILOG("=============== mips %d ===============", blocks_.GetNumBlocks()); + ILOG("=============== mips %d %08x ===============", blocks_.GetNumBlocks(), em_address); for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { temp2[0] = 0; MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp2, true); @@ -304,7 +317,8 @@ void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { } void IRJit::Comp_Generic(MIPSOpcode op) { - 
ir.Write(IROp::Interpret, ir.AddConstant(op.encoding)); + FlushAll(); + ir.Write(IROp::Interpret, 0, ir.AddConstant(op.encoding)); const MIPSInfo info = MIPSGetInfo(op); if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) { // If it does eat them, it'll happen in MIPSCompileOp(). @@ -351,7 +365,7 @@ void IRBlockCache::InvalidateICache(u32 addess, u32 length) { } void IRBlock::Finalize(int number) { - origFirstOpcode_= Memory::Read_Opcode_JIT(origAddr_); + origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_); MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number); Memory::Write_Opcode_JIT(origAddr_, opcode); } diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 440e96d28286..63badb5ce2cf 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -42,6 +42,7 @@ class IRBlock { numInstructions_ = b.numInstructions_; numConstants_ = b.numConstants_; origAddr_ = b.origAddr_; + origFirstOpcode_ = b.origFirstOpcode_; b.instr_ = nullptr; b.const_ = nullptr; } @@ -86,7 +87,11 @@ class IRBlockCache { return (int)blocks_.size() - 1; } IRBlock *GetBlock(int i) { - return &blocks_[i]; + if (i >= 0 && i < blocks_.size()) { + return &blocks_[i]; + } else { + return nullptr; + } } private: std::vector blocks_; diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index 0b7fc1932244..630494f97329 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -47,21 +47,21 @@ namespace MIPSComp { } JitInterface *CreateNativeJit(MIPSState *mips) { - if (false && g_Config.iCpuCore == (int)CPUCore::CPU_JIT) { +#if 1 + return new MIPSComp::IRJit(mips); +#else #if defined(ARM) - return new MIPSComp::ArmJit(mips); + return new MIPSComp::ArmJit(mips); #elif defined(ARM64) - return new MIPSComp::IRJit(mips); + return new MIPSComp::IRJit(mips); #elif defined(_M_IX86) || defined(_M_X64) - return new MIPSComp::Jit(mips); + return new MIPSComp::Jit(mips); #elif defined(MIPS) - return new MIPSComp::MipsJit(mips); + return new MIPSComp::MipsJit(mips); #else - return new MIPSComp::FakeJit(mips); + return new MIPSComp::FakeJit(mips); +#endif #endif - } else if (true || g_Config.iCpuCore == (int)CPUCore::CPU_IRJIT) { - return new MIPSComp::IRJit(mips); - } } } diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index 39038df6ae9e..c5ef44bc0d04 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -28,6 +28,7 @@ #include "Core/CoreTiming.h" #include "Core/Reporting.h" #include "Core/Debugger/Breakpoints.h" +#include "base/logging.h" #include "JitCommon/JitCommon.h" @@ -973,10 +974,13 @@ void MIPSInterpret(MIPSOpcode op) { int MIPSInterpret_RunUntil(u64 globalTicks) { + int blockCount = 150000; + FILE *f = fopen("E:\\blockjit.txt", "w"); MIPSState *curMips = currentMIPS; while (coreState == CORE_RUNNING) { CoreTiming::Advance(); + u32 lastPC = 0; // NEVER stop in a delay slot! 
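The lastPC bookkeeping and the fprintf added in this hunk (below) mirror the temporary trace written from the IR dispatcher in IRJit.cpp above: both sides log one "BLOCK : pc v0 v1 a0 s0 s4" line in the same format, with the interpreter approximating block boundaries by logging whenever the PC does not simply advance by 4. The two files can then be compared directly (for example with a plain diff of blockir.txt against blockjit.txt) to find the first block at which the IR interpreter's register state diverges from the reference interpreter.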
while (curMips->downcount >= 0 && coreState == CORE_RUNNING) @@ -1015,6 +1019,16 @@ int MIPSInterpret_RunUntil(u64 globalTicks) bool wasInDelaySlot = curMips->inDelaySlot; + if (curMips->pc != lastPC + 4) { + if (blockCount > 0) { + MIPSState *mips_ = curMips; + fprintf(f, "BLOCK : %08x v0: %08x v1: %08x a0: %08x s0: %08x s4: %08x\n", mips_->pc, mips_->r[MIPS_REG_V0], mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0], mips_->r[MIPS_REG_S0], mips_->r[MIPS_REG_S4]); + fflush(f); + blockCount--; + } + } + lastPC = curMips->pc; + MIPSInterpret(op); if (curMips->inDelaySlot) diff --git a/Core/MIPS/x86/Asm.cpp b/Core/MIPS/x86/Asm.cpp index 6d03ba5f6fbc..86dfc1d7fb1f 100644 --- a/Core/MIPS/x86/Asm.cpp +++ b/Core/MIPS/x86/Asm.cpp @@ -40,7 +40,7 @@ namespace MIPSComp //TODO - make an option //#if _DEBUG -static bool enableDebug = false; + static bool enableDebug = true; //#else // bool enableDebug = false; diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index c2c01a56f4d4..4bfce6814e85 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -81,8 +81,7 @@ u32 JitBreakpoint() host->SetDebugMode(true); // There's probably a better place for this. - if (USE_JIT_MISSMAP) - { + if (USE_JIT_MISSMAP) { std::map notJitSorted; std::transform(notJitOps.begin(), notJitOps.end(), std::inserter(notJitSorted, notJitSorted.begin()), flip_pair); From 38b7d89dfbbf49a036a1ee4536e52085bd4265e0 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 7 May 2016 21:34:27 +0200 Subject: [PATCH 04/77] Fix a performance issue in CallSyscall --- Core/HLE/HLE.cpp | 7 ++++--- Core/HLE/ReplaceTables.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Core/HLE/HLE.cpp b/Core/HLE/HLE.cpp index a4a448f574d7..2083b1ba24e8 100644 --- a/Core/HLE/HLE.cpp +++ b/Core/HLE/HLE.cpp @@ -70,6 +70,7 @@ static int delayedResultEvent = -1; static int hleAfterSyscall = HLE_AFTER_NOTHING; static const char *hleAfterSyscallReschedReason; static const HLEFunction *latestSyscall = nullptr; +static int idleOp; void hleDelayResultFinish(u64 userdata, int cycleslate) { @@ -93,6 +94,7 @@ void HLEInit() { RegisterAllModules(); delayedResultEvent = CoreTiming::RegisterEvent("HLEDelayedResult", hleDelayResultFinish); + idleOp = GetSyscallOp("FakeSysCalls", NID_IDLE); } void HLEDoState(PointerWrap &p) @@ -540,9 +542,8 @@ void CallSyscall(MIPSOpcode op) return; } - if (info->func) - { - if (op == GetSyscallOp("FakeSysCalls", NID_IDLE)) + if (info->func) { + if (op == idleOp) info->func(); else if (info->flags != 0) CallSyscallWithFlags(info); diff --git a/Core/HLE/ReplaceTables.h b/Core/HLE/ReplaceTables.h index 84f85b7eea35..94ee26d69ea1 100644 --- a/Core/HLE/ReplaceTables.h +++ b/Core/HLE/ReplaceTables.h @@ -33,6 +33,8 @@ #pragma once +#include + #include "Common/CommonTypes.h" #include "Core/MIPS/JitCommon/JitCommon.h" From 3c5510e5a36be46800dcdb7a8417f90007231368 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 7 May 2016 21:35:12 +0200 Subject: [PATCH 05/77] Disable debug file logging, fix issue with replacement functions, etc --- Core/MIPS/IR/IRCompFPU.cpp | 2 +- Core/MIPS/IR/IRInst.cpp | 64 ++++++++++++++++++++++++++++++++++++++ Core/MIPS/IR/IRInst.h | 1 + Core/MIPS/IR/IRJit.cpp | 28 ++++++++++++++++- Core/MIPS/MIPSTables.cpp | 5 ++- Core/MemMap.cpp | 28 +++++++---------- 6 files changed, 107 insertions(+), 21 deletions(-) diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 86e8d126e7a0..8d9d7b2aa5f2 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -77,7 
+77,7 @@ void IRJit::Comp_FPULS(MIPSOpcode op) { } void IRJit::Comp_FPUComp(MIPSOpcode op) { - CONDITIONAL_DISABLE; + DISABLE; int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index fc4a07a2ec2d..4876ce697da1 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -4,6 +4,9 @@ #include "Core/MIPS/MIPSTables.h" #include "Core/MemMap.h" #include "Core/HLE/HLE.h" +#include "Core/HLE/ReplaceTables.h" + +#include "math/math_util.h" IRMeta meta[] = { { IROp::SetConst, "SetConst", "GC_" }, @@ -286,6 +289,58 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::FpCondToReg: mips->r[inst->dest] = mips->fpcond; break; + case IROp::FRound: + mips->r[inst->dest] = (int)floorf(mips->f[inst->src1] + 0.5f); + break; + case IROp::FTrunc: + { + float src = mips->f[inst->src1]; + if (src >= 0.0f) { + mips->fs[inst->dest] = (int)floorf(src); + // Overflow, but it was positive. + if (mips->fs[inst->dest] == -2147483648LL) { + mips->fs[inst->dest] = 2147483647LL; + } + } else { + // Overflow happens to be the right value anyway. + mips->fs[inst->dest] = (int)ceilf(src); + } + break; + } + case IROp::FCeil: + mips->r[inst->dest] = (int)ceilf(mips->f[inst->src1]); + break; + case IROp::FFloor: + mips->r[inst->dest] = (int)floorf(mips->f[inst->src1]); + break; + + case IROp::FCvtSW: + mips->f[inst->dest] = (float)mips->fs[inst->src1]; + break; + case IROp::FCvtWS: + { + float src = mips->f[inst->src1]; + if (my_isnanorinf(src)) + { + mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL; + break; + } + switch (mips->fcr31 & 3) + { + case 0: mips->fs[inst->dest] = (int)round_ieee_754(src); break; // RINT_0 + case 1: mips->fs[inst->dest] = (int)src; break; // CAST_1 + case 2: mips->fs[inst->dest] = (int)ceilf(src); break; // CEIL_2 + case 3: mips->fs[inst->dest] = (int)floorf(src); break; // FLOOR_3 + } + break; //cvt.w.s + } + + case IROp::FMovFromGPR: + memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4); + break; + case IROp::FMovToGPR: + memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); + break; case IROp::ExitToConst: return constPool[inst->dest]; @@ -341,6 +396,15 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; } + case IROp::CallReplacement: + { + int funcIndex = constPool[inst->src1]; + const ReplacementTableEntry *f = GetReplacementFunc(funcIndex); + int cycles = f->replaceFunc(); + mips->downcount -= cycles; + break; + } + default: Crash(); } diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index c3cb6021de50..988601993c7e 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -152,6 +152,7 @@ enum class IROp : u8 { Syscall, SetPC, // hack to make syscall returns work + CallReplacement, Break, }; diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index d99cebfc3ec8..6e1317cdc8f3 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -313,7 +313,33 @@ bool IRJit::ReplaceJalTo(u32 dest) { } void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { - Crash(); + int index = op.encoding & MIPS_EMUHACK_VALUE_MASK; + + const ReplacementTableEntry *entry = GetReplacementFunc(index); + if (!entry) { + ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding); + return; + } + + if (entry->flags & REPFLAG_DISABLED) { + MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this); + } else if (entry->replaceFunc) { + FlushAll(); + RestoreRoundingMode(); + ir.Write(IROp::SetPC, 
0, ir.AddConstant(GetCompilerPC())); + ir.Write(IROp::CallReplacement, 0, ir.AddConstant(index)); + + if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { + // Compile the original instruction at this address. We ignore cycles for hooks. + ApplyRoundingMode(); + MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this); + } else { + ApplyRoundingMode(); + js.compiling = false; + } + } else { + ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name); + } } void IRJit::Comp_Generic(MIPSOpcode op) { diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index c5ef44bc0d04..f0b51db0c0df 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -974,8 +974,6 @@ void MIPSInterpret(MIPSOpcode op) { int MIPSInterpret_RunUntil(u64 globalTicks) { - int blockCount = 150000; - FILE *f = fopen("E:\\blockjit.txt", "w"); MIPSState *curMips = currentMIPS; while (coreState == CORE_RUNNING) { @@ -1019,6 +1017,7 @@ int MIPSInterpret_RunUntil(u64 globalTicks) bool wasInDelaySlot = curMips->inDelaySlot; + /* if (curMips->pc != lastPC + 4) { if (blockCount > 0) { MIPSState *mips_ = curMips; @@ -1028,7 +1027,7 @@ int MIPSInterpret_RunUntil(u64 globalTicks) } } lastPC = curMips->pc; - + */ MIPSInterpret(op); if (curMips->inDelaySlot) diff --git a/Core/MemMap.cpp b/Core/MemMap.cpp index 68e1f3e5a84f..0d3b3817ccdc 100644 --- a/Core/MemMap.cpp +++ b/Core/MemMap.cpp @@ -426,26 +426,22 @@ __forceinline static Opcode Read_Instruction(u32 address, bool resolveReplacemen if (MIPS_IS_RUNBLOCK(inst.encoding) && MIPSComp::jit) { JitBlockCache *bc = MIPSComp::jit->GetBlockCache(); - int block_num = bc->GetBlockNumberFromEmuHackOp(inst, true); - if (block_num >= 0) { - inst = bc->GetOriginalFirstOp(block_num); - if (resolveReplacements && MIPS_IS_REPLACEMENT(inst)) { - u32 op; - if (GetReplacedOpAt(address, &op)) { - if (MIPS_IS_EMUHACK(op)) { - ERROR_LOG(HLE,"WTF 1"); - return Opcode(op); - } else { - return Opcode(op); - } + + inst = MIPSComp::jit->GetOriginalOp(inst); + if (resolveReplacements && MIPS_IS_REPLACEMENT(inst)) { + u32 op; + if (GetReplacedOpAt(address, &op)) { + if (MIPS_IS_EMUHACK(op)) { + ERROR_LOG(HLE,"WTF 1"); + return Opcode(op); } else { - ERROR_LOG(HLE, "Replacement, but no replacement op? %08x", inst.encoding); + return Opcode(op); } + } else { + ERROR_LOG(HLE, "Replacement, but no replacement op? %08x", inst.encoding); } - return inst; - } else { - return inst; } + return inst; } else if (resolveReplacements && MIPS_IS_REPLACEMENT(inst.encoding)) { u32 op; if (GetReplacedOpAt(address, &op)) { From 750d520cc7361e943636a4128274ccdade0fac60 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 7 May 2016 22:27:58 +0200 Subject: [PATCH 06/77] Initial work on mult and friends. Fix another bug. --- Core/MIPS/IR/IRCompALU.cpp | 85 +++++++++++++++++++++++++++-------- Core/MIPS/IR/IRCompBranch.cpp | 2 +- Core/MIPS/IR/IRInst.cpp | 60 ++++++++++++++++++++++--- Core/MIPS/IR/IRInst.h | 13 ++++-- Core/MIPS/IR/IRJit.cpp | 3 +- Core/MIPS/IR/IRPassSimplify.h | 1 + 6 files changed, 133 insertions(+), 31 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 7c360082c9ed..7a55c9f8c3d4 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -153,21 +153,6 @@ void IRJit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp co } return; } - /* - if (gpr.IsImm(rt) || (gpr.IsImm(rs) && symmetric)) { - MIPSGPReg lhs = gpr.IsImm(rs) ? rt : rs; - MIPSGPReg rhs = gpr.IsImm(rs) ? 
rs : rt; - u32 rhsImm = gpr.GetImm(rhs); - gpr.MapDirtyIn(rd, lhs); - ir.Write(constOp, rd, lhs, ir.AddConstant(rhsImm)); - // If rd is rhs, we may have lost it in the MapDirtyIn(). lhs was kept. - // This means the rhsImm value was never flushed to rhs, and would be garbage. - if (rd == rhs) { - // Luckily, it was just an imm. - gpr.SetImm(rhs, rhsImm); - } - return; - }*/ // Can't do the RSB optimization on ARM64 - no RSB! @@ -463,15 +448,77 @@ void IRJit::Comp_Allegrex2(MIPSOpcode op) { void IRJit::Comp_MulDivType(MIPSOpcode op) { CONDITIONAL_DISABLE; + DISABLE; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; MIPSGPReg rd = _RD; - // Note that in all cases below, LO is actually mapped to HI:LO. - // That is, the host reg is 64 bits and has HI at the top. - // HI is not mappable. + switch (op & 63) { + case 16: // R(rd) = HI; //mfhi + if (rd != MIPS_REG_ZERO) { + gpr.MapDirty(rd); + ir.Write(IROp::MfHi, rd); + } + break; + + case 17: // HI = R(rs); //mthi + gpr.MapIn(rs); + ir.Write(IROp::MtHi, 0, rs); + break; - DISABLE; + case 18: // R(rd) = LO; break; //mflo + if (rd != MIPS_REG_ZERO) { + gpr.MapDirty(rd); + ir.Write(IROp::MfLo, rd); + } + break; + + case 19: // LO = R(rs); break; //mtlo + gpr.MapIn(rs); + ir.Write(IROp::MtLo, 0, rs); + break; + + case 24: //mult (the most popular one). lo,hi = signed mul (rs * rt) + ir.Write(IROp::Mult, 0, rs, rt); + break; + + case 25: //multu (2nd) lo,hi = unsigned mul (rs * rt) + ir.Write(IROp::MultU, 0, rs, rt); + break; + + case 26: //div + DISABLE; + ir.Write(IROp::Div, 0, rs, rt); + break; + + case 27: //divu + DISABLE; + ir.Write(IROp::DivU, 0, rs, rt); + break; + + case 28: //madd + DISABLE; + ir.Write(IROp::Madd, 0, rs, rt); + break; + + case 29: //maddu + DISABLE; + ir.Write(IROp::MaddU, 0, rs, rt); + break; + + case 46: // msub + DISABLE; + ir.Write(IROp::Msub, 0, rs, rt); + break; + + case 47: // msubu + DISABLE; + ir.Write(IROp::MsubU, 0, rs, rt); + break; + + default: + DISABLE; + } } } diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 27fb5ae52d4e..7a4601838f4a 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -380,7 +380,7 @@ void IRJit::Comp_Syscall(MIPSOpcode op) { } void IRJit::Comp_Break(MIPSOpcode op) { - Comp_Generic(op); + ir.Write(IROp::Break); js.compiling = false; } diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 4876ce697da1..460bca7c9052 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -8,7 +8,7 @@ #include "math/math_util.h" -IRMeta meta[] = { +static const IRMeta irMeta[] = { { IROp::SetConst, "SetConst", "GC_" }, { IROp::Mov, "Mov", "GG" }, { IROp::Add, "Add", "GGG" }, @@ -42,7 +42,18 @@ IRMeta meta[] = { { IROp::Min, "Min", "GGG" }, { IROp::BSwap16, "BSwap16", "GG" }, { IROp::BSwap32, "BSwap32", "GG" }, - { IROp::Mul, "Mul", "_GG" }, + { IROp::Mult, "Mult", "_GG" }, + { IROp::MultU, "MultU", "_GG" }, + { IROp::Madd, "Madd", "_GG" }, + { IROp::MaddU, "MaddU", "_GG" }, + { IROp::Msub, "Msub", "_GG" }, + { IROp::MsubU, "MsubU", "_GG" }, + { IROp::Div, "Div", "_GG" }, + { IROp::DivU, "DivU", "_GG" }, + { IROp::MtLo, "MtLo", "_G" }, + { IROp::MtHi, "MtHi", "_G" }, + { IROp::MfLo, "MfLo", "G" }, + { IROp::MfHi, "MfHi", "G" }, { IROp::Ext8to32, "Ext8to32", "GG" }, { IROp::Ext16to32, "Ext16to32", "GG" }, { IROp::Load8, "Load8", "GGC" }, @@ -81,15 +92,16 @@ IRMeta meta[] = { { IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG" }, { IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG" }, { IROp::ExitToReg, "ExitToReg", "G" }, - { IROp::Syscall, 
"Syscall", "_C"}, + { IROp::Syscall, "Syscall", "_C" }, + { IROp::Break, "Break", ""}, { IROp::SetPC, "SetPC", "_G"}, }; const IRMeta *metaIndex[256]; void InitIR() { - for (size_t i = 0; i < ARRAY_SIZE(meta); i++) { - metaIndex[(int)meta[i].op] = &meta[i]; + for (size_t i = 0; i < ARRAY_SIZE(irMeta); i++) { + metaIndex[(int)irMeta[i].op] = &irMeta[i]; } } @@ -251,6 +263,32 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2]; break; + case IROp::MtLo: + mips->lo = mips->r[inst->src1]; + break; + case IROp::MtHi: + mips->hi = mips->r[inst->src1]; + break; + case IROp::MfLo: + mips->r[inst->dest] = mips->lo; + break; + case IROp::MfHi: + mips->r[inst->dest] = mips->hi; + break; + + case IROp::Mult: + { + s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2]; + memcpy(&mips->lo, &result, 8); + break; + } + case IROp::MultU: + { + u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2]; + memcpy(&mips->lo, &result, 8); + break; + } + case IROp::BSwap16: { u32 x = mips->r[inst->src1]; @@ -381,6 +419,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c mips->pc = mips->r[inst->src1]; break; + case IROp::SetPCConst: + mips->pc = constPool[inst->src1]; + break; + case IROp::Syscall: // SetPC was executed before. { @@ -402,9 +444,13 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c const ReplacementTableEntry *f = GetReplacementFunc(funcIndex); int cycles = f->replaceFunc(); mips->downcount -= cycles; - break; + return mips->r[MIPS_REG_RA]; } + case IROp::Break: + Crash(); + break; + default: Crash(); } @@ -412,7 +458,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c } // If we got here, the block was badly constructed. - // Crash(); + Crash(); return 0; } diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 988601993c7e..d3fecabe69b1 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -66,13 +66,19 @@ enum class IROp : u8 { BSwap16, // Swaps both the high and low byte pairs. BSwap32, - // Hi/Lo semantics preserved. - Mul, - MulU, + // Weird Hi/Lo semantics preserved. Too annoying to do something more generic. + MtLo, + MtHi, + MfLo, + MfHi, + Mult, + MultU, Madd, MaddU, Msub, MsubU, + Div, + DivU, // These take a constant from the pool as an offset. // Loads from a constant address can be represented by using r0. 
@@ -152,6 +158,7 @@ enum class IROp : u8 { Syscall, SetPC, // hack to make syscall returns work + SetPCConst, // hack to make replacement know PC CallReplacement, Break, }; diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 6e1317cdc8f3..2f88b7740f1c 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -273,6 +273,7 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { } } + if (logBlocks > 0 && dontLogBlocks == 0) { ILOG("=============== IR (%d instructions) ===============", js.numInstructions); for (int i = 0; i < ir.GetInstructions().size(); i++) { @@ -326,7 +327,7 @@ void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { } else if (entry->replaceFunc) { FlushAll(); RestoreRoundingMode(); - ir.Write(IROp::SetPC, 0, ir.AddConstant(GetCompilerPC())); + ir.Write(IROp::SetPCConst, 0, ir.AddConstant(GetCompilerPC())); ir.Write(IROp::CallReplacement, 0, ir.AddConstant(index)); if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index c798d89f92b0..8706661704b6 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -2,4 +2,5 @@ #include "Core/MIPS/IR/IRInst.h" +// Dumb example of a simplification pass that can't add or remove instructions. void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool); From a33f8b68c62bd483d8a8c018398d9be9262313d3 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 7 May 2016 23:12:53 +0200 Subject: [PATCH 07/77] ir-jit: Get rid of the regcache. Should be replaced with optimization passes. --- Core/MIPS/IR/IRCompALU.cpp | 197 ++++--------------------------- Core/MIPS/IR/IRCompBranch.cpp | 18 +-- Core/MIPS/IR/IRCompFPU.cpp | 11 +- Core/MIPS/IR/IRCompLoadStore.cpp | 2 - Core/MIPS/IR/IRInst.cpp | 7 ++ Core/MIPS/IR/IRInst.h | 1 + Core/MIPS/IR/IRJit.cpp | 9 +- Core/MIPS/IR/IRJit.h | 11 +- Core/MIPS/IR/IRPassSimplify.cpp | 4 + 9 files changed, 55 insertions(+), 205 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 7a55c9f8c3d4..078e296c6c9b 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -20,7 +20,6 @@ #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSCodeUtils.h" #include "Core/MIPS/IR/IRJit.h" -#include "Core/MIPS/IR/IRRegCache.h" #include "Common/CPUDetect.h" using namespace MIPSAnalyst; @@ -46,21 +45,6 @@ using namespace MIPSAnalyst; namespace MIPSComp { -void IRJit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, IROp OP) { - if (gpr.IsImm(rs)) { - switch (OP) { - case IROp::AddConst: gpr.SetImm(rt, gpr.GetImm(rs) + uimm); break; - case IROp::SubConst: gpr.SetImm(rt, gpr.GetImm(rs) - uimm); break; - case IROp::AndConst: gpr.SetImm(rt, gpr.GetImm(rs) & uimm); break; - case IROp::OrConst: gpr.SetImm(rt, gpr.GetImm(rs) | uimm); break; - case IROp::XorConst: gpr.SetImm(rt, gpr.GetImm(rs) ^ uimm); break; - } - } else { - gpr.MapDirtyIn(rt, rs); - ir.Write(OP, rt, rs, ir.AddConstant(uimm)); - } -} - void IRJit::Comp_IType(MIPSOpcode op) { CONDITIONAL_DISABLE; s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension @@ -79,36 +63,26 @@ void IRJit::Comp_IType(MIPSOpcode op) { case 9: // R(rt) = R(rs) + simm; break; //addiu // Special-case for small adjustments of pointerified registers. Commonly for SP but happens for others. 
if (simm >= 0) { - CompImmLogic(rs, rt, simm, IROp::AddConst); + ir.Write(IROp::AddConst, rt, rs, ir.AddConstant(simm)); } else if (simm < 0) { - CompImmLogic(rs, rt, -simm, IROp::SubConst); + ir.Write(IROp::SubConst, rt, rs, ir.AddConstant(-simm)); } break; - case 12: CompImmLogic(rs, rt, uimm, IROp::AndConst); break; - case 13: CompImmLogic(rs, rt, uimm, IROp::OrConst); break; - case 14: CompImmLogic(rs, rt, uimm, IROp::XorConst); break; + case 12: ir.Write(IROp::AndConst, rt, rs, ir.AddConstant(uimm)); break; + case 13: ir.Write(IROp::OrConst, rt, rs, ir.AddConstant(uimm)); break; + case 14: ir.Write(IROp::XorConst, rt, rs, ir.AddConstant(uimm)); break; case 10: // R(rt) = (s32)R(rs) < simm; break; //slti - if (gpr.IsImm(rs)) { - gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm ? 1 : 0); - break; - } - gpr.MapDirtyIn(rt, rs); ir.Write(IROp::SltConst, rt, rs, ir.AddConstant(simm)); break; case 11: // R(rt) = R(rs) < suimm; break; //sltiu - if (gpr.IsImm(rs)) { - gpr.SetImm(rt, gpr.GetImm(rs) < suimm ? 1 : 0); - break; - } - gpr.MapDirtyIn(rt, rs); ir.Write(IROp::SltUConst, rt, rs, ir.AddConstant(suimm)); break; case 15: // R(rt) = uimm << 16; //lui - gpr.SetImm(rt, uimm << 16); + ir.WriteSetConstant(rt, uimm << 16); break; default: @@ -129,11 +103,9 @@ void IRJit::Comp_RType2(MIPSOpcode op) { switch (op & 63) { case 22: //clz - gpr.MapDirtyIn(rd, rs); ir.Write(IROp::Clz, rd, rs); break; case 23: //clo - gpr.MapDirtyIn(rd, rs); ir.Write(IROp::Not, IRTEMP_0, rs); ir.Write(IROp::Clz, rd, IRTEMP_0); break; @@ -142,25 +114,6 @@ void IRJit::Comp_RType2(MIPSOpcode op) { } } -void IRJit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp constOp, bool symmetric) { - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { - switch (op) { - case IROp::Add: gpr.SetImm(rd, gpr.GetImm(rs) + gpr.GetImm(rt)); break; - case IROp::Sub: gpr.SetImm(rd, gpr.GetImm(rs) - gpr.GetImm(rt)); break; - case IROp::And: gpr.SetImm(rd, gpr.GetImm(rs) & gpr.GetImm(rt)); break; - case IROp::Or: gpr.SetImm(rd, gpr.GetImm(rs) | gpr.GetImm(rt)); break; - case IROp::Xor: gpr.SetImm(rd, gpr.GetImm(rs) ^ gpr.GetImm(rt)); break; - } - return; - } - - // Can't do the RSB optimization on ARM64 - no RSB! - - // Generic solution. If it's an imm, better to flush at this point. 
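With the regcache-based folding deleted here, the Comp_* frontend functions emit one straightforward IR op per MIPS instruction, and the immediate folding that used to happen inline returns two patches later in this series as the PropagateConstants pass, which performs the same folding over the finished IR instead of during decoding.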
- gpr.MapDirtyInIn(rd, rs, rt); - ir.Write(op, rd, rs, rt); -} - void IRJit::Comp_RType3(MIPSOpcode op) { CONDITIONAL_DISABLE; @@ -174,83 +127,56 @@ void IRJit::Comp_RType3(MIPSOpcode op) { switch (op & 63) { case 10: //if (!R(rt)) R(rd) = R(rs); break; //movz - gpr.MapDirtyInIn(rd, rt, rs); ir.Write(IROp::MovZ, rd, rt, rs); break; case 11:// if (R(rt)) R(rd) = R(rs); break; //movn - gpr.MapDirtyInIn(rd, rt, rs); ir.Write(IROp::MovNZ, rd, rt, rs); break; case 32: //R(rd) = R(rs) + R(rt); break; //add case 33: //R(rd) = R(rs) + R(rt); break; //addu - CompType3(rd, rs, rt, IROp::Add, IROp::AddConst, true); + ir.Write(IROp::Add, rd, rs, rt); break; case 34: //R(rd) = R(rs) - R(rt); break; //sub case 35: //R(rd) = R(rs) - R(rt); break; //subu - CompType3(rd, rs, rt, IROp::Sub, IROp::SubConst, false); + ir.Write(IROp::Sub, rd, rs, rt); break; case 36: //R(rd) = R(rs) & R(rt); break; //and - CompType3(rd, rs, rt, IROp::And, IROp::AndConst, true); + ir.Write(IROp::And, rd, rs, rt); break; case 37: //R(rd) = R(rs) | R(rt); break; //or - CompType3(rd, rs, rt, IROp::Or, IROp::OrConst, true); + ir.Write(IROp::Or, rd, rs, rt); break; case 38: //R(rd) = R(rs) ^ R(rt); break; //xor/eor - CompType3(rd, rs, rt, IROp::Xor, IROp::XorConst, true); + ir.Write(IROp::Xor, rd, rs, rt); break; case 39: // R(rd) = ~(R(rs) | R(rt)); break; //nor - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { - gpr.SetImm(rd, ~(gpr.GetImm(rs) | gpr.GetImm(rt))); + if (rs == 0) { + ir.Write(IROp::Not, rd, rt); + } else if (rt == 0) { + ir.Write(IROp::Not, rd, rs); } else { - gpr.MapDirtyInIn(rd, rs, rt); - if (rs == 0) { - ir.Write(IROp::Not, rd, rt); - } else if (rt == 0) { - ir.Write(IROp::Not, rd, rs); - } else { - ir.Write(IROp::Or, IRTEMP_0, rs, rt); - ir.Write(IROp::Not, rd, IRTEMP_0); - } + ir.Write(IROp::Or, IRTEMP_0, rs, rt); + ir.Write(IROp::Not, rd, IRTEMP_0); } break; case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { - gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt)); - } else { - gpr.MapDirtyInIn(rd, rt, rs); - ir.Write(IROp::Slt, rd, rs, rt); - } + ir.Write(IROp::Slt, rd, rs, rt); break; case 43: //R(rd) = R(rs) < R(rt); break; //sltu - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { - gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt)); - } else { - gpr.MapDirtyInIn(rd, rt, rs); - ir.Write(IROp::SltU, rd, rs, rt); - } + ir.Write(IROp::SltU, rd, rs, rt); break; case 44: //R(rd) = max(R(rs), R(rt); break; //max - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { - gpr.SetImm(rd, std::max(gpr.GetImm(rs), gpr.GetImm(rt))); - break; - } - gpr.MapDirtyInIn(rd, rs, rt); ir.Write(IROp::Max, rd, rs, rt); break; case 45: //R(rd) = min(R(rs), R(rt)); break; //min - if (gpr.IsImm(rs) && gpr.IsImm(rt)) { - gpr.SetImm(rd, std::min(gpr.GetImm(rs), gpr.GetImm(rt))); - break; - } - gpr.MapDirtyInIn(rd, rs, rt); ir.Write(IROp::Min, rd, rs, rt); break; @@ -263,39 +189,13 @@ void IRJit::Comp_RType3(MIPSOpcode op) { void IRJit::CompShiftImm(MIPSOpcode op, IROp shiftOpConst, int sa) { MIPSGPReg rd = _RD; MIPSGPReg rt = _RT; - if (gpr.IsImm(rt)) { - switch (shiftOpConst) { - case IROp::ShlImm: - gpr.SetImm(rd, gpr.GetImm(rt) << sa); - break; - case IROp::ShrImm: - gpr.SetImm(rd, gpr.GetImm(rt) >> sa); - break; - case IROp::SarImm: - gpr.SetImm(rd, (int)gpr.GetImm(rt) >> sa); - break; - case IROp::RorImm: - gpr.SetImm(rd, (gpr.GetImm(rt) >> sa) | (gpr.GetImm(rt) << (32 - sa))); - break; - default: - DISABLE; - } - } else { - gpr.MapDirtyIn(rd, rt); - ir.Write(shiftOpConst, rd, rt, sa); - } + ir.Write(shiftOpConst, rd, 
rt, sa); } void IRJit::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpConst) { MIPSGPReg rd = _RD; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; - if (gpr.IsImm(rs)) { - int sa = gpr.GetImm(rs) & 0x1F; - CompShiftImm(op, shiftOpConst, sa); - return; - } - gpr.MapDirtyInIn(rd, rs, rt); // Not sure if ARM64 wraps like this so let's do it for it. (TODO: According to the ARM ARM, it will indeed mask for us so this is not necessary) // ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG); ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(31)); @@ -343,12 +243,6 @@ void IRJit::Comp_Special3(MIPSOpcode op) { switch (op & 0x3f) { case 0x0: //ext - if (gpr.IsImm(rs)) { - gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask); - return; - } - - gpr.MapDirtyIn(rt, rs); ir.Write(IROp::Shl, rt, rs); ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(mask)); break; @@ -357,25 +251,10 @@ void IRJit::Comp_Special3(MIPSOpcode op) { { u32 sourcemask = mask >> pos; u32 destmask = ~(sourcemask << pos); - if (gpr.IsImm(rs)) { - u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos; - if (gpr.IsImm(rt)) { - gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted); - return; - } - - gpr.MapDirty(rt); - ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask)); - if (inserted != 0) { - ir.Write(IROp::OrConst, rt, rt, inserted); - } - } else { - gpr.MapDirtyIn(rt, rs); - ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask)); - ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask)); - ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos); - ir.Write(IROp::Or, rt, rt, IRTEMP_0); - } + ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask)); + ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask)); + ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos); + ir.Write(IROp::Or, rt, rt, IRTEMP_0); } break; } @@ -391,20 +270,10 @@ void IRJit::Comp_Allegrex(MIPSOpcode op) { switch ((op >> 6) & 31) { case 16: // seb // R(rd) = (u32)(s32)(s8)(u8)R(rt); - if (gpr.IsImm(rt)) { - gpr.SetImm(rd, (s32)(s8)(u8)gpr.GetImm(rt)); - return; - } - gpr.MapDirtyIn(rd, rt); ir.Write(IROp::Ext8to32, rd, rt); break; case 24: // seh - if (gpr.IsImm(rt)) { - gpr.SetImm(rd, (s32)(s16)(u16)gpr.GetImm(rt)); - return; - } - gpr.MapDirtyIn(rd, rt); ir.Write(IROp::Ext16to32, rd, rt); break; @@ -425,20 +294,10 @@ void IRJit::Comp_Allegrex2(MIPSOpcode op) { switch (op & 0x3ff) { case 0xA0: //wsbh - if (gpr.IsImm(rt)) { - gpr.SetImm(rd, ((gpr.GetImm(rt) & 0xFF00FF00) >> 8) | ((gpr.GetImm(rt) & 0x00FF00FF) << 8)); - } else { - gpr.MapDirtyIn(rd, rt); - ir.Write(IROp::BSwap16, rd, rt); - } + ir.Write(IROp::BSwap16, rd, rt); break; case 0xE0: //wsbw - if (gpr.IsImm(rt)) { - gpr.SetImm(rd, swap32(gpr.GetImm(rt))); - } else { - gpr.MapDirtyIn(rd, rt); - ir.Write(IROp::BSwap16, rd, rt); - } + ir.Write(IROp::BSwap16, rd, rt); break; default: Comp_Generic(op); @@ -456,25 +315,21 @@ void IRJit::Comp_MulDivType(MIPSOpcode op) { switch (op & 63) { case 16: // R(rd) = HI; //mfhi if (rd != MIPS_REG_ZERO) { - gpr.MapDirty(rd); ir.Write(IROp::MfHi, rd); } break; case 17: // HI = R(rs); //mthi - gpr.MapIn(rs); ir.Write(IROp::MtHi, 0, rs); break; case 18: // R(rd) = LO; break; //mflo if (rd != MIPS_REG_ZERO) { - gpr.MapDirty(rd); ir.Write(IROp::MfLo, rd); } break; case 19: // LO = R(rs); break; //mtlo - gpr.MapIn(rs); ir.Write(IROp::MtLo, 0, rs); break; diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 7a4601838f4a..9d69b282c212 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -29,7 +29,6 @@ #include 
"Core/MIPS/MIPSTables.h" #include "Core/MIPS/IR/IRJit.h" -#include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" #include "Common/Arm64Emitter.h" @@ -74,12 +73,10 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) MIPSGPReg rhs = rt; if (!delaySlotIsNice) { // if likely, we don't need this if (rs != 0) { - gpr.MapIn(rs); ir.Write(IROp::Mov, IRTEMP_0, rs); lhs = (MIPSGPReg)IRTEMP_0; } if (rt != 0) { - gpr.MapIn(rt); ir.Write(IROp::Mov, IRTEMP_1, rt); rhs = (MIPSGPReg)IRTEMP_1; } @@ -88,7 +85,6 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) if (!likely) CompileDelaySlot(); - gpr.MapInIn(lhs, rhs); FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs); // This makes the block "impure" :( @@ -121,12 +117,11 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool lhs = (MIPSGPReg)IRTEMP_0; } if (andLink) - gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); + ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8); if (!likely) CompileDelaySlot(); - gpr.MapIn(lhs); FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs); if (likely) @@ -294,7 +289,7 @@ void IRJit::Comp_Jump(MIPSOpcode op) { break; case 3: //jal - gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); + ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8); CompileDelaySlot(); FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); @@ -325,27 +320,24 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { int destReg; if (IsSyscall(delaySlotOp)) { - gpr.MapDirty(rs); ir.Write(IROp::SetPC, 0, rs); if (andLink) - gpr.SetImm(rd, GetCompilerPC() + 8); + ir.WriteSetConstant(rd, GetCompilerPC() + 8); CompileDelaySlot(); // Syscall (the delay slot) does FlushAll. return; // Syscall (delay slot) wrote exit code. } else if (delaySlotIsNice) { if (andLink) - gpr.SetImm(rd, GetCompilerPC() + 8); + ir.WriteSetConstant(rd, GetCompilerPC() + 8); CompileDelaySlot(); - gpr.MapDirty(rs); destReg = rs; // Safe because FlushAll doesn't change any regs FlushAll(); } else { // Bad delay slot. - gpr.MapDirty(rs); ir.Write(IROp::Mov, IRTEMP_0, rs); destReg = IRTEMP_0; if (andLink) - gpr.SetImm(rd, GetCompilerPC() + 8); + ir.WriteSetConstant(rd, GetCompilerPC() + 8); CompileDelaySlot(); FlushAll(); } diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 8d9d7b2aa5f2..86e795e50be7 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -82,7 +82,7 @@ void IRJit::Comp_FPUComp(MIPSOpcode op) { int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias if (opc == 0) { // f, sf (signalling false) - gpr.SetImm((MIPSGPReg)IRREG_FPCOND, 0); + ir.Write(IROp::ZeroFpCond); return; } @@ -186,7 +186,6 @@ void IRJit::Comp_mxc1(MIPSOpcode op) if (rt == MIPS_REG_ZERO) { return; } - gpr.MapDirty(rt); ir.Write(IROp::FMovToGPR, rt, fs); return; @@ -196,16 +195,16 @@ void IRJit::Comp_mxc1(MIPSOpcode op) } if (fs == 31) { DISABLE; - } else if (fs == 0) { - gpr.SetImm(rt, MIPSState::FCR0_VALUE); + } + else if (fs == 0) { + ir.Write(IROp::SetConst, rt, ir.AddConstant(MIPSState::FCR0_VALUE)); } else { // Unsupported regs are always 0. 
- gpr.SetImm(rt, 0); + ir.Write(IROp::SetConst, rt, ir.AddConstant(0)); } return; case 4: //FI(fs) = R(rt); break; //mtc1 - gpr.MapDirty(rt); ir.Write(IROp::FMovFromGPR, fs, rt); return; diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index fb0a143dd8a6..4e702a544f2a 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -82,8 +82,6 @@ namespace MIPSComp { return; } - gpr.MapIn(rs); - gpr.MapDirty(rt); int addrReg = IRTEMP_0; switch (o) { // Load diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 460bca7c9052..d05e10bf8daf 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -372,6 +372,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c } break; //cvt.w.s } + case IROp::ZeroFpCond: + mips->fpcond = 0; + break; case IROp::FMovFromGPR: memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4); @@ -481,6 +484,10 @@ int IRWriter::AddConstant(u32 value) { return (int)i; } constPool_.push_back(value); + if (constPool_.size() > 255) { + // Cannot have more than 256 constants in a block! + Crash(); + } return (int)constPool_.size() - 1; } diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index d3fecabe69b1..c8680d1ad0f7 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -121,6 +121,7 @@ enum class IROp : u8 { FpCondToReg, VfpCondToReg, + ZeroFpCond, FCmpUnordered, FCmpEqual, FCmpEqualUnordered, diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 2f88b7740f1c..1b4a9a59c54e 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -41,14 +41,14 @@ namespace MIPSComp { -IRJit::IRJit(MIPSState *mips) : gpr(), mips_(mips) { +IRJit::IRJit(MIPSState *mips) : mips_(mips) { logBlocks = 0; dontLogBlocks = 0; js.startDefaultPrefix = true; js.currentRoundingFunc = convertS0ToSCRATCH1[0]; u32 size = 128 * 1024; blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); - logBlocks = 0; + logBlocks = 100; InitIR(); } @@ -88,7 +88,7 @@ void IRJit::DoDummyState(PointerWrap &p) { } void IRJit::FlushAll() { - gpr.FlushAll(); + // gpr.FlushAll(); // FlushPrefixV(); } @@ -246,8 +246,6 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { js.PrefixStart(); ir.Clear(); - gpr.Start(&ir); - int partialFlushOffset = 0; js.numInstructions = 0; @@ -273,7 +271,6 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { } } - if (logBlocks > 0 && dontLogBlocks == 0) { ILOG("=============== IR (%d instructions) ===============", js.numInstructions); for (int i = 0; i < ir.GetInstructions().size(); i++) { diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 63badb5ce2cf..13e9162fa71c 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -84,16 +84,18 @@ class IRBlockCache { int GetNumBlocks() const { return (int)blocks_.size(); } int AllocateBlock(int emAddr) { blocks_.emplace_back(IRBlock(emAddr)); + size_ = (int)blocks_.size(); return (int)blocks_.size() - 1; } IRBlock *GetBlock(int i) { - if (i >= 0 && i < blocks_.size()) { - return &blocks_[i]; + if (i >= 0 && i < size_) { + return blocks_.data() + i; } else { return nullptr; } } private: + int size_; std::vector blocks_; }; @@ -231,8 +233,6 @@ class IRJit : public JitInterface { void BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely); // Utilities to reduce duplicated code - void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, IROp op); - void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp constOp, bool symmetric = false); void CompShiftImm(MIPSOpcode 
op, IROp shiftType, int sa); void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst); @@ -258,9 +258,6 @@ class IRJit : public JitInterface { IRBlockCache blocks_; - IRRegCache gpr; - // Arm64RegCacheFPU fpr; - MIPSState *mips_; int dontLogBlocks; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index e110b7380874..6082f261fbf0 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -6,6 +6,10 @@ void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool) { case IROp::AddConst: if (constPool[inst[i].src2] == 0) inst[i].op = IROp::Mov; + else if (inst[i].src1 == 0) { + inst[i].op = IROp::SetConst; + inst[i].src1 = inst[i].src2; + } break; default: break; From 09969c0156162fdf9b4cbf8dcaacbd9dad19706f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 01:06:07 +0200 Subject: [PATCH 08/77] Use the regcache in a new (incomplete) pass, PropagateConstants. --- Core/MIPS/IR/IRCompALU.cpp | 7 +- Core/MIPS/IR/IRCompFPU.cpp | 1 + Core/MIPS/IR/IRInst.cpp | 20 +++-- Core/MIPS/IR/IRInst.h | 8 +- Core/MIPS/IR/IRJit.cpp | 30 +++++-- Core/MIPS/IR/IRPassSimplify.cpp | 140 ++++++++++++++++++++++++++++++++ Core/MIPS/IR/IRPassSimplify.h | 3 + Core/MIPS/IR/IRRegCache.cpp | 52 ++++++------ Core/MIPS/IR/IRRegCache.h | 23 +++--- 9 files changed, 230 insertions(+), 54 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 078e296c6c9b..d68150b17842 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -61,12 +61,7 @@ void IRJit::Comp_IType(MIPSOpcode op) { switch (op >> 26) { case 8: // same as addiu? case 9: // R(rt) = R(rs) + simm; break; //addiu - // Special-case for small adjustments of pointerified registers. Commonly for SP but happens for others. 
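Collapsing the simm >= 0 / simm < 0 split (removed just below) into a single AddConst is safe because 32-bit arithmetic wraps: adding the sign-extended immediate and subtracting its magnitude give the same result, assuming the AddConst interpreter case is a plain u32 add. A quick check of the identity:

    u32 rs = 0x1000;
    s32 simm = -4;
    // rs + (u32)simm == 0x0FFC == rs - (u32)(-simm)
    assert(rs + (u32)simm == rs - (u32)(-simm));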
- if (simm >= 0) { - ir.Write(IROp::AddConst, rt, rs, ir.AddConstant(simm)); - } else if (simm < 0) { - ir.Write(IROp::SubConst, rt, rs, ir.AddConstant(-simm)); - } + ir.Write(IROp::AddConst, rt, rs, ir.AddConstant(simm)); break; case 12: ir.Write(IROp::AndConst, rt, rs, ir.AddConstant(uimm)); break; diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 86e795e50be7..f94bddbaad7a 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -121,6 +121,7 @@ void IRJit::Comp_FPUComp(MIPSOpcode op) { void IRJit::Comp_FPU2op(MIPSOpcode op) { CONDITIONAL_DISABLE; + int fs = _FS; int fd = _FD; diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index d05e10bf8daf..b4eb14d98f5e 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -9,7 +9,7 @@ #include "math/math_util.h" static const IRMeta irMeta[] = { - { IROp::SetConst, "SetConst", "GC_" }, + { IROp::SetConst, "SetConst", "GC" }, { IROp::Mov, "Mov", "GG" }, { IROp::Add, "Add", "GGG" }, { IROp::Sub, "Sub", "GGG" }, @@ -81,7 +81,7 @@ static const IRMeta irMeta[] = { { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, { IROp::FMovToGPR, "FMovToGPR", "GF" }, { IROp::FpCondToReg, "FpCondToReg", "G" }, - { IROp::SetCtrlVFPU, "SetCtrlVFPU", "T" }, + { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, { IROp::ExitToConst, "Exit", "C" }, @@ -94,7 +94,9 @@ static const IRMeta irMeta[] = { { IROp::ExitToReg, "ExitToReg", "G" }, { IROp::Syscall, "Syscall", "_C" }, { IROp::Break, "Break", ""}, - { IROp::SetPC, "SetPC", "_G"}, + { IROp::SetPC, "SetPC", "_G" }, + { IROp::SetPCConst, "SetPC", "_C" }, + { IROp::CallReplacement, "CallRepl", "_C"}, }; const IRMeta *metaIndex[256]; @@ -454,6 +456,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c Crash(); break; + case IROp::SetCtrlVFPU: + mips->vfpuCtrl[inst->dest] = constPool[inst->src1]; + break; + default: Crash(); } @@ -498,7 +504,7 @@ int IRWriter::AddConstantFloat(float value) { } void IRWriter::Simplify() { - SimplifyInPlace(&insts_[0], insts_.size(), constPool_.data()); + SimplifyInPlace(&insts_[0], (int)insts_.size(), constPool_.data()); } const char *GetGPRName(int r) { @@ -536,8 +542,12 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co } } +const IRMeta *GetIRMeta(IROp op) { + return metaIndex[(int)op]; +} + void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool) { - const IRMeta *meta = metaIndex[(int)inst.op]; + const IRMeta *meta = GetIRMeta(inst.op); if (!meta) { snprintf(buf, bufsize, "Unknown %d", (int)inst.op); return; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index c8680d1ad0f7..2c6ab75cd756 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -249,6 +249,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c class IRWriter { public: void Write(IROp op, u8 dst = 0, u8 src1 = 0, u8 src2 = 0); + void Write(IRInst inst) { + insts_.push_back(inst); + } void WriteSetConstant(u8 dst, u32 value); int AddConstant(u32 value); @@ -261,13 +264,14 @@ class IRWriter { void Simplify(); - const std::vector &GetInstructions() { return insts_; } - const std::vector &GetConstants() { return constPool_; } + const std::vector &GetInstructions() const { return insts_; } + const std::vector &GetConstants() const { return constPool_; } private: std::vector insts_; std::vector constPool_; }; +const IRMeta *GetIRMeta(IROp op); void 
DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool); void InitIR(); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 1b4a9a59c54e..d393bf78ca70 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -36,6 +36,7 @@ #include "Core/HLE/sceKernelMemory.h" #include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRPassSimplify.h" #include "Core/MIPS/JitCommon/JitCommon.h" namespace MIPSComp @@ -44,11 +45,11 @@ namespace MIPSComp IRJit::IRJit(MIPSState *mips) : mips_(mips) { logBlocks = 0; dontLogBlocks = 0; - js.startDefaultPrefix = true; + js.startDefaultPrefix = mips_->HasDefaultPrefix(); js.currentRoundingFunc = convertS0ToSCRATCH1[0]; u32 size = 128 * 1024; blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); - logBlocks = 100; + logBlocks = 12; InitIR(); } @@ -88,8 +89,7 @@ void IRJit::DoDummyState(PointerWrap &p) { } void IRJit::FlushAll() { - // gpr.FlushAll(); - // FlushPrefixV(); + FlushPrefixV(); } void IRJit::FlushPrefixV() { @@ -259,7 +259,15 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { ir.Simplify(); - b->SetInstructions(ir.GetInstructions(), ir.GetConstants()); + IRWriter simplified; + + IRWriter *code = &ir; + if (true) { + PropagateConstants(ir, simplified); + code = &simplified; + } + + b->SetInstructions(code->GetInstructions(), code->GetConstants()); if (logBlocks > 0 && dontLogBlocks == 0) { char temp2[256]; @@ -272,7 +280,7 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { } if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== IR (%d instructions) ===============", js.numInstructions); + ILOG("=============== Original IR (%d instructions) ===============", (int)ir.GetInstructions().size()); for (int i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); @@ -281,6 +289,16 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { ILOG("=============== end ================="); } + if (logBlocks > 0 && dontLogBlocks == 0) { + ILOG("=============== IR (%d instructions) ===============", (int)code->GetInstructions().size()); + for (int i = 0; i < code->GetInstructions().size(); i++) { + char buf[256]; + DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], code->GetConstants().data()); + ILOG("%s", buf); + } + ILOG("=============== end ================="); + } + if (logBlocks > 0) logBlocks--; if (dontLogBlocks > 0) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 6082f261fbf0..38141951de10 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -1,4 +1,5 @@ #include "Core/MIPS/IR/IRPassSimplify.h" +#include "Core/MIPS/IR/IRRegCache.h" void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool) { for (int i = 0; i < count; i++) { @@ -15,4 +16,143 @@ void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool) { break; } } +} + + +u32 Evaluate(u32 a, u32 b, IROp op) { + switch (op) { + case IROp::Add: case IROp::AddConst: return a + b; + case IROp::Sub: case IROp::SubConst: return a - b; + case IROp::And: case IROp::AndConst: return a & b; + case IROp::Or: case IROp::OrConst: return a | b; + case IROp::Xor: case IROp::XorConst: return a ^ b; + default: + return -1; + } +} + +IROp ArithToArithConst(IROp op) { + switch (op) { + case IROp::Add: return IROp::AddConst; + case IROp::Sub: return IROp::SubConst; + case IROp::And: return IROp::AndConst; + case IROp::Or: return IROp::OrConst; + case 
IROp::Xor: return IROp::XorConst; + default: + return (IROp)-1; + } +} + + +void PropagateConstants(const IRWriter &in, IRWriter &out) { + IRRegCache gpr(&out); + + const u32 *constants = in.GetConstants().data(); + for (int i = 0; i < (int)in.GetInstructions().size(); i++) { + IRInst inst = in.GetInstructions()[i]; + bool symmetric = true; + switch (inst.op) { + case IROp::SetConst: + gpr.SetImm((MIPSGPReg)inst.dest, constants[inst.src1]); + break; + + case IROp::Sub: + symmetric = false; // fallthrough + case IROp::Add: + case IROp::And: + case IROp::Or: + case IROp::Xor: + if (gpr.IsImm(inst.src1) && gpr.IsImm(inst.src2)) { + gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), gpr.GetImm(inst.src2), inst.op)); + } else if (gpr.IsImm(inst.src2) && inst.src1 != inst.src2 && inst.dest != inst.src2) { + gpr.MapDirtyIn(inst.dest, inst.src1); + if (gpr.GetImm(inst.src2) == 0 && (inst.op == IROp::Add || inst.op == IROp::Or)) { + out.Write(IROp::Mov, inst.dest, inst.src1); + } else { + out.Write(ArithToArithConst(inst.op), inst.dest, inst.src1, out.AddConstant(gpr.GetImm(inst.src2))); + } + } else if (gpr.IsImm(inst.src1) && inst.src1 != inst.src2 && inst.dest != inst.src2 && symmetric) { + gpr.MapDirtyIn(inst.dest, inst.src2); + out.Write(ArithToArithConst(inst.op), inst.dest, inst.src2, out.AddConstant(gpr.GetImm(inst.src1))); + } else { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + goto doDefault; + } + break; + + case IROp::AddConst: + case IROp::SubConst: + case IROp::AndConst: + case IROp::OrConst: + case IROp::XorConst: + if (gpr.IsImm(inst.src1)) { + gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), constants[inst.src2], inst.op)); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + goto doDefault; + } + break; + + case IROp::Mov: + if (inst.src1 == inst.src2) { + // Nop + } else if (gpr.IsImm(inst.src1)) { + gpr.SetImm(inst.dest, gpr.GetImm(inst.src1)); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + goto doDefault; + } + break; + + case IROp::Store8: + case IROp::Store16: + case IROp::Store32: + // Just pass through, no excessive flushing + gpr.MapInIn(inst.dest, inst.src1); + goto doDefault; + + case IROp::Load8: + case IROp::Load8Ext: + case IROp::Load16: + case IROp::Load16Ext: + case IROp::Load32: + gpr.MapDirtyIn(inst.dest, inst.src1); + goto doDefault; + + case IROp::ExitToConst: + case IROp::ExitToReg: + case IROp::ExitToConstIfEq: + case IROp::ExitToConstIfNeq: + case IROp::ExitToConstIfFpFalse: + case IROp::ExitToConstIfFpTrue: + case IROp::ExitToConstIfGeZ: + case IROp::ExitToConstIfGtZ: + case IROp::ExitToConstIfLeZ: + case IROp::ExitToConstIfLtZ: + default: + { + gpr.FlushAll(); + doDefault: + // Remap constants to the new reality + const IRMeta *m = GetIRMeta(inst.op); + switch (m->types[0]) { + case 'C': + inst.dest = out.AddConstant(constants[inst.dest]); + break; + } + switch (m->types[1]) { + case 'C': + inst.src1 = out.AddConstant(constants[inst.src1]); + break; + } + switch (m->types[2]) { + case 'C': + inst.src2 = out.AddConstant(constants[inst.src2]); + break; + } + out.Write(inst); + break; + } + } + } } \ No newline at end of file diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index 8706661704b6..b5d0af1e95d1 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -4,3 +4,6 @@ // Dumb example of a simplification pass that can't add or remove instructions. 
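To make the effect of the PropagateConstants pass above concrete, here is roughly what it does to a small block, using the disassembly notation from IRInst.cpp (register names and the exit address are illustrative):

    SetConst a0, 0x10
    AddConst a0, a0, 0x20
    Add      v0, a0, a1
    Exit     0x08804000

comes out of the pass as approximately:

    AddConst v0, a1, 0x30
    SetConst a0, 0x30
    Exit     0x08804000

The first two instructions only update the register cache's idea of a0, the Add is rewritten against the now-known immediate, and the pending value of a0 is written back as a SetConst when the exit forces FlushAll().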
void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool); + + +void PropagateConstants(const IRWriter &in, IRWriter &out); \ No newline at end of file diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index 808370ce6321..f1c020139579 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -1,7 +1,7 @@ #include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/IR/IRInst.h" -void IRRegCache::Dirty(MIPSGPReg rd) { +void IRRegCache::Flush(int rd) { if (rd == 0) { return; } @@ -11,38 +11,42 @@ void IRRegCache::Dirty(MIPSGPReg rd) { } } -void IRRegCache::MapIn(MIPSGPReg rd) { - Dirty(rd); -} - -void IRRegCache::MapInIn(MIPSGPReg rs, MIPSGPReg rt) { - Dirty(rs); - Dirty(rt); +void IRRegCache::Discard(int rd) { + if (rd == 0) { + return; + } + reg_[rd].isImm = false; } -void IRRegCache::MapDirty(MIPSGPReg rd) { - Dirty(rd); +IRRegCache::IRRegCache(IRWriter *ir) : ir_(ir) { + memset(®_, 0, sizeof(reg_)); + reg_[0].isImm = true; + ir_ = ir; } -void IRRegCache::MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs) { - Dirty(rd); - Dirty(rs); +void IRRegCache::FlushAll() { + for (int i = 0; i < TOTAL_MAPPABLE_MIPSREGS; i++) { + Flush(i); + } } -void IRRegCache::MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt) { - Dirty(rd); - Dirty(rs); - Dirty(rt); +void IRRegCache::MapInIn(int rs, int rt) { + Flush(rs); + Flush(rt); } -void IRRegCache::Start(IRWriter *ir) { - memset(®_, 0, sizeof(reg_)); - reg_[0].isImm = true; - ir_ = ir; +void IRRegCache::MapDirtyIn(int rd, int rs) { + if (rs != rd) { + Discard(rd); + } + Flush(rs); } -void IRRegCache::FlushAll() { - for (int i = 0; i < TOTAL_MAPPABLE_MIPSREGS; i++) { - Dirty((MIPSGPReg)i); +void IRRegCache::MapDirtyInIn(int rd, int rs, int rt) { + if (rs != rd && rt != rd) { + Discard(rd); } + Flush(rs); + Flush(rt); } + diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h index bf53e2a818f0..1d7e78f7a888 100644 --- a/Core/MIPS/IR/IRRegCache.h +++ b/Core/MIPS/IR/IRRegCache.h @@ -17,27 +17,28 @@ struct RegIR { class IRWriter; +// Transient class IRRegCache { public: - void SetImm(MIPSGPReg r, u32 immVal) { + IRRegCache(IRWriter *ir); + + void SetImm(int r, u32 immVal) { reg_[r].isImm = true; reg_[r].immVal = immVal; } - bool IsImm(MIPSGPReg r) const { return reg_[r].isImm; } - u32 GetImm(MIPSGPReg r) const { return reg_[r].immVal; } - - void MapIn(MIPSGPReg rd); - void MapInIn(MIPSGPReg rs, MIPSGPReg rt); - void MapDirty(MIPSGPReg rd); - void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs); - void MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt); + bool IsImm(int r) const { return reg_[r].isImm; } + u32 GetImm(int r) const { return reg_[r].immVal; } - void Start(IRWriter *ir); void FlushAll(); + void MapInIn(int rs, int rt); + void MapDirtyIn(int rd, int rs); + void MapDirtyInIn(int rd, int rs, int rt); + private: - void Dirty(MIPSGPReg rd); + void Flush(int rd); + void Discard(int rd); RegIR reg_[TOTAL_MAPPABLE_MIPSREGS]; IRWriter *ir_; }; From ce8aae5ed1eca2e2e10f0ffaaa2303b8e83a6d34 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 01:43:27 +0200 Subject: [PATCH 09/77] Make the IRJit core selectable in developer tools --- Core/Config.cpp | 28 +++++++++++++++++++--------- Core/Config.h | 7 ++++++- Core/CoreParameter.h | 8 ++------ Core/MIPS/IR/IRCompBranch.cpp | 2 +- Core/MIPS/IR/IRInst.cpp | 4 ++++ Core/MIPS/IR/IRInst.h | 2 +- Core/MIPS/IR/IRJit.cpp | 6 ++++++ Core/MIPS/JitCommon/JitCommon.cpp | 4 ---- Core/MIPS/MIPS.cpp | 27 ++++++++++++++++++++------- Core/MemMapFunctions.cpp | 4 ++-- 
UI/EmuScreen.cpp | 4 ++-- UI/GameSettingsScreen.cpp | 7 +++++-- UI/MiscScreens.cpp | 2 +- UI/NativeApp.cpp | 8 ++++++-- android/jni/TestRunner.cpp | 2 +- headless/Headless.cpp | 10 ++++++---- unittest/JitHarness.cpp | 4 ++-- 17 files changed, 84 insertions(+), 45 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index 6faa97a0759b..137194c47571 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -282,9 +282,20 @@ static int DefaultNumWorkers() { return cpu_info.num_cores; } -static bool DefaultJit() { +// TODO: Default to IRJit on iOS when it's done. +static int DefaultCpuCore() { #ifdef IOS - return iosCanUseJit; + return iosCanUseJit ? CPU_CORE_JIT : CPU_CORE_INTERPRETER; +#elif defined(ARM) || defined(ARM64) || defined(_M_IX86) || defined(_M_X64) + return CPU_CORE_JIT; +#else + return CPU_CORE_INTERPRETER; +#endif +} + +static bool DefaultCodeGen() { +#ifdef IOS + return iosCanUseJit ? true : false; #elif defined(ARM) || defined(ARM64) || defined(_M_IX86) || defined(_M_X64) return true; #else @@ -353,8 +364,7 @@ static bool DefaultSasThread() { } static ConfigSetting cpuSettings[] = { - ReportedConfigSetting("Jit", &g_Config.bJit, &DefaultJit, true, true), - ReportedConfigSetting("CPUCore", &g_Config.bJit, &DefaultJit, true, true), + ReportedConfigSetting("CPUCore", &g_Config.iCpuCore, &DefaultCpuCore, true, true), ReportedConfigSetting("SeparateCPUThread", &g_Config.bSeparateCPUThread, false, true, true), ReportedConfigSetting("SeparateSASThread", &g_Config.bSeparateSASThread, &DefaultSasThread, true, true), ReportedConfigSetting("SeparateIOThread", &g_Config.bSeparateIOThread, true, true, true), @@ -464,7 +474,7 @@ static ConfigSetting graphicsSettings[] = { ReportedConfigSetting("VertexCache", &g_Config.bVertexCache, true, true, true), ReportedConfigSetting("TextureBackoffCache", &g_Config.bTextureBackoffCache, false, true, true), ReportedConfigSetting("TextureSecondaryCache", &g_Config.bTextureSecondaryCache, false, true, true), - ReportedConfigSetting("VertexDecJit", &g_Config.bVertexDecoderJit, &DefaultJit, false), + ReportedConfigSetting("VertexDecJit", &g_Config.bVertexDecoderJit, &DefaultCodeGen, false), #ifndef MOBILE_DEVICE ConfigSetting("FullScreen", &g_Config.bFullScreen, false), @@ -959,16 +969,16 @@ void Config::Load(const char *iniFileName, const char *controllerIniFilename) { } // Override ppsspp.ini JIT value to prevent crashing - if (!DefaultJit() && g_Config.bJit) { + if (DefaultCpuCore() != CPU_CORE_JIT && g_Config.iCpuCore == CPU_CORE_JIT) { jitForcedOff = true; - g_Config.bJit = false; + g_Config.iCpuCore = CPU_CORE_INTERPRETER; } } void Config::Save() { if (jitForcedOff) { // if JIT has been forced off, we don't want to screw up the user's ppsspp.ini - g_Config.bJit = true; + g_Config.iCpuCore = CPU_CORE_JIT; } if (iniFilename_.size() && g_Config.bSaveSettings) { @@ -1037,7 +1047,7 @@ void Config::Save() { } if (jitForcedOff) { // force JIT off again just in case Config::Save() is called without exiting PPSSPP - g_Config.bJit = false; + g_Config.iCpuCore = CPU_CORE_INTERPRETER; } } diff --git a/Core/Config.h b/Core/Config.h index 825091bd0cc6..6a2016997b0c 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -33,6 +33,12 @@ const int PSP_DEFAULT_FIRMWARE = 150; static const s8 VOLUME_OFF = 0; static const s8 VOLUME_MAX = 10; +enum CPUCore { + CPU_CORE_INTERPRETER = 0, + CPU_CORE_JIT = 1, + CPU_CORE_IRJIT = 2, +}; + enum { ROTATION_AUTO = 0, ROTATION_LOCKED_HORIZONTAL = 1, @@ -119,7 +125,6 @@ struct Config { // Core bool bIgnoreBadMemAccess; bool 
bFastMemory; - bool bJit; int iCpuCore; bool bCheckForNewVersion; bool bForceLagSync; diff --git a/Core/CoreParameter.h b/Core/CoreParameter.h index 1517b50c03c1..c9351443c783 100644 --- a/Core/CoreParameter.h +++ b/Core/CoreParameter.h @@ -20,12 +20,7 @@ #include #include "Core/Compatibility.h" - -enum CPUCore { - CPU_INTERPRETER, - CPU_JIT, - CPU_IRJIT, -}; +#include "Core/Config.h" enum GPUCore { GPUCORE_NULL, @@ -47,6 +42,7 @@ struct CoreParameter { CPUCore cpuCore; GPUCore gpuCore; + GraphicsContext *graphicsContext; // TODO: Find a better place. Thin3DContext *thin3d; bool enableSound; // there aren't multiple sound cores. diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 9d69b282c212..2b478f695a01 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -219,7 +219,7 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - ir.Write(IROp::VfpCondToReg, IRTEMP_0); + ir.Write(IROp::VfpuCtrlToReg, IRTEMP_0, VFPU_CTRL_CC); ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index b4eb14d98f5e..cba03ae7957b 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -81,6 +81,7 @@ static const IRMeta irMeta[] = { { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, { IROp::FMovToGPR, "FMovToGPR", "GF" }, { IROp::FpCondToReg, "FpCondToReg", "G" }, + { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" }, { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, @@ -329,6 +330,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::FpCondToReg: mips->r[inst->dest] = mips->fpcond; break; + case IROp::VfpuCtrlToReg: + mips->r[inst->dest] = mips->vfpuCtrl[inst->src1]; + break; case IROp::FRound: mips->r[inst->dest] = (int)floorf(mips->f[inst->src1] + 0.5f); break; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 2c6ab75cd756..e044825f1ccb 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -119,7 +119,7 @@ enum class IROp : u8 { FMovToGPR, FpCondToReg, - VfpCondToReg, + VfpuCtrlToReg, ZeroFpCond, FCmpUnordered, diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index d393bf78ca70..0d8fca504b14 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -255,6 +255,12 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { MIPSCompileOp(inst, this); js.compilerPC += 4; js.numInstructions++; + + if (ir.GetConstants().size() > 128) { + // Need to break the block + ir.Write(IROp::ExitToConst, ir.AddConstant(js.compilerPC)); + js.compiling = false; + } } ir.Simplify(); diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index 630494f97329..e267b9352ed1 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -47,9 +47,6 @@ namespace MIPSComp { } JitInterface *CreateNativeJit(MIPSState *mips) { -#if 1 - return new MIPSComp::IRJit(mips); -#else #if defined(ARM) return new MIPSComp::ArmJit(mips); #elif defined(ARM64) @@ -60,7 +57,6 @@ namespace MIPSComp { return new MIPSComp::MipsJit(mips); #else return new MIPSComp::FakeJit(mips); -#endif #endif } diff --git a/Core/MIPS/MIPS.cpp b/Core/MIPS/MIPS.cpp index 1140b67914fb..0482f21211f6 100644 --- a/Core/MIPS/MIPS.cpp +++ b/Core/MIPS/MIPS.cpp @@ -27,6 +27,7 @@ #include "Core/MIPS/MIPSTables.h" #include "Core/MIPS/MIPSDebugInterface.h" 
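As an aside (illustration only), the three-way core setting introduced here is consumed by switching on the enum, as the MIPS.cpp hunk just below does in Init()/UpdateCore(); a tiny sketch of that shape, with plain strings standing in for CreateNativeJit(), new IRJit() and the interpreter path:

#include <cstdio>

enum CPUCore { CPU_CORE_INTERPRETER = 0, CPU_CORE_JIT = 1, CPU_CORE_IRJIT = 2 };

// Placeholder for the real factories; the interpreter case leaves jit == nullptr.
static const char *CoreName(CPUCore core) {
	switch (core) {
	case CPU_CORE_JIT:         return "native jit (CreateNativeJit)";
	case CPU_CORE_IRJIT:       return "IR jit (new IRJit)";
	case CPU_CORE_INTERPRETER: return "interpreter (no jit object)";
	}
	return "?";
}

int main() {
	int iCpuCore = CPU_CORE_IRJIT;  // as read from the new "CPUCore" ini setting
	printf("selected core: %s\n", CoreName((CPUCore)iCpuCore));
	return 0;
}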
#include "Core/MIPS/MIPSVFPUUtils.h" +#include "Core/MIPS/IR/IRJit.h" #include "Core/Reporting.h" #include "Core/System.h" #include "Core/HLE/sceDisplay.h" @@ -206,8 +207,10 @@ void MIPSState::Init() { // Initialize the VFPU random number generator with .. something? rng.Init(0x1337); - if (PSP_CoreParameter().cpuCore == CPU_JIT) { + if (PSP_CoreParameter().cpuCore == CPU_CORE_JIT) { MIPSComp::jit = MIPSComp::CreateNativeJit(this); + } else if (PSP_CoreParameter().cpuCore == CPU_CORE_IRJIT) { + MIPSComp::jit = new MIPSComp::IRJit(this); } else { MIPSComp::jit = nullptr; } @@ -224,14 +227,23 @@ void MIPSState::UpdateCore(CPUCore desired) { PSP_CoreParameter().cpuCore = desired; switch (PSP_CoreParameter().cpuCore) { - case CPU_JIT: + case CPU_CORE_JIT: INFO_LOG(CPU, "Switching to JIT"); - if (!MIPSComp::jit) { - MIPSComp::jit = MIPSComp::CreateNativeJit(this); + if (MIPSComp::jit) { + delete MIPSComp::jit; } + MIPSComp::jit = MIPSComp::CreateNativeJit(this); + break; + + case CPU_CORE_IRJIT: + INFO_LOG(CPU, "Switching to IRJIT"); + if (MIPSComp::jit) { + delete MIPSComp::jit; + } + MIPSComp::jit = new MIPSComp::IRJit(this); break; - case CPU_INTERPRETER: + case CPU_CORE_INTERPRETER: INFO_LOG(CPU, "Switching to interpreter"); delete MIPSComp::jit; MIPSComp::jit = 0; @@ -292,11 +304,12 @@ void MIPSState::SingleStep() { // returns 1 if reached ticks limit int MIPSState::RunLoopUntil(u64 globalTicks) { switch (PSP_CoreParameter().cpuCore) { - case CPU_JIT: + case CPU_CORE_JIT: + case CPU_CORE_IRJIT: MIPSComp::jit->RunLoopUntil(globalTicks); break; - case CPU_INTERPRETER: + case CPU_CORE_INTERPRETER: return MIPSInterpret_RunUntil(globalTicks); } return 1; diff --git a/Core/MemMapFunctions.cpp b/Core/MemMapFunctions.cpp index 93029d65ff5c..d367205ef797 100644 --- a/Core/MemMapFunctions.cpp +++ b/Core/MemMapFunctions.cpp @@ -87,7 +87,7 @@ inline void ReadFromHardware(T &var, const u32 address) { var = *((const T*)GetPointerUnchecked(address)); } else { // In jit, we only flush PC when bIgnoreBadMemAccess is off. - if (g_Config.bJit && g_Config.bIgnoreBadMemAccess) { + if (g_Config.iCpuCore != CPU_CORE_INTERPRETER && g_Config.bIgnoreBadMemAccess) { WARN_LOG(MEMMAP, "ReadFromHardware: Invalid address %08x", address); } else { WARN_LOG(MEMMAP, "ReadFromHardware: Invalid address %08x PC %08x LR %08x", address, currentMIPS->pc, currentMIPS->r[MIPS_REG_RA]); @@ -123,7 +123,7 @@ inline void WriteToHardware(u32 address, const T data) { *(T*)GetPointerUnchecked(address) = data; } else { // In jit, we only flush PC when bIgnoreBadMemAccess is off. - if (g_Config.bJit && g_Config.bIgnoreBadMemAccess) { + if (g_Config.iCpuCore != CPU_CORE_INTERPRETER && g_Config.bIgnoreBadMemAccess) { WARN_LOG(MEMMAP, "WriteToHardware: Invalid address %08x", address); } else { WARN_LOG(MEMMAP, "WriteToHardware: Invalid address %08x PC %08x LR %08x", address, currentMIPS->pc, currentMIPS->r[MIPS_REG_RA]); diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index dd89e1ae7d7c..06a3446eb61a 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -101,7 +101,7 @@ void EmuScreen::bootGame(const std::string &filename) { invalid_ = true; CoreParameter coreParam; - coreParam.cpuCore = g_Config.bJit ? 
CPU_JIT : CPU_INTERPRETER; + coreParam.cpuCore = (CPUCore)g_Config.iCpuCore; coreParam.gpuCore = GPUCORE_GLES; switch (GetGPUBackend()) { case GPUBackend::OPENGL: @@ -282,7 +282,7 @@ void EmuScreen::sendMessage(const char *message, const char *value) { } else if (!strcmp(message, "clear jit")) { currentMIPS->ClearJitCache(); if (PSP_IsInited()) { - currentMIPS->UpdateCore(g_Config.bJit ? CPU_JIT : CPU_INTERPRETER); + currentMIPS->UpdateCore((CPUCore)g_Config.iCpuCore); } } else if (!strcmp(message, "window minimized")) { if (!strcmp(value, "true")) { diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index 2a5828eaec0a..1ade2a4303f3 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -1059,8 +1059,11 @@ void DeveloperToolsScreen::CreateViews() { } } #endif - if (canUseJit) { - list->Add(new CheckBox(&g_Config.bJit, sy->T("Dynarec", "Dynarec (JIT)")))->OnClick.Handle(this, &DeveloperToolsScreen::OnJitAffectingSetting); + + static const char *cpuCores[] = { "Interpreter", "Dynarec (JIT)", "IRJit" }; + PopupMultiChoice *core = list->Add(new PopupMultiChoice(&g_Config.iCpuCore, gr->T("CPU Core"), cpuCores, 0, ARRAY_SIZE(cpuCores), sy->GetName(), screenManager())); + if (!canUseJit) { + core->HideChoice(1); } list->Add(new CheckBox(&g_Config.bShowDeveloperMenu, dev->T("Show Developer Menu"))); diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp index e6f0cc28e650..c94c9e6df49c 100644 --- a/UI/MiscScreens.cpp +++ b/UI/MiscScreens.cpp @@ -133,7 +133,7 @@ void HandleCommonMessages(const char *message, const char *value, ScreenManager MIPSComp::jit->ClearCache(); } if (PSP_IsInited()) { - currentMIPS->UpdateCore(g_Config.bJit ? CPU_JIT : CPU_INTERPRETER); + currentMIPS->UpdateCore((CPUCore)g_Config.iCpuCore); } } } diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp index 786e4e51263c..9ca8045dd89e 100644 --- a/UI/NativeApp.cpp +++ b/UI/NativeApp.cpp @@ -392,11 +392,15 @@ void NativeInit(int argc, const char *argv[], const char *savegame_dir, const ch gfxLog = true; break; case 'j': - g_Config.bJit = true; + g_Config.iCpuCore = CPU_CORE_JIT; g_Config.bSaveSettings = false; break; case 'i': - g_Config.bJit = false; + g_Config.iCpuCore = CPU_CORE_INTERPRETER; + g_Config.bSaveSettings = false; + break; + case 'r': + g_Config.iCpuCore = CPU_CORE_IRJIT; g_Config.bSaveSettings = false; break; case '-': diff --git a/android/jni/TestRunner.cpp b/android/jni/TestRunner.cpp index cfc4e0354e36..29ca2b2a0cc4 100644 --- a/android/jni/TestRunner.cpp +++ b/android/jni/TestRunner.cpp @@ -69,7 +69,7 @@ void RunTests() #endif CoreParameter coreParam; - coreParam.cpuCore = g_Config.bJit ? CPU_JIT : CPU_INTERPRETER; + coreParam.cpuCore = (CPUCore)g_Config.iCpuCore; coreParam.gpuCore = g_Config.bSoftwareRendering ? 
GPUCORE_SOFTWARE : GPUCORE_GLES; coreParam.enableSound = g_Config.bEnableSound; coreParam.graphicsContext = PSP_CoreParameter().graphicsContext; diff --git a/headless/Headless.cpp b/headless/Headless.cpp index 687ac1d875dd..f818da68cab7 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -207,11 +207,11 @@ int main(int argc, const char* argv[]) #endif bool fullLog = false; - bool useJit = true; bool autoCompare = false; bool verbose = false; const char *stateToLoad = 0; GPUCore gpuCore = GPUCORE_NULL; + CPUCore cpuCore = CPU_CORE_JIT; std::vector testFilenames; const char *mountIso = 0; @@ -236,9 +236,11 @@ int main(int argc, const char* argv[]) else if (!strcmp(argv[i], "-l") || !strcmp(argv[i], "--log")) fullLog = true; else if (!strcmp(argv[i], "-i")) - useJit = false; + cpuCore = CPU_CORE_INTERPRETER; else if (!strcmp(argv[i], "-j")) - useJit = true; + cpuCore = CPU_CORE_JIT; + else if (!strcmp(argv[i], "-ir")) + cpuCore = CPU_CORE_IRJIT; else if (!strcmp(argv[i], "-c") || !strcmp(argv[i], "--compare")) autoCompare = true; else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) @@ -311,7 +313,7 @@ int main(int argc, const char* argv[]) } CoreParameter coreParameter; - coreParameter.cpuCore = useJit ? CPU_JIT : CPU_INTERPRETER; + coreParameter.cpuCore = cpuCore; coreParameter.gpuCore = glWorking ? gpuCore : GPUCORE_NULL; coreParameter.graphicsContext = graphicsContext; coreParameter.enableSound = false; diff --git a/unittest/JitHarness.cpp b/unittest/JitHarness.cpp index b80fb04e6d2a..2467a27c0b5c 100644 --- a/unittest/JitHarness.cpp +++ b/unittest/JitHarness.cpp @@ -83,7 +83,7 @@ static void SetupJitHarness() { coreState = CORE_POWERUP; currentMIPS = &mipsr4k; Memory::g_MemorySize = Memory::RAM_NORMAL_SIZE; - PSP_CoreParameter().cpuCore = CPU_INTERPRETER; + PSP_CoreParameter().cpuCore = CPU_CORE_INTERPRETER; PSP_CoreParameter().unthrottle = true; Memory::Init(); @@ -169,7 +169,7 @@ bool TestJit() { double jit_speed = 0.0, interp_speed = 0.0; if (compileSuccess) { interp_speed = ExecCPUTest(); - mipsr4k.UpdateCore(CPU_JIT); + mipsr4k.UpdateCore(CPU_CORE_JIT); jit_speed = ExecCPUTest(); // Disassemble From 1a2edc67d07ca63e6659fa38b9675e2c415c609f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 02:03:11 +0200 Subject: [PATCH 10/77] Add support for float store/load --- Core/MIPS/IR/IRCompFPU.cpp | 22 +++++++++++++++++++++- Core/MIPS/IR/IRInst.cpp | 8 ++++++++ Core/MemMap.h | 16 ++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index f94bddbaad7a..c4353dd9836b 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -73,7 +73,27 @@ void IRJit::Comp_FPU3op(MIPSOpcode op) { } void IRJit::Comp_FPULS(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + s32 offset = _IMM16; + int ft = _FT; + MIPSGPReg rs = _RS; + + switch (op >> 26) { + case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1 + { + ir.Write(IROp::LoadFloat, ft, rs, ir.AddConstant(offset)); + } + break; + case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1 + { + ir.Write(IROp::StoreFloat, ft, rs, ir.AddConstant(offset)); + } + break; + + default: + _dbg_assert_msg_(CPU, 0, "Trying to interpret FPULS instruction that can't be interpreted"); + break; + } } void IRJit::Comp_FPUComp(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index cba03ae7957b..439c0b25fd82 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -61,9 +61,11 @@ 
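A quick standalone illustration of the fields Comp_FPULS above extracts from an lwc1/swc1 word (the same layout the _IMM16/_RS/_FT macros encode); the example opcode here is made up for the demonstration:

#include <cstdint>
#include <cstdio>

int main() {
	uint32_t op = 0xC4A2FFF8;                  // lwc1 f2, -8(a1), as an example
	uint32_t major = op >> 26;                 // 49 = lwc1, 57 = swc1
	int32_t offset = (int16_t)(op & 0xFFFF);   // sign-extended, like _IMM16
	int rs = (op >> 21) & 0x1F;                // base GPR, like _RS
	int ft = (op >> 16) & 0x1F;                // FPU register, like _FT
	printf("major=%u ft=%d rs=%d offset=%d\n", major, ft, rs, offset);
	return 0;
}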
static const IRMeta irMeta[] = { { IROp::Load16, "Load16", "GGC" }, { IROp::Load16Ext, "Load16Ext", "GGC" }, { IROp::Load32, "Load32", "GGC" }, + { IROp::LoadFloat, "LoadFloat", "FGC" }, { IROp::Store8, "Store8", "GGC" }, { IROp::Store16, "Store16", "GGC" }, { IROp::Store32, "Store32", "GGC" }, + { IROp::StoreFloat, "StoreFloat", "FGC" }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, @@ -176,6 +178,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::Load32: mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + constPool[inst->src2]); break; + case IROp::LoadFloat: + mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); + break; case IROp::Store8: Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); @@ -186,6 +191,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::Store32: Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); break; + case IROp::StoreFloat: + Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; case IROp::ShlImm: mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; diff --git a/Core/MemMap.h b/Core/MemMap.h index c49a41d119cc..c2b4ad3b12a6 100644 --- a/Core/MemMap.h +++ b/Core/MemMap.h @@ -186,6 +186,14 @@ inline u32 ReadUnchecked_U32(const u32 address) { #endif } +inline float ReadUnchecked_Float(const u32 address) { +#ifdef _ARCH_32 + return *(float *)(base + (address & MEMVIEW32_MASK)); +#else + return *(float *)(base + address); +#endif +} + inline u16 ReadUnchecked_U16(const u32 address) { #ifdef _ARCH_32 return *(u16_le *)(base + (address & MEMVIEW32_MASK)); @@ -210,6 +218,14 @@ inline void WriteUnchecked_U32(u32 data, u32 address) { #endif } +inline void WriteUnchecked_Float(float data, u32 address) { +#ifdef _ARCH_32 + *(float *)(base + (address & MEMVIEW32_MASK)) = data; +#else + *(float *)(base + address) = data; +#endif +} + inline void WriteUnchecked_U16(u16 data, u32 address) { #ifdef _ARCH_32 *(u16_le *)(base + (address & MEMVIEW32_MASK)) = data; From 7d4774db4c1bdea52d12781586b5c8168fe5ba8c Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 02:08:25 +0200 Subject: [PATCH 11/77] Fix wsbw --- Core/MIPS/IR/IRCompALU.cpp | 2 +- Core/MIPS/IR/IRInst.cpp | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index d68150b17842..4a58a0224796 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -292,7 +292,7 @@ void IRJit::Comp_Allegrex2(MIPSOpcode op) { ir.Write(IROp::BSwap16, rd, rt); break; case 0xE0: //wsbw - ir.Write(IROp::BSwap16, rd, rt); + ir.Write(IROp::BSwap32, rd, rt); break; default: Comp_Generic(op); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 439c0b25fd82..b1aaa34abc21 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -307,8 +307,11 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; } case IROp::BSwap32: - mips->r[inst->dest] = swap32(mips->r[inst->src1]); + { + u32 x = mips->r[inst->src1]; + mips->r[inst->dest] = ((x & 0xFF000000) >> 24) | ((x & 0x00FF0000) >> 8) | ((x & 0x0000FF00) << 8) | ((x & 0x000000FF) << 24); break; + } case IROp::FAdd: mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2]; From 
e750987052b8a5eea356d1535c7cd21181fc606a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 02:08:54 +0200 Subject: [PATCH 12/77] ir-jit: Fix bug in ext --- Core/MIPS/IR/IRCompALU.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 4a58a0224796..fd5944fd0694 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -224,7 +224,6 @@ void IRJit::Comp_ShiftType(MIPSOpcode op) { void IRJit::Comp_Special3(MIPSOpcode op) { CONDITIONAL_DISABLE; - MIPSGPReg rs = _RS; MIPSGPReg rt = _RT; @@ -237,9 +236,13 @@ void IRJit::Comp_Special3(MIPSOpcode op) { return; switch (op & 0x3f) { - case 0x0: //ext - ir.Write(IROp::Shl, rt, rs); - ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(mask)); + case 0x0: + if (pos != 0) { + ir.Write(IROp::ShrImm, rt, rs, pos); + ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(mask)); + } else { + ir.Write(IROp::AndConst, rt, rs, ir.AddConstant(mask)); + } break; case 0x4: //ins From 46e839b2b2f09c60c1e356a4c8ae7997b40fa020 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 02:26:36 +0200 Subject: [PATCH 13/77] ir-jit: Fix bugs in rounding --- Core/MIPS/IR/IRInst.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index b1aaa34abc21..1fd990adba9b 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -345,7 +345,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c mips->r[inst->dest] = mips->vfpuCtrl[inst->src1]; break; case IROp::FRound: - mips->r[inst->dest] = (int)floorf(mips->f[inst->src1] + 0.5f); + mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1] + 0.5f); break; case IROp::FTrunc: { @@ -363,10 +363,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; } case IROp::FCeil: - mips->r[inst->dest] = (int)ceilf(mips->f[inst->src1]); + mips->fs[inst->dest] = (int)ceilf(mips->f[inst->src1]); break; case IROp::FFloor: - mips->r[inst->dest] = (int)floorf(mips->f[inst->src1]); + mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1]); break; case IROp::FCvtSW: From aae32bd929e9aed2cd4444477960028b0325d3eb Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 02:47:19 +0200 Subject: [PATCH 14/77] ir-jit: Re-enable mult. Fix bvf/bvt --- Core/MIPS/IR/IRCompALU.cpp | 1 - Core/MIPS/IR/IRCompBranch.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index fd5944fd0694..46c43ded73a0 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -305,7 +305,6 @@ void IRJit::Comp_Allegrex2(MIPSOpcode op) { void IRJit::Comp_MulDivType(MIPSOpcode op) { CONDITIONAL_DISABLE; - DISABLE; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; MIPSGPReg rd = _RD; diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 2b478f695a01..0cf3e7d8f79f 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -237,7 +237,7 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 
4 : 8); - ir.Write(IROp::AndConst, IRTEMP_0, IRTEMP_0, ir.AddConstant(imm3)); + ir.Write(IROp::AndConst, IRTEMP_0, IRTEMP_0, ir.AddConstant(1 << imm3)); FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_0, 0); From 14df39d7c9987f5daf50901869dfd2583f8e567d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 10:36:37 +0200 Subject: [PATCH 15/77] Fix IRTEMP clash bug. Add more cases to the constant propagation pass. --- Core/MIPS/IR/IRCompALU.cpp | 9 ++++-- Core/MIPS/IR/IRCompBranch.cpp | 28 ++++++++--------- Core/MIPS/IR/IRInst.cpp | 6 ++++ Core/MIPS/IR/IRInst.h | 4 +-- Core/MIPS/IR/IRJit.cpp | 5 +-- Core/MIPS/IR/IRPassSimplify.cpp | 56 ++++++++++++++++++++++++++++----- Core/MIPS/IR/IRPassSimplify.h | 2 +- Core/MIPS/IR/IRRegCache.cpp | 11 ++++++- Core/MIPS/IR/IRRegCache.h | 2 ++ 9 files changed, 94 insertions(+), 29 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 46c43ded73a0..7f21c2c572d9 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -47,6 +47,7 @@ namespace MIPSComp { void IRJit::Comp_IType(MIPSOpcode op) { CONDITIONAL_DISABLE; + s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension u32 uimm = op & 0xFFFF; u32 suimm = (u32)(s32)simm; @@ -236,7 +237,7 @@ void IRJit::Comp_Special3(MIPSOpcode op) { return; switch (op & 0x3f) { - case 0x0: + case 0x0: // ext if (pos != 0) { ir.Write(IROp::ShrImm, rt, rs, pos); ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(mask)); @@ -247,17 +248,21 @@ void IRJit::Comp_Special3(MIPSOpcode op) { case 0x4: //ins { + logBlocks = 1; u32 sourcemask = mask >> pos; u32 destmask = ~(sourcemask << pos); ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask)); + if (pos != 0) { + ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos); + } ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask)); - ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos); ir.Write(IROp::Or, rt, rt, IRTEMP_0); } break; } } + void IRJit::Comp_Allegrex(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rt = _RT; diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 0cf3e7d8f79f..a290784904e2 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -73,12 +73,12 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) MIPSGPReg rhs = rt; if (!delaySlotIsNice) { // if likely, we don't need this if (rs != 0) { - ir.Write(IROp::Mov, IRTEMP_0, rs); - lhs = (MIPSGPReg)IRTEMP_0; + ir.Write(IROp::Mov, IRTEMP_LHS, rs); + lhs = (MIPSGPReg)IRTEMP_LHS; } if (rt != 0) { - ir.Write(IROp::Mov, IRTEMP_1, rt); - rhs = (MIPSGPReg)IRTEMP_1; + ir.Write(IROp::Mov, IRTEMP_RHS, rt); + rhs = (MIPSGPReg)IRTEMP_RHS; } } @@ -113,8 +113,8 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool MIPSGPReg lhs = rs; if (!delaySlotIsNice) { // if likely, we don't need this - ir.Write(IROp::Mov, IRTEMP_0, rs); - lhs = (MIPSGPReg)IRTEMP_0; + ir.Write(IROp::Mov, IRTEMP_LHS, rs); + lhs = (MIPSGPReg)IRTEMP_LHS; } if (andLink) ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8); @@ -179,7 +179,7 @@ void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 targetAddr = GetCompilerPC() + offset + 4; MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - ir.Write(IROp::FpCondToReg, IRTEMP_0); + ir.Write(IROp::FpCondToReg, IRTEMP_LHS); if (!likely) CompileDelaySlot(); @@ -187,7 +187,7 @@ void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { FlushAll(); // Not taken - 
ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_0, 0); + ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_LHS, 0); // Taken if (likely) CompileDelaySlot(); @@ -218,8 +218,8 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 targetAddr = GetCompilerPC() + offset + 4; MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - - ir.Write(IROp::VfpuCtrlToReg, IRTEMP_0, VFPU_CTRL_CC); + logBlocks = 1; + ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC); ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); @@ -237,9 +237,9 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8); - ir.Write(IROp::AndConst, IRTEMP_0, IRTEMP_0, ir.AddConstant(1 << imm3)); + ir.Write(IROp::AndConst, IRTEMP_LHS, IRTEMP_LHS, ir.AddConstant(1 << imm3)); FlushAll(); - ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_0, 0); + ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0); if (likely) CompileDelaySlot(); @@ -334,8 +334,8 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { FlushAll(); } else { // Bad delay slot. - ir.Write(IROp::Mov, IRTEMP_0, rs); - destReg = IRTEMP_0; + ir.Write(IROp::Mov, IRTEMP_LHS, rs); + destReg = IRTEMP_LHS; if (andLink) ir.WriteSetConstant(rd, GetCompilerPC() + 8); CompileDelaySlot(); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 1fd990adba9b..1a184e00eb4f 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -478,6 +478,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c default: Crash(); } +#ifdef _DEBUG + if (mips->r[0] != 0) + Crash(); +#endif inst++; } @@ -529,6 +533,8 @@ const char *GetGPRName(int r) { switch (r) { case IRTEMP_0: return "irtemp0"; case IRTEMP_1: return "irtemp1"; + case IRTEMP_LHS: return "irtemp_lhs"; + case IRTEMP_RHS: return "irtemp_rhs"; default: return "(unk)"; } } diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index e044825f1ccb..062d5189abc5 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -204,8 +204,8 @@ inline IROp ComparisonToExit(IRComparison comp) { enum { IRTEMP_0 = 192, IRTEMP_1, - IRTEMP_2, - IRTEMP_3, + IRTEMP_LHS, // Reserved for use in branches + IRTEMP_RHS, // Reserved for use in branches // Hacky way to get to other state IRREG_LO = 226, // offset of lo in MIPSState / 4 diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 0d8fca504b14..09f6acbe614e 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -269,7 +269,8 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { IRWriter *code = &ir; if (true) { - PropagateConstants(ir, simplified); + if (PropagateConstants(ir, simplified)) + logBlocks = 1; code = &simplified; } @@ -362,7 +363,7 @@ void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { } else { ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name); } -} +} void IRJit::Comp_Generic(MIPSOpcode op) { FlushAll(); diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 38141951de10..d5b943c23c1a 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -26,6 +26,12 @@ u32 Evaluate(u32 a, u32 b, IROp op) { case IROp::And: case IROp::AndConst: return a & b; case IROp::Or: case IROp::OrConst: return a | b; case IROp::Xor: case IROp::XorConst: return a ^ b; + case IROp::Shr: case IROp::ShrImm: return a >> b; 
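To make the new Evaluate cases concrete (Shr above, Sar/Ror/Shl/Slt just below), here is a standalone check of the rotate-right and signed set-less-than folds; shift amounts are assumed to be 1..31, since a plain C++ shift by 32 - 0 would be undefined:

#include <cstdint>
#include <cstdio>

// Rotate right as folded for IROp::Ror / IROp::RorImm (b assumed 1..31).
static uint32_t Ror(uint32_t a, uint32_t b) {
	return (a >> b) | (a << (32 - b));
}

int main() {
	printf("%08x\n", Ror(0x80000001, 1));              // c0000000
	printf("%d\n", (int32_t)0xFFFFFFFF < (int32_t)1);  // Slt: -1 < 1 -> 1
	return 0;
}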
+ case IROp::Sar: case IROp::SarImm: return (s32)a >> b; + case IROp::Ror: case IROp::RorImm: return (a >> b) | (a << (32 - b)); + case IROp::Shl: case IROp::ShlImm: return a << b; + case IROp::Slt: case IROp::SltConst: return ((s32)a < (s32)b); + case IROp::SltU: case IROp::SltUConst: return (a < b); default: return -1; } @@ -38,16 +44,19 @@ IROp ArithToArithConst(IROp op) { case IROp::And: return IROp::AndConst; case IROp::Or: return IROp::OrConst; case IROp::Xor: return IROp::XorConst; + case IROp::Slt: return IROp::SltConst; + case IROp::SltU: return IROp::SltUConst; default: return (IROp)-1; } } -void PropagateConstants(const IRWriter &in, IRWriter &out) { +bool PropagateConstants(const IRWriter &in, IRWriter &out) { IRRegCache gpr(&out); const u32 *constants = in.GetConstants().data(); + bool logBlocks = false; for (int i = 0; i < (int)in.GetInstructions().size(); i++) { IRInst inst = in.GetInstructions()[i]; bool symmetric = true; @@ -57,6 +66,8 @@ void PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::Sub: + case IROp::Slt: + case IROp::SltU: symmetric = false; // fallthrough case IROp::Add: case IROp::And: @@ -67,7 +78,8 @@ void PropagateConstants(const IRWriter &in, IRWriter &out) { } else if (gpr.IsImm(inst.src2) && inst.src1 != inst.src2 && inst.dest != inst.src2) { gpr.MapDirtyIn(inst.dest, inst.src1); if (gpr.GetImm(inst.src2) == 0 && (inst.op == IROp::Add || inst.op == IROp::Or)) { - out.Write(IROp::Mov, inst.dest, inst.src1); + if (inst.dest != inst.src1) + out.Write(IROp::Mov, inst.dest, inst.src1); } else { out.Write(ArithToArithConst(inst.op), inst.dest, inst.src1, out.AddConstant(gpr.GetImm(inst.src2))); } @@ -85,6 +97,8 @@ void PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::AndConst: case IROp::OrConst: case IROp::XorConst: + case IROp::SltConst: + case IROp::SltUConst: if (gpr.IsImm(inst.src1)) { gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), constants[inst.src2], inst.op)); } else { @@ -93,6 +107,18 @@ void PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::ShlImm: + case IROp::ShrImm: + case IROp::RorImm: + case IROp::SarImm: + if (gpr.IsImm(inst.src1)) { + gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), inst.src2, inst.op)); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + goto doDefault; + } + break; + case IROp::Mov: if (inst.src1 == inst.src2) { // Nop @@ -107,18 +133,33 @@ void PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::Store8: case IROp::Store16: case IROp::Store32: - // Just pass through, no excessive flushing - gpr.MapInIn(inst.dest, inst.src1); - goto doDefault; + if (gpr.IsImm(inst.src1) && inst.src1 != inst.dest) { + gpr.MapIn(inst.dest); + out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); + } else { + // Just pass through, no excessive flushing + gpr.MapInIn(inst.dest, inst.src1); + goto doDefault; + } + break; case IROp::Load8: case IROp::Load8Ext: case IROp::Load16: case IROp::Load16Ext: case IROp::Load32: - gpr.MapDirtyIn(inst.dest, inst.src1); - goto doDefault; + if (gpr.IsImm(inst.src1) && inst.src1 != inst.dest && inst.src2 != inst.dest) { + gpr.MapDirty(inst.dest); + out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); + logBlocks = true; + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + goto doDefault; + } + break; + case IROp::Syscall: + case IROp::Interpret: case IROp::ExitToConst: case IROp::ExitToReg: case IROp::ExitToConstIfEq: @@ -155,4 
+196,5 @@ void PropagateConstants(const IRWriter &in, IRWriter &out) { } } } + return logBlocks; } \ No newline at end of file diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index b5d0af1e95d1..5a57be1cfae4 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -6,4 +6,4 @@ void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool); -void PropagateConstants(const IRWriter &in, IRWriter &out); \ No newline at end of file +bool PropagateConstants(const IRWriter &in, IRWriter &out); \ No newline at end of file diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index f1c020139579..c7e11aa6d9ea 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -26,10 +26,19 @@ IRRegCache::IRRegCache(IRWriter *ir) : ir_(ir) { void IRRegCache::FlushAll() { for (int i = 0; i < TOTAL_MAPPABLE_MIPSREGS; i++) { - Flush(i); + if (i < IRTEMP_0) + Flush(i); } } +void IRRegCache::MapIn(int rd) { + Flush(rd); +} + +void IRRegCache::MapDirty(int rd) { + Discard(rd); +} + void IRRegCache::MapInIn(int rs, int rt) { Flush(rs); Flush(rt); diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h index 1d7e78f7a888..68570f50acf5 100644 --- a/Core/MIPS/IR/IRRegCache.h +++ b/Core/MIPS/IR/IRRegCache.h @@ -32,6 +32,8 @@ class IRRegCache { void FlushAll(); + void MapDirty(int rd); + void MapIn(int rd); void MapInIn(int rs, int rt); void MapDirtyIn(int rd, int rs); void MapDirtyInIn(int rd, int rs, int rt); From 98113edbd4359441df7ce25241bb2ee34ba66670 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 11:29:11 +0200 Subject: [PATCH 16/77] More simplify pass --- Core/MIPS/IR/IRJit.cpp | 9 ++++++--- Core/MIPS/IR/IRPassSimplify.cpp | 11 +++++------ Core/MIPS/IR/IRRegCache.cpp | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 09f6acbe614e..cc8bc8b65cc6 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -256,7 +256,7 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { js.compilerPC += 4; js.numInstructions++; - if (ir.GetConstants().size() > 128) { + if (ir.GetConstants().size() > 64) { // Need to break the block ir.Write(IROp::ExitToConst, ir.AddConstant(js.compilerPC)); js.compiling = false; @@ -272,6 +272,9 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { if (PropagateConstants(ir, simplified)) logBlocks = 1; code = &simplified; + // Some blocks in tekken generate curious numbers of constants after propagation. 
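Worth spelling out: the IRRegCache these passes rely on is purely a constant tracker. It remembers which GPRs currently hold a known immediate; Discard just forgets one, while Flush presumably also has to materialize the pending value into the IR before forgetting it (that part is omitted in the toy model below, which is an illustration rather than the real class):

#include <cstdint>
#include <cstdio>

struct ToyImmCache {
	struct Reg { bool isImm = false; uint32_t immVal = 0; };
	Reg reg[32];

	void SetImm(int r, uint32_t v) { reg[r].isImm = true; reg[r].immVal = v; }
	bool IsImm(int r) const { return reg[r].isImm; }
	uint32_t GetImm(int r) const { return reg[r].immVal; }

	// Register 0 is always zero, so it never needs to be touched.
	void Discard(int r) { if (r != 0) reg[r].isImm = false; }
	void Flush(int r)   { Discard(r); }  // the real cache would emit the constant first

	// The destination is overwritten (Discard) unless it is also the source,
	// which must be flushed so its value really exists before being read.
	void MapDirtyIn(int rd, int rs) {
		if (rs != rd) Discard(rd);
		Flush(rs);
	}
};

int main() {
	ToyImmCache c;
	c.SetImm(4, 0x08804000);
	c.MapDirtyIn(5, 4);
	printf("r4 tracked: %d, r5 tracked: %d\n", c.IsImm(4), c.IsImm(5));
	return 0;
}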
+ //if (ir.GetConstants().size() >= 64) + // logBlocks = 1; } b->SetInstructions(code->GetInstructions(), code->GetConstants()); @@ -287,7 +290,7 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { } if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== Original IR (%d instructions) ===============", (int)ir.GetInstructions().size()); + ILOG("=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); for (int i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); @@ -297,7 +300,7 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { } if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== IR (%d instructions) ===============", (int)code->GetInstructions().size()); + ILOG("=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); for (int i = 0; i < code->GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], code->GetConstants().data()); diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index d5b943c23c1a..5eb1ea800107 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -62,7 +62,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { bool symmetric = true; switch (inst.op) { case IROp::SetConst: - gpr.SetImm((MIPSGPReg)inst.dest, constants[inst.src1]); + gpr.SetImm(inst.dest, constants[inst.src1]); break; case IROp::Sub: @@ -83,7 +83,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } else { out.Write(ArithToArithConst(inst.op), inst.dest, inst.src1, out.AddConstant(gpr.GetImm(inst.src2))); } - } else if (gpr.IsImm(inst.src1) && inst.src1 != inst.src2 && inst.dest != inst.src2 && symmetric) { + } else if (symmetric && gpr.IsImm(inst.src1) && inst.src1 != inst.src2 && inst.dest != inst.src2) { gpr.MapDirtyIn(inst.dest, inst.src2); out.Write(ArithToArithConst(inst.op), inst.dest, inst.src2, out.AddConstant(gpr.GetImm(inst.src1))); } else { @@ -91,7 +91,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { goto doDefault; } break; - + case IROp::AddConst: case IROp::SubConst: case IROp::AndConst: @@ -120,7 +120,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::Mov: - if (inst.src1 == inst.src2) { + if (inst.dest == inst.src1) { // Nop } else if (gpr.IsImm(inst.src1)) { gpr.SetImm(inst.dest, gpr.GetImm(inst.src1)); @@ -148,10 +148,9 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::Load16: case IROp::Load16Ext: case IROp::Load32: - if (gpr.IsImm(inst.src1) && inst.src1 != inst.dest && inst.src2 != inst.dest) { + if (gpr.IsImm(inst.src1) && inst.src1 != inst.dest) { gpr.MapDirty(inst.dest); out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); - logBlocks = true; } else { gpr.MapDirtyIn(inst.dest, inst.src1); goto doDefault; diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index c7e11aa6d9ea..09aeeb9c9026 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -26,7 +26,7 @@ IRRegCache::IRRegCache(IRWriter *ir) : ir_(ir) { void IRRegCache::FlushAll() { for (int i = 0; i < TOTAL_MAPPABLE_MIPSREGS; i++) { - if (i < IRTEMP_0) + //if (i < IRTEMP_0) Flush(i); } } From 3eb5480adeec8896e57f938c8cd667eea65829a6 Mon Sep 17 00:00:00 2001 
From: Henrik Rydgard Date: Sun, 8 May 2016 13:32:22 +0200 Subject: [PATCH 17/77] Initial VFPU --- Core/MIPS/IR/IRCompVFPU.cpp | 66 +++++++++++++++++++++++++++++++++++-- Core/MIPS/IR/IRInst.cpp | 53 ++++++++++++++++++++++++++++- Core/MIPS/IR/IRInst.h | 7 +++- 3 files changed, 122 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index d7b807fe6347..1f2623ac67e3 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -181,7 +181,38 @@ namespace MIPSComp { } void IRJit::Comp_SVQ(MIPSOpcode op) { - DISABLE; + int imm = (signed short)(op & 0xFFFC); + int vt = (((op >> 16) & 0x1f)) | ((op & 1) << 5); + MIPSGPReg rs = _RS; + + u8 vregs[4]; + GetVectorRegs(vregs, V_Quad, vt); + + switch (op >> 26) { + case 54: //lv.q + { + // TODO: Add vector load/store instruction to the IR + ir.Write(IROp::LoadFloatV, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::LoadFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::LoadFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::LoadFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + } + break; + + case 62: //sv.q + { + // CC might be set by slow path below, so load regs first. + ir.Write(IROp::StoreFloatV, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::StoreFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::StoreFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::StoreFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + } + break; + + default: + DISABLE; + break; + } } void IRJit::Comp_VVectorInit(MIPSOpcode op) { @@ -215,6 +246,11 @@ namespace MIPSComp { } void IRJit::Comp_VV2Op(MIPSOpcode op) { + CONDITIONAL_DISABLE; + // Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure + if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) { + return; + } DISABLE; } @@ -231,7 +267,33 @@ namespace MIPSComp { } void IRJit::Comp_Mftv(MIPSOpcode op) { - DISABLE; + int imm = op & 0xFF; + MIPSGPReg rt = _RT; + switch ((op >> 21) & 0x1f) { + case 3: //mfv / mfvc + // rt = 0, imm = 255 appears to be used as a CPU interlock by some games. 
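Since the IR has no 128-bit memory op yet (the TODO above), lv.q/sv.q are expanded into four scalar LoadFloatV/StoreFloatV ops at imm, imm+4, imm+8, imm+12. The same expansion over a plain byte array standing in for guest memory, purely as an illustration:

#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
	// Pretend guest memory holding a quad of floats at offset 0.
	uint8_t mem[16];
	const float src[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
	memcpy(mem, src, sizeof(src));

	// lv.q: four 4-byte float loads at consecutive offsets.
	float v[4];
	for (int i = 0; i < 4; i++)
		memcpy(&v[i], mem + i * 4, sizeof(float));

	printf("%.1f %.1f %.1f %.1f\n", v[0], v[1], v[2], v[3]);
	return 0;
}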
+ if (rt != 0) { + if (imm < 128) { //R(rt) = VI(imm); + ir.Write(IROp::VMovToGPR, rt, imm); + logBlocks = 1; + } else { + DISABLE; + } + } + break; + + case 7: // mtv + if (imm < 128) { + ir.Write(IROp::VMovFromGPR, imm, rt); + logBlocks = 1; + } else { + DISABLE; + } + break; + + default: + DISABLE; + } } void IRJit::Comp_Vmfvc(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 1a184e00eb4f..45902a4485c4 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -10,6 +10,8 @@ static const IRMeta irMeta[] = { { IROp::SetConst, "SetConst", "GC" }, + { IROp::SetConstF, "SetConstF", "FC" }, + { IROp::SetConstV, "SetConstV", "VC" }, { IROp::Mov, "Mov", "GG" }, { IROp::Add, "Add", "GGG" }, { IROp::Sub, "Sub", "GGG" }, @@ -62,10 +64,12 @@ static const IRMeta irMeta[] = { { IROp::Load16Ext, "Load16Ext", "GGC" }, { IROp::Load32, "Load32", "GGC" }, { IROp::LoadFloat, "LoadFloat", "FGC" }, + { IROp::LoadFloatV, "LoadFloatV", "VGC" }, { IROp::Store8, "Store8", "GGC" }, { IROp::Store16, "Store16", "GGC" }, { IROp::Store32, "Store32", "GGC" }, { IROp::StoreFloat, "StoreFloat", "FGC" }, + { IROp::StoreFloatV, "StoreFloatV", "VGC" }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, @@ -82,6 +86,8 @@ static const IRMeta irMeta[] = { { IROp::FCvtSW, "FCvtSW", "FF" }, { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, { IROp::FMovToGPR, "FMovToGPR", "GF" }, + { IROp::VMovFromGPR, "VMovFromGPR", "VG" }, + { IROp::VMovToGPR, "VMovToGPR", "GV" }, { IROp::FpCondToReg, "FpCondToReg", "G" }, { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" }, { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, @@ -117,6 +123,12 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::SetConst: mips->r[inst->dest] = constPool[inst->src1]; break; + case IROp::SetConstF: + memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); + break; + case IROp::SetConstV: + memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); + break; case IROp::Add: mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; break; @@ -181,6 +193,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::LoadFloat: mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); break; + case IROp::LoadFloatV: + mips->v[voffset[inst->dest]] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); + break; case IROp::Store8: Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); @@ -194,6 +209,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::StoreFloat: Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); break; + case IROp::StoreFloatV: + Memory::WriteUnchecked_Float(mips->v[voffset[inst->src3]], mips->r[inst->src1] + constPool[inst->src2]); + break; case IROp::ShlImm: mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; @@ -389,6 +407,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c } break; //cvt.w.s } + case IROp::ZeroFpCond: mips->fpcond = 0; break; @@ -400,6 +419,13 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); break; + case IROp::VMovFromGPR: + memcpy(&mips->v[voffset[inst->dest]], &mips->r[inst->src1], 4); + break; + case IROp::VMovToGPR: + memcpy(&mips->r[inst->dest], 
&mips->v[voffset[inst->src1]], 4); + break; + case IROp::ExitToConst: return constPool[inst->dest]; @@ -540,12 +566,31 @@ const char *GetGPRName(int r) { } void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *constPool) { + static const char *vfpuCtrlNames[VFPU_CTRL_MAX] = { + "SPFX", + "TPFX", + "DPFX", + "CC", + "INF4", + "RSV5", + "RSV6", + "REV", + "RCX0", + "RCX1", + "RCX2", + "RCX3", + "RCX4", + "RCX5", + "RCX6", + "RCX7", + }; + switch (type) { case 'G': snprintf(buf, bufSize, "%s", GetGPRName(param)); break; case 'F': - snprintf(buf, bufSize, "r%d", param); + snprintf(buf, bufSize, "f%d", param); break; case 'C': snprintf(buf, bufSize, "%08x", constPool[param]); @@ -553,6 +598,12 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co case 'I': snprintf(buf, bufSize, "%02x", param); break; + case 'V': + snprintf(buf, bufSize, "v%d", param); + break; + case 'T': + snprintf(buf, bufSize, "%s", vfpuCtrlNames[param]); + break; case '_': case '\0': buf[0] = 0; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 062d5189abc5..b19651e3d8fb 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -17,7 +17,8 @@ enum class IROp : u8 { SetConst, - FSetConst, + SetConstF, + SetConstV, Mov, @@ -88,11 +89,13 @@ enum class IROp : u8 { Load16Ext, Load32, LoadFloat, + LoadFloatV, Store8, Store16, Store32, StoreFloat, + StoreFloatV, Ext8to32, Ext16to32, @@ -136,6 +139,8 @@ enum class IROp : u8 { UpdateRoundingMode, SetCtrlVFPU, + VMovFromGPR, + VMovToGPR, // Fake/System instructions Interpret, From 492ea5fac43f015eade155861bb9c248dff1d761 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 21:38:03 +0200 Subject: [PATCH 18/77] Address a bunch of comments, thanks for the review --- Core/MIPS/ARM64/Arm64Jit.cpp | 2 -- Core/MIPS/IR/IRCompALU.cpp | 3 --- Core/MIPS/IR/IRCompBranch.cpp | 30 +++++++++++++++++++----------- Core/MIPS/IR/IRCompFPU.cpp | 19 +++++++------------ Core/MIPS/IR/IRInst.h | 10 ---------- Core/MIPS/IR/IRJit.cpp | 25 +++---------------------- Core/MIPS/IR/IRJit.h | 6 +++--- Core/MIPS/JitCommon/JitCommon.cpp | 2 +- Core/MIPS/x86/Asm.cpp | 2 +- Core/MemMapFunctions.cpp | 2 +- 10 files changed, 35 insertions(+), 66 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index 427126e26e6f..83c7ec6b9720 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -284,8 +284,6 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) { gpr.Start(analysis); fpr.Start(analysis); - int partialFlushOffset = 0; - js.numInstructions = 0; while (js.compiling) { gpr.SetCompilerPC(GetCompilerPC()); // Let it know for log messages diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 7f21c2c572d9..82053dc63fa3 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -192,8 +192,6 @@ void IRJit::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpConst) { MIPSGPReg rd = _RD; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; - // Not sure if ARM64 wraps like this so let's do it for it. 
(TODO: According to the ARM ARM, it will indeed mask for us so this is not necessary) - // ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG); ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(31)); ir.Write(shiftOp, rd, rt, IRTEMP_0); } @@ -248,7 +246,6 @@ void IRJit::Comp_Special3(MIPSOpcode op) { case 0x4: //ins { - logBlocks = 1; u32 sourcemask = mask >> pos; u32 destmask = ~(sourcemask << pos); ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask)); diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index a290784904e2..e2d6c99c8523 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -53,8 +53,7 @@ namespace MIPSComp { using namespace Arm64Gen; -void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) -{ +void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); return; @@ -67,11 +66,12 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) MIPSOpcode delaySlotOp = GetOffsetInstruction(1); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); - ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int dcAmount = js.downcountAmount + 1; + ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); MIPSGPReg lhs = rs; MIPSGPReg rhs = rt; - if (!delaySlotIsNice) { // if likely, we don't need this + if (!delaySlotIsNice && !likely) { // if likely, we don't need this if (rs != 0) { ir.Write(IROp::Mov, IRTEMP_LHS, rs); lhs = (MIPSGPReg)IRTEMP_LHS; @@ -109,7 +109,8 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool MIPSOpcode delaySlotOp = GetOffsetInstruction(1); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); - ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int dcAmount = js.downcountAmount + 1; + ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); MIPSGPReg lhs = rs; if (!delaySlotIsNice) { // if likely, we don't need this @@ -136,13 +137,13 @@ void IRJit::Comp_RelBranch(MIPSOpcode op) { // The CC flags here should be opposite of the actual branch becuase they skip the branching action. 
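The AndConst with 31 kept in CompShiftVar above matches the MIPS rule that variable shifts (sllv/srlv/srav) use only the low five bits of the shift register; in plain C:

#include <cstdint>
#include <cstdio>

// sllv rd, rt, rs: only the low 5 bits of rs count as the shift amount.
static uint32_t Sllv(uint32_t rt, uint32_t rs) {
	return rt << (rs & 31);
}

int main() {
	printf("%08x\n", Sllv(1, 33));  // 33 & 31 == 1, so the result is 00000002
	return 0;
}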
switch (op >> 26) { case 4: BranchRSRTComp(op, IRComparison::NotEqual, false); break;//beq - case 5: BranchRSRTComp(op, IRComparison::Equal, false); break;//bne + case 5: BranchRSRTComp(op, IRComparison::Equal, false); break;//bne case 6: BranchRSZeroComp(op, IRComparison::Greater, false, false); break;//blez case 7: BranchRSZeroComp(op, IRComparison::LessEqual, false, false); break;//bgtz case 20: BranchRSRTComp(op, IRComparison::NotEqual, true); break;//beql - case 21: BranchRSRTComp(op, IRComparison::Equal, true); break;//bnel + case 21: BranchRSRTComp(op, IRComparison::Equal, true); break;//bnel case 22: BranchRSZeroComp(op, IRComparison::Greater, false, true); break;//blezl case 23: BranchRSZeroComp(op, IRComparison::LessEqual, false, true); break;//bgtzl @@ -183,7 +184,8 @@ void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { if (!likely) CompileDelaySlot(); - ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int dcAmount = js.downcountAmount + 1; + ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); FlushAll(); // Not taken @@ -221,7 +223,8 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { logBlocks = 1; ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC); - ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int dcAmount = js.downcountAmount + 1; + ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. @@ -268,7 +271,8 @@ void IRJit::Comp_Jump(MIPSOpcode op) { u32 off = _IMM26 << 2; u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off; - ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int dcAmount = js.downcountAmount + 1; + ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); // Might be a stubbed address or something? 
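A small sketch of why the table above passes inverted conditions: the generated IR exits to the not-taken address (PC + 8, past the delay slot) when the branch condition fails, and only the fall-through path continues toward the taken target. Values below are made up for the illustration:

#include <cstdint>
#include <cstdio>

int main() {
	uint32_t pc = 0x08804000;
	int32_t offset = 0x40;
	uint32_t takenTarget = pc + 4 + offset;  // GetCompilerPC() + offset + 4
	uint32_t notTaken = pc + 8;              // skip the branch and its delay slot

	uint32_t rs = 1, rt = 2;
	// beq rs, rt, target is lowered as "exit to notTaken if rs != rt", hence
	// IRComparison::NotEqual in the beq entry of the table.
	if (rs != rt)
		printf("exit to %08x (branch not taken)\n", notTaken);
	else
		printf("fall through toward %08x (branch taken)\n", takenTarget);
	return 0;
}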
if (!Memory::IsValidAddress(targetAddr)) { @@ -316,7 +320,8 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { if (andLink && rs == rd) delaySlotIsNice = false; - ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int dcAmount = js.downcountAmount + 1; + ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); int destReg; if (IsSyscall(delaySlotOp)) { @@ -363,6 +368,9 @@ void IRJit::Comp_Syscall(MIPSOpcode op) { RestoreRoundingMode(); js.downcountAmount = -offset; + int dcAmount = js.downcountAmount + 1; + ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + FlushAll(); ir.Write(IROp::Syscall, 0, ir.AddConstant(op.encoding)); diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index c4353dd9836b..b0ff42cf261c 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -80,15 +80,12 @@ void IRJit::Comp_FPULS(MIPSOpcode op) { switch (op >> 26) { case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1 - { ir.Write(IROp::LoadFloat, ft, rs, ir.AddConstant(offset)); - } - break; + break; + case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1 - { ir.Write(IROp::StoreFloat, ft, rs, ir.AddConstant(offset)); - } - break; + break; default: _dbg_assert_msg_(CPU, 0, "Trying to interpret FPULS instruction that can't be interpreted"); @@ -97,7 +94,7 @@ void IRJit::Comp_FPULS(MIPSOpcode op) { } void IRJit::Comp_FPUComp(MIPSOpcode op) { - DISABLE; + DISABLE; // IROps not yet implemented int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias @@ -195,8 +192,7 @@ void IRJit::Comp_FPU2op(MIPSOpcode op) { } } -void IRJit::Comp_mxc1(MIPSOpcode op) -{ +void IRJit::Comp_mxc1(MIPSOpcode op) { CONDITIONAL_DISABLE; int fs = _FS; @@ -215,9 +211,8 @@ void IRJit::Comp_mxc1(MIPSOpcode op) return; } if (fs == 31) { - DISABLE; - } - else if (fs == 0) { + DISABLE; // TODO: Add a new op + } else if (fs == 0) { ir.Write(IROp::SetConst, rt, ir.AddConstant(MIPSState::FCR0_VALUE)); } else { // Unsupported regs are always 0. 
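The Downcount writes above split the (now +1 adjusted) cycle count into two byte-sized operands because IR operands are single bytes; the low byte goes in src1 and the high byte in src2, and the obvious inverse puts them back together. A round trip of that encoding, assuming the amount fits in 16 bits:

#include <cstdio>

int main() {
	int dcAmount = 0x1234 + 1;            // downcountAmount + 1, as in the hunks above
	unsigned lo = dcAmount & 0xFF;        // passed as src1
	unsigned hi = (dcAmount >> 8) & 0xFF; // passed as src2
	int decoded = (int)(lo | (hi << 8));  // reassembly on the consuming side
	printf("%02x %02x -> %d (expected %d)\n", hi, lo, decoded, dcAmount);
	return 0;
}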
diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index b19651e3d8fb..a25996590607 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -219,16 +219,6 @@ enum { IRREG_FPCOND = 229 }; -enum class IRParam { - Ignore = '_', - UImm8 = 'U', - Const = 'C', - GPR = 'G', - FPR = 'F', - VPR = 'V', - VCtrl = 'T', -}; - struct IRMeta { IROp op; const char *name; diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index cc8bc8b65cc6..b9522c6b88fd 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -39,8 +39,7 @@ #include "Core/MIPS/IR/IRPassSimplify.h" #include "Core/MIPS/JitCommon/JitCommon.h" -namespace MIPSComp -{ +namespace MIPSComp { IRJit::IRJit(MIPSState *mips) : mips_(mips) { logBlocks = 0; @@ -48,8 +47,7 @@ IRJit::IRJit(MIPSState *mips) : mips_(mips) { js.startDefaultPrefix = mips_->HasDefaultPrefix(); js.currentRoundingFunc = convertS0ToSCRATCH1[0]; u32 size = 128 * 1024; - blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); - logBlocks = 12; + // blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); InitIR(); } @@ -110,7 +108,7 @@ void IRJit::FlushPrefixV() { } void IRJit::ClearCache() { - ILOG("ARM64Jit: Clearing the cache!"); + ILOG("IRJit: Clearing the cache!"); blocks_.Clear(); } @@ -184,12 +182,6 @@ void IRJit::RunLoopUntil(u64 globalticks) { // ApplyRoundingMode(true); // IR Dispatcher - FILE *f; - int numBlocks = 0; - if (numBlocks) { - f = fopen("E:\\blockir.txt", "w"); - } - while (true) { // RestoreRoundingMode(true); CoreTiming::Advance(); @@ -203,18 +195,9 @@ void IRJit::RunLoopUntil(u64 globalticks) { u32 data = inst & 0xFFFFFF; if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) { IRBlock *block = blocks_.GetBlock(data); - if (numBlocks > 0) { - // ILOG("Run block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); - fprintf(f, "BLOCK : %08x v0: %08x v1: %08x a0: %08x s0: %08x s4: %08x\n", mips_->pc, mips_->r[MIPS_REG_V0], mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0], mips_->r[MIPS_REG_S0], mips_->r[MIPS_REG_S4]); - fflush(f); - numBlocks--; - } mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions()); } else { - if (mips_->pc == 0x0880de94) - logBlocks = 10; // RestoreRoundingMode(true); - // ILOG("Compile block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); Compile(mips_->pc); // ApplyRoundingMode(true); } @@ -246,8 +229,6 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { js.PrefixStart(); ir.Clear(); - int partialFlushOffset = 0; - js.numInstructions = 0; while (js.compiling) { MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC()); diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 13e9162fa71c..e26f1c24391c 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -265,9 +265,9 @@ class IRJit : public JitInterface { IRWriter ir; - // where to write branch-likely trampolines - u32 blTrampolines_; - int blTrampolineCount_; + // where to write branch-likely trampolines. 
not used atm + // u32 blTrampolines_; + // int blTrampolineCount_; public: // Code pointers diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index e267b9352ed1..0a1acd8229e1 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -50,7 +50,7 @@ namespace MIPSComp { #if defined(ARM) return new MIPSComp::ArmJit(mips); #elif defined(ARM64) - return new MIPSComp::IRJit(mips); + return new MIPSComp::Arm64Jit(mips); #elif defined(_M_IX86) || defined(_M_X64) return new MIPSComp::Jit(mips); #elif defined(MIPS) diff --git a/Core/MIPS/x86/Asm.cpp b/Core/MIPS/x86/Asm.cpp index 86dfc1d7fb1f..05eda2823d77 100644 --- a/Core/MIPS/x86/Asm.cpp +++ b/Core/MIPS/x86/Asm.cpp @@ -40,7 +40,7 @@ namespace MIPSComp //TODO - make an option //#if _DEBUG - static bool enableDebug = true; +static bool enableDebug = false; //#else // bool enableDebug = false; diff --git a/Core/MemMapFunctions.cpp b/Core/MemMapFunctions.cpp index d367205ef797..112ae7093c7f 100644 --- a/Core/MemMapFunctions.cpp +++ b/Core/MemMapFunctions.cpp @@ -87,7 +87,7 @@ inline void ReadFromHardware(T &var, const u32 address) { var = *((const T*)GetPointerUnchecked(address)); } else { // In jit, we only flush PC when bIgnoreBadMemAccess is off. - if (g_Config.iCpuCore != CPU_CORE_INTERPRETER && g_Config.bIgnoreBadMemAccess) { + if (g_Config.iCpuCore == CPU_CORE_JIT && g_Config.bIgnoreBadMemAccess) { WARN_LOG(MEMMAP, "ReadFromHardware: Invalid address %08x", address); } else { WARN_LOG(MEMMAP, "ReadFromHardware: Invalid address %08x PC %08x LR %08x", address, currentMIPS->pc, currentMIPS->r[MIPS_REG_RA]); From f8659b8e1e59cf570cd0f44a786d004311fdfb06 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 21:56:52 +0200 Subject: [PATCH 19/77] Move the IR interpreter out into its own file. Rename it in the UI. First CMake and Android fixes. 
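The interpreter's contract is unchanged by the move: a compiled block is a flat IRInst array plus its constant pool, and IRInterpret() returns the address to continue executing at. A simplified sketch of the call the dispatcher makes (the real call site is IRJit::RunLoopUntil; the wrapper name here is made up for illustration):

#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRInterpreter.h"

// Run one compiled IR block and leave the next PC in the MIPS state.
static void RunOneBlock(MIPSState *mips, IRBlock *block) {
	mips->pc = IRInterpret(mips, block->GetInstructions(), block->GetConstants(),
	                       block->GetNumInstructions());
}
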
--- CMakeLists.txt | 19 ++ Core/Core.vcxproj | 2 + Core/Core.vcxproj.filters | 6 + Core/MIPS/ARM/ArmJit.cpp | 10 + Core/MIPS/ARM/ArmJit.h | 1 + Core/MIPS/ARM64/Arm64Jit.cpp | 10 + Core/MIPS/ARM64/Arm64Jit.h | 1 + Core/MIPS/IR/IRInst.cpp | 407 +------------------------------- Core/MIPS/IR/IRInterpreter.cpp | 411 +++++++++++++++++++++++++++++++++ Core/MIPS/IR/IRInterpreter.h | 8 + Core/MIPS/IR/IRJit.cpp | 1 + UI/DevScreens.cpp | 13 ++ UI/GameSettingsScreen.cpp | 2 +- android/jni/Android.mk | 10 + 14 files changed, 494 insertions(+), 407 deletions(-) create mode 100644 Core/MIPS/IR/IRInterpreter.cpp create mode 100644 Core/MIPS/IR/IRInterpreter.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d137ba551bd..655d8f7e1306 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1077,6 +1077,25 @@ include_directories(ext/xxhash) set(CoreExtra) set(CoreExtraLibs) + +set(CoreExtra ${CoreExtra} + Core/MIPS/IR/IRCompALU.cpp + Core/MIPS/IR/IRCompBranch.cpp + Core/MIPS/IR/IRCompFPU.cpp + Core/MIPS/IR/IRCompLoadStore.cpp + Core/MIPS/IR/IRCompVFPU.cpp + Core/MIPS/IR/IRInst.cpp + Core/MIPS/IR/IRInst.h + Core/MIPS/IR/IRInterpreter.cpp + Core/MIPS/IR/IRInterpreter.h + Core/MIPS/IR/IRJit.cpp + Core/MIPS/IR/IRJit.h + Core/MIPS/IR/IRPassSimplify.cpp + Core/MIPS/IR/IRPassSimplify.h + Core/MIPS/IR/IRRegCache.cpp + Core/MIPS/IR/IRRegCache.h + ) + if(ARM) set(CoreExtra ${CoreExtra} Core/MIPS/ARM/ArmAsm.cpp diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index e902adf7332d..561d83d2d36e 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -188,6 +188,7 @@ + @@ -518,6 +519,7 @@ + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 5905d62de115..0fc92ec2fad1 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -667,6 +667,9 @@ MIPS\IR + + MIPS\IR + @@ -1224,6 +1227,9 @@ MIPS\IR + + MIPS\IR + diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index eaf0a9741136..508a6c3b1fa4 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -677,4 +677,14 @@ void ArmJit::WriteSyscallExit() void ArmJit::Comp_DoNothing(MIPSOpcode op) { } +MIPSOpcode ArmJit::GetOriginalOp(MIPSOpcode op) { + JitBlockCache *bc = GetBlockCache(); + int block_num = bc->GetBlockNumberFromEmuHackOp(op, true); + if (block_num >= 0) { + return bc->GetOriginalFirstOp(block_num); + } else { + return op; + } } + +} // namespace diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 07b70af9688d..efdde624bf36 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -53,6 +53,7 @@ class ArmJit : public ArmGen::ARMXCodeBlock, public JitInterface { void Compile(u32 em_address) override; // Compiles a block at current MIPS PC bool DescribeCodePtr(const u8 *ptr, std::string &name) override; + MIPSOpcode GetOriginalOp(MIPSOpcode op) override; void Comp_RunBlock(MIPSOpcode op) override; void Comp_ReplacementFunc(MIPSOpcode op) override; diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index 83c7ec6b9720..a3f032ea5aa6 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -634,4 +634,14 @@ void Arm64Jit::WriteSyscallExit() { void Arm64Jit::Comp_DoNothing(MIPSOpcode op) { } +MIPSOpcode Arm64Jit::GetOriginalOp(MIPSOpcode op) { + JitBlockCache *bc = GetBlockCache(); + int block_num = bc->GetBlockNumberFromEmuHackOp(op, true); + if (block_num >= 0) { + return bc->GetOriginalFirstOp(block_num); + } else { + return op; + } } + +} // namespace diff --git a/Core/MIPS/ARM64/Arm64Jit.h b/Core/MIPS/ARM64/Arm64Jit.h index 
e1c9cf5dec45..e341df3e7989 100644 --- a/Core/MIPS/ARM64/Arm64Jit.h +++ b/Core/MIPS/ARM64/Arm64Jit.h @@ -54,6 +54,7 @@ class Arm64Jit : public Arm64Gen::ARM64CodeBlock, public JitInterface { const u8 *DoJit(u32 em_address, JitBlock *b); bool DescribeCodePtr(const u8 *ptr, std::string &name) override; + MIPSOpcode GetOriginalOp(MIPSOpcode op) override; void Comp_RunBlock(MIPSOpcode op) override; void Comp_ReplacementFunc(MIPSOpcode op) override; diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 45902a4485c4..66ee7561f2cb 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -1,12 +1,7 @@ +#include "Common/CommonFuncs.h" #include "Core/MIPS/IR/IRInst.h" #include "Core/MIPS/IR/IRPassSimplify.h" #include "Core/MIPS/MIPSDebugInterface.h" -#include "Core/MIPS/MIPSTables.h" -#include "Core/MemMap.h" -#include "Core/HLE/HLE.h" -#include "Core/HLE/ReplaceTables.h" - -#include "math/math_util.h" static const IRMeta irMeta[] = { { IROp::SetConst, "SetConst", "GC" }, @@ -116,406 +111,6 @@ void InitIR() { } } -u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) { - const IRInst *end = inst + count; - while (inst != end) { - switch (inst->op) { - case IROp::SetConst: - mips->r[inst->dest] = constPool[inst->src1]; - break; - case IROp::SetConstF: - memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); - break; - case IROp::SetConstV: - memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); - break; - case IROp::Add: - mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; - break; - case IROp::Sub: - mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2]; - break; - case IROp::And: - mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2]; - break; - case IROp::Or: - mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2]; - break; - case IROp::Xor: - mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2]; - break; - case IROp::Mov: - mips->r[inst->dest] = mips->r[inst->src1]; - break; - case IROp::AddConst: - mips->r[inst->dest] = mips->r[inst->src1] + constPool[inst->src2]; - break; - case IROp::SubConst: - mips->r[inst->dest] = mips->r[inst->src1] - constPool[inst->src2]; - break; - case IROp::AndConst: - mips->r[inst->dest] = mips->r[inst->src1] & constPool[inst->src2]; - break; - case IROp::OrConst: - mips->r[inst->dest] = mips->r[inst->src1] | constPool[inst->src2]; - break; - case IROp::XorConst: - mips->r[inst->dest] = mips->r[inst->src1] ^ constPool[inst->src2]; - break; - case IROp::Neg: - mips->r[inst->dest] = -(s32)mips->r[inst->src1]; - break; - case IROp::Not: - mips->r[inst->dest] = ~mips->r[inst->src1]; - break; - case IROp::Ext8to32: - mips->r[inst->dest] = (s32)(s8)mips->r[inst->src1]; - break; - case IROp::Ext16to32: - mips->r[inst->dest] = (s32)(s16)mips->r[inst->src1]; - break; - - case IROp::Load8: - mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::Load8Ext: - mips->r[inst->dest] = (s32)(s8)Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::Load16: - mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::Load16Ext: - mips->r[inst->dest] = (s32)(s16)Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::Load32: - mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::LoadFloat: - mips->f[inst->dest] = 
Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::LoadFloatV: - mips->v[voffset[inst->dest]] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); - break; - - case IROp::Store8: - Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::Store16: - Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::Store32: - Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::StoreFloat: - Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); - break; - case IROp::StoreFloatV: - Memory::WriteUnchecked_Float(mips->v[voffset[inst->src3]], mips->r[inst->src1] + constPool[inst->src2]); - break; - - case IROp::ShlImm: - mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; - break; - case IROp::ShrImm: - mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2; - break; - case IROp::SarImm: - mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2; - break; - case IROp::RorImm: - { - u32 x = mips->r[inst->src1]; - int sa = inst->src2; - mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); - } - break; - - case IROp::Shl: - mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31); - break; - case IROp::Shr: - mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31); - break; - case IROp::Sar: - mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31); - break; - case IROp::Ror: - { - u32 x = mips->r[inst->src1]; - int sa = mips->r[inst->src2] & 31; - mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); - } - break; - - case IROp::Clz: - { - int x = 31; - int count = 0; - int value = mips->r[inst->src1]; - while (x >= 0 && !(value & (1 << x))) { - count++; - x--; - } - mips->r[inst->dest] = count; - break; - } - - case IROp::Slt: - mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2]; - break; - - case IROp::SltU: - mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2]; - break; - - case IROp::SltConst: - mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)constPool[inst->src2]; - break; - - case IROp::SltUConst: - mips->r[inst->dest] = mips->r[inst->src1] < constPool[inst->src2]; - break; - - case IROp::MovZ: - if (mips->r[inst->src1] == 0) - mips->r[inst->dest] = mips->r[inst->src2]; - break; - case IROp::MovNZ: - if (mips->r[inst->src1] != 0) - mips->r[inst->dest] = mips->r[inst->src2]; - break; - - case IROp::Max: - mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2]; - break; - case IROp::Min: - mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? 
mips->r[inst->src1] : mips->r[inst->src2]; - break; - - case IROp::MtLo: - mips->lo = mips->r[inst->src1]; - break; - case IROp::MtHi: - mips->hi = mips->r[inst->src1]; - break; - case IROp::MfLo: - mips->r[inst->dest] = mips->lo; - break; - case IROp::MfHi: - mips->r[inst->dest] = mips->hi; - break; - - case IROp::Mult: - { - s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2]; - memcpy(&mips->lo, &result, 8); - break; - } - case IROp::MultU: - { - u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2]; - memcpy(&mips->lo, &result, 8); - break; - } - - case IROp::BSwap16: - { - u32 x = mips->r[inst->src1]; - mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8); - break; - } - case IROp::BSwap32: - { - u32 x = mips->r[inst->src1]; - mips->r[inst->dest] = ((x & 0xFF000000) >> 24) | ((x & 0x00FF0000) >> 8) | ((x & 0x0000FF00) << 8) | ((x & 0x000000FF) << 24); - break; - } - - case IROp::FAdd: - mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2]; - break; - case IROp::FSub: - mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2]; - break; - case IROp::FMul: - mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2]; - break; - case IROp::FDiv: - mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2]; - break; - - case IROp::FMov: - mips->f[inst->dest] = mips->f[inst->src1]; - break; - case IROp::FAbs: - mips->f[inst->dest] = fabsf(mips->f[inst->src1]); - break; - case IROp::FSqrt: - mips->f[inst->dest] = sqrtf(mips->f[inst->src1]); - break; - case IROp::FNeg: - mips->f[inst->dest] = -mips->f[inst->src1]; - break; - case IROp::FpCondToReg: - mips->r[inst->dest] = mips->fpcond; - break; - case IROp::VfpuCtrlToReg: - mips->r[inst->dest] = mips->vfpuCtrl[inst->src1]; - break; - case IROp::FRound: - mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1] + 0.5f); - break; - case IROp::FTrunc: - { - float src = mips->f[inst->src1]; - if (src >= 0.0f) { - mips->fs[inst->dest] = (int)floorf(src); - // Overflow, but it was positive. - if (mips->fs[inst->dest] == -2147483648LL) { - mips->fs[inst->dest] = 2147483647LL; - } - } else { - // Overflow happens to be the right value anyway. - mips->fs[inst->dest] = (int)ceilf(src); - } - break; - } - case IROp::FCeil: - mips->fs[inst->dest] = (int)ceilf(mips->f[inst->src1]); - break; - case IROp::FFloor: - mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1]); - break; - - case IROp::FCvtSW: - mips->f[inst->dest] = (float)mips->fs[inst->src1]; - break; - case IROp::FCvtWS: - { - float src = mips->f[inst->src1]; - if (my_isnanorinf(src)) - { - mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? 
-2147483648LL : 2147483647LL; - break; - } - switch (mips->fcr31 & 3) - { - case 0: mips->fs[inst->dest] = (int)round_ieee_754(src); break; // RINT_0 - case 1: mips->fs[inst->dest] = (int)src; break; // CAST_1 - case 2: mips->fs[inst->dest] = (int)ceilf(src); break; // CEIL_2 - case 3: mips->fs[inst->dest] = (int)floorf(src); break; // FLOOR_3 - } - break; //cvt.w.s - } - - case IROp::ZeroFpCond: - mips->fpcond = 0; - break; - - case IROp::FMovFromGPR: - memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4); - break; - case IROp::FMovToGPR: - memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); - break; - - case IROp::VMovFromGPR: - memcpy(&mips->v[voffset[inst->dest]], &mips->r[inst->src1], 4); - break; - case IROp::VMovToGPR: - memcpy(&mips->r[inst->dest], &mips->v[voffset[inst->src1]], 4); - break; - - case IROp::ExitToConst: - return constPool[inst->dest]; - - case IROp::ExitToReg: - return mips->r[inst->dest]; - - case IROp::ExitToConstIfEq: - if (mips->r[inst->src1] == mips->r[inst->src2]) - return constPool[inst->dest]; - break; - case IROp::ExitToConstIfNeq: - if (mips->r[inst->src1] != mips->r[inst->src2]) - return constPool[inst->dest]; - break; - case IROp::ExitToConstIfGtZ: - if ((s32)mips->r[inst->src1] > 0) - return constPool[inst->dest]; - break; - case IROp::ExitToConstIfGeZ: - if ((s32)mips->r[inst->src1] >= 0) - return constPool[inst->dest]; - break; - case IROp::ExitToConstIfLtZ: - if ((s32)mips->r[inst->src1] < 0) - return constPool[inst->dest]; - break; - case IROp::ExitToConstIfLeZ: - if ((s32)mips->r[inst->src1] <= 0) - return constPool[inst->dest]; - break; - - case IROp::Downcount: - mips->downcount -= (inst->src1) | ((inst->src2) << 8); - break; - - case IROp::SetPC: - mips->pc = mips->r[inst->src1]; - break; - - case IROp::SetPCConst: - mips->pc = constPool[inst->src1]; - break; - - case IROp::Syscall: - // SetPC was executed before. - { - MIPSOpcode op(constPool[inst->src1]); - CallSyscall(op); - return mips->pc; - } - - case IROp::Interpret: // SLOW fallback. Can be made faster. - { - MIPSOpcode op(constPool[inst->src1]); - MIPSInterpret(op); - break; - } - - case IROp::CallReplacement: - { - int funcIndex = constPool[inst->src1]; - const ReplacementTableEntry *f = GetReplacementFunc(funcIndex); - int cycles = f->replaceFunc(); - mips->downcount -= cycles; - return mips->r[MIPS_REG_RA]; - } - - case IROp::Break: - Crash(); - break; - - case IROp::SetCtrlVFPU: - mips->vfpuCtrl[inst->dest] = constPool[inst->src1]; - break; - - default: - Crash(); - } -#ifdef _DEBUG - if (mips->r[0] != 0) - Crash(); -#endif - inst++; - } - - // If we got here, the block was badly constructed. 
- Crash(); - return 0; -} - void IRWriter::Write(IROp op, u8 dst, u8 src1, u8 src2) { IRInst inst; inst.op = op; diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp new file mode 100644 index 000000000000..0c3c66188c9d --- /dev/null +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -0,0 +1,411 @@ +#include "Core/MemMap.h" +#include "Core/HLE/HLE.h" +#include "Core/HLE/ReplaceTables.h" +#include "Core/MIPS/MIPSTables.h" + +#include "math/math_util.h" +#include "Common/CommonTypes.h" +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/IR/IRInst.h" +#include "Core/MIPS/IR/IRInterpreter.h" + +u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) { + const IRInst *end = inst + count; + while (inst != end) { + switch (inst->op) { + case IROp::SetConst: + mips->r[inst->dest] = constPool[inst->src1]; + break; + case IROp::SetConstF: + memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); + break; + case IROp::SetConstV: + memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); + break; + case IROp::Add: + mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; + break; + case IROp::Sub: + mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2]; + break; + case IROp::And: + mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2]; + break; + case IROp::Or: + mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2]; + break; + case IROp::Xor: + mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2]; + break; + case IROp::Mov: + mips->r[inst->dest] = mips->r[inst->src1]; + break; + case IROp::AddConst: + mips->r[inst->dest] = mips->r[inst->src1] + constPool[inst->src2]; + break; + case IROp::SubConst: + mips->r[inst->dest] = mips->r[inst->src1] - constPool[inst->src2]; + break; + case IROp::AndConst: + mips->r[inst->dest] = mips->r[inst->src1] & constPool[inst->src2]; + break; + case IROp::OrConst: + mips->r[inst->dest] = mips->r[inst->src1] | constPool[inst->src2]; + break; + case IROp::XorConst: + mips->r[inst->dest] = mips->r[inst->src1] ^ constPool[inst->src2]; + break; + case IROp::Neg: + mips->r[inst->dest] = -(s32)mips->r[inst->src1]; + break; + case IROp::Not: + mips->r[inst->dest] = ~mips->r[inst->src1]; + break; + case IROp::Ext8to32: + mips->r[inst->dest] = (s32)(s8)mips->r[inst->src1]; + break; + case IROp::Ext16to32: + mips->r[inst->dest] = (s32)(s16)mips->r[inst->src1]; + break; + + case IROp::Load8: + mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load8Ext: + mips->r[inst->dest] = (s32)(s8)Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load16: + mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load16Ext: + mips->r[inst->dest] = (s32)(s16)Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load32: + mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::LoadFloat: + mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::LoadFloatV: + mips->v[voffset[inst->dest]] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); + break; + + case IROp::Store8: + Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Store16: + 
Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Store32: + Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::StoreFloat: + Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::StoreFloatV: + Memory::WriteUnchecked_Float(mips->v[voffset[inst->src3]], mips->r[inst->src1] + constPool[inst->src2]); + break; + + case IROp::ShlImm: + mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; + break; + case IROp::ShrImm: + mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2; + break; + case IROp::SarImm: + mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2; + break; + case IROp::RorImm: + { + u32 x = mips->r[inst->src1]; + int sa = inst->src2; + mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); + } + break; + + case IROp::Shl: + mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31); + break; + case IROp::Shr: + mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31); + break; + case IROp::Sar: + mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31); + break; + case IROp::Ror: + { + u32 x = mips->r[inst->src1]; + int sa = mips->r[inst->src2] & 31; + mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); + } + break; + + case IROp::Clz: + { + int x = 31; + int count = 0; + int value = mips->r[inst->src1]; + while (x >= 0 && !(value & (1 << x))) { + count++; + x--; + } + mips->r[inst->dest] = count; + break; + } + + case IROp::Slt: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2]; + break; + + case IROp::SltU: + mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2]; + break; + + case IROp::SltConst: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)constPool[inst->src2]; + break; + + case IROp::SltUConst: + mips->r[inst->dest] = mips->r[inst->src1] < constPool[inst->src2]; + break; + + case IROp::MovZ: + if (mips->r[inst->src1] == 0) + mips->r[inst->dest] = mips->r[inst->src2]; + break; + case IROp::MovNZ: + if (mips->r[inst->src1] != 0) + mips->r[inst->dest] = mips->r[inst->src2]; + break; + + case IROp::Max: + mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2]; + break; + case IROp::Min: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? 
mips->r[inst->src1] : mips->r[inst->src2]; + break; + + case IROp::MtLo: + mips->lo = mips->r[inst->src1]; + break; + case IROp::MtHi: + mips->hi = mips->r[inst->src1]; + break; + case IROp::MfLo: + mips->r[inst->dest] = mips->lo; + break; + case IROp::MfHi: + mips->r[inst->dest] = mips->hi; + break; + + case IROp::Mult: + { + s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2]; + memcpy(&mips->lo, &result, 8); + break; + } + case IROp::MultU: + { + u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2]; + memcpy(&mips->lo, &result, 8); + break; + } + + case IROp::BSwap16: + { + u32 x = mips->r[inst->src1]; + mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8); + break; + } + case IROp::BSwap32: + { + u32 x = mips->r[inst->src1]; + mips->r[inst->dest] = ((x & 0xFF000000) >> 24) | ((x & 0x00FF0000) >> 8) | ((x & 0x0000FF00) << 8) | ((x & 0x000000FF) << 24); + break; + } + + case IROp::FAdd: + mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2]; + break; + case IROp::FSub: + mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2]; + break; + case IROp::FMul: + mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2]; + break; + case IROp::FDiv: + mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2]; + break; + + case IROp::FMov: + mips->f[inst->dest] = mips->f[inst->src1]; + break; + case IROp::FAbs: + mips->f[inst->dest] = fabsf(mips->f[inst->src1]); + break; + case IROp::FSqrt: + mips->f[inst->dest] = sqrtf(mips->f[inst->src1]); + break; + case IROp::FNeg: + mips->f[inst->dest] = -mips->f[inst->src1]; + break; + case IROp::FpCondToReg: + mips->r[inst->dest] = mips->fpcond; + break; + case IROp::VfpuCtrlToReg: + mips->r[inst->dest] = mips->vfpuCtrl[inst->src1]; + break; + case IROp::FRound: + mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1] + 0.5f); + break; + case IROp::FTrunc: + { + float src = mips->f[inst->src1]; + if (src >= 0.0f) { + mips->fs[inst->dest] = (int)floorf(src); + // Overflow, but it was positive. + if (mips->fs[inst->dest] == -2147483648LL) { + mips->fs[inst->dest] = 2147483647LL; + } + } else { + // Overflow happens to be the right value anyway. + mips->fs[inst->dest] = (int)ceilf(src); + } + break; + } + case IROp::FCeil: + mips->fs[inst->dest] = (int)ceilf(mips->f[inst->src1]); + break; + case IROp::FFloor: + mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1]); + break; + + case IROp::FCvtSW: + mips->f[inst->dest] = (float)mips->fs[inst->src1]; + break; + case IROp::FCvtWS: + { + float src = mips->f[inst->src1]; + if (my_isnanorinf(src)) + { + mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? 
-2147483648LL : 2147483647LL; + break; + } + switch (mips->fcr31 & 3) + { + case 0: mips->fs[inst->dest] = (int)round_ieee_754(src); break; // RINT_0 + case 1: mips->fs[inst->dest] = (int)src; break; // CAST_1 + case 2: mips->fs[inst->dest] = (int)ceilf(src); break; // CEIL_2 + case 3: mips->fs[inst->dest] = (int)floorf(src); break; // FLOOR_3 + } + break; //cvt.w.s + } + + case IROp::ZeroFpCond: + mips->fpcond = 0; + break; + + case IROp::FMovFromGPR: + memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4); + break; + case IROp::FMovToGPR: + memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); + break; + + case IROp::VMovFromGPR: + memcpy(&mips->v[voffset[inst->dest]], &mips->r[inst->src1], 4); + break; + case IROp::VMovToGPR: + memcpy(&mips->r[inst->dest], &mips->v[voffset[inst->src1]], 4); + break; + + case IROp::ExitToConst: + return constPool[inst->dest]; + + case IROp::ExitToReg: + return mips->r[inst->dest]; + + case IROp::ExitToConstIfEq: + if (mips->r[inst->src1] == mips->r[inst->src2]) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfNeq: + if (mips->r[inst->src1] != mips->r[inst->src2]) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfGtZ: + if ((s32)mips->r[inst->src1] > 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfGeZ: + if ((s32)mips->r[inst->src1] >= 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfLtZ: + if ((s32)mips->r[inst->src1] < 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfLeZ: + if ((s32)mips->r[inst->src1] <= 0) + return constPool[inst->dest]; + break; + + case IROp::Downcount: + mips->downcount -= (inst->src1) | ((inst->src2) << 8); + break; + + case IROp::SetPC: + mips->pc = mips->r[inst->src1]; + break; + + case IROp::SetPCConst: + mips->pc = constPool[inst->src1]; + break; + + case IROp::Syscall: + // SetPC was executed before. + { + MIPSOpcode op(constPool[inst->src1]); + CallSyscall(op); + return mips->pc; + } + + case IROp::Interpret: // SLOW fallback. Can be made faster. + { + MIPSOpcode op(constPool[inst->src1]); + MIPSInterpret(op); + break; + } + + case IROp::CallReplacement: + { + int funcIndex = constPool[inst->src1]; + const ReplacementTableEntry *f = GetReplacementFunc(funcIndex); + int cycles = f->replaceFunc(); + mips->downcount -= cycles; + return mips->r[MIPS_REG_RA]; + } + + case IROp::Break: + Crash(); + break; + + case IROp::SetCtrlVFPU: + mips->vfpuCtrl[inst->dest] = constPool[inst->src1]; + break; + + default: + Crash(); + } +#ifdef _DEBUG + if (mips->r[0] != 0) + Crash(); +#endif + inst++; + } + + // If we got here, the block was badly constructed. 
+ Crash(); + return 0; +} diff --git a/Core/MIPS/IR/IRInterpreter.h b/Core/MIPS/IR/IRInterpreter.h new file mode 100644 index 000000000000..fe9f8e4ba9a5 --- /dev/null +++ b/Core/MIPS/IR/IRInterpreter.h @@ -0,0 +1,8 @@ +#pragma once + +#include "Common/CommonTypes.h" + +class MIPSState; +struct IRInst; + +u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index b9522c6b88fd..668da61a362f 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -37,6 +37,7 @@ #include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/IR/IRJit.h" #include "Core/MIPS/IR/IRPassSimplify.h" +#include "Core/MIPS/IR/IRInterpreter.h" #include "Core/MIPS/JitCommon/JitCommon.h" namespace MIPSComp { diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 1b41243b61f7..42712bc111d1 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -669,6 +669,8 @@ UI::EventReturn JitCompareScreen::OnAddressChange(UI::EventParams &e) { return UI::EVENT_DONE; } JitBlockCache *blockCache = MIPSComp::jit->GetBlockCache(); + if (!blockCache) + return UI::EVENT_DONE; u32 addr; if (blockAddr_->GetText().size() > 8) return UI::EVENT_DONE; @@ -731,6 +733,9 @@ UI::EventReturn JitCompareScreen::OnBlockAddress(UI::EventParams &e) { } JitBlockCache *blockCache = MIPSComp::jit->GetBlockCache(); + if (!blockCache) + return UI::EVENT_DONE; + if (Memory::IsValidAddress(e.a)) { currentBlock_ = blockCache->GetBlockNumberFromStartAddress(e.a); } else { @@ -746,6 +751,9 @@ UI::EventReturn JitCompareScreen::OnRandomBlock(UI::EventParams &e) { } JitBlockCache *blockCache = MIPSComp::jit->GetBlockCache(); + if (!blockCache) + return UI::EVENT_DONE; + int numBlocks = blockCache->GetNumBlocks(); if (numBlocks > 0) { currentBlock_ = rand() % numBlocks; @@ -769,6 +777,9 @@ void JitCompareScreen::OnRandomBlock(int flag) { return; } JitBlockCache *blockCache = MIPSComp::jit->GetBlockCache(); + if (!blockCache) + return; + int numBlocks = blockCache->GetNumBlocks(); if (numBlocks > 0) { bool anyWanted = false; @@ -797,6 +808,8 @@ UI::EventReturn JitCompareScreen::OnCurrentBlock(UI::EventParams &e) { return UI::EVENT_DONE; } JitBlockCache *blockCache = MIPSComp::jit->GetBlockCache(); + if (!blockCache) + return UI::EVENT_DONE; std::vector blockNum; blockCache->GetBlockNumbersFromAddress(currentMIPS->pc, &blockNum); if (blockNum.size() > 0) { diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index 1ade2a4303f3..32c4a1f93be3 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -1060,7 +1060,7 @@ void DeveloperToolsScreen::CreateViews() { } #endif - static const char *cpuCores[] = { "Interpreter", "Dynarec (JIT)", "IRJit" }; + static const char *cpuCores[] = { "Interpreter", "Dynarec (JIT)", "IR Interpreter" }; PopupMultiChoice *core = list->Add(new PopupMultiChoice(&g_Config.iCpuCore, gr->T("CPU Core"), cpuCores, 0, ARRAY_SIZE(cpuCores), sy->GetName(), screenManager())); if (!canUseJit) { core->HideChoice(1); diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 1cda4ed0951a..92a10e800c63 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -157,6 +157,16 @@ EXEC_AND_LIB_FILES := \ $(SRC)/Core/MIPS/MIPSVFPUUtils.cpp.arm \ $(SRC)/Core/MIPS/MIPSCodeUtils.cpp.arm \ $(SRC)/Core/MIPS/MIPSDebugInterface.cpp \ + $(SRC)/Core/MIPS/IR/IRJit.cpp \ + $(SRC)/Core/MIPS/IR/IRCompALU.cpp \ + $(SRC)/Core/MIPS/IR/IRCompBranch.cpp \ + $(SRC)/Core/MIPS/IR/IRCompFPU.cpp \ + $(SRC)/Core/MIPS/IR/IRCompLoadStore.cpp 
\ + $(SRC)/Core/MIPS/IR/IRCompVFPU.cpp \ + $(SRC)/Core/MIPS/IR/IRInst.cpp \ + $(SRC)/Core/MIPS/IR/IRInterpreter.cpp \ + $(SRC)/Core/MIPS/IR/IRPassSimplify.cpp \ + $(SRC)/Core/MIPS/IR/IRRegCache.cpp \ $(SRC)/UI/ui_atlas.cpp \ $(SRC)/UI/OnScreenDisplay.cpp \ $(SRC)/ext/libkirk/AES.c \ From 4e52f613f115682f42f57d827bc55ce187083205 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 22:23:51 +0200 Subject: [PATCH 20/77] Additional fixes and buildfixes --- Core/MIPS/IR/IRJit.h | 2 ++ Core/MIPS/IR/IRPassSimplify.cpp | 4 ++++ Core/MemMapFunctions.cpp | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index e26f1c24391c..65da0f8e26a1 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "Common/CPUDetect.h" #include "Core/MIPS/JitCommon/JitState.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 5eb1ea800107..9110ae55f739 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -60,6 +60,10 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { for (int i = 0; i < (int)in.GetInstructions().size(); i++) { IRInst inst = in.GetInstructions()[i]; bool symmetric = true; + if (out.GetConstants().size() > 128) { + // Avoid causing a constant explosion. + goto doDefault; + } switch (inst.op) { case IROp::SetConst: gpr.SetImm(inst.dest, constants[inst.src1]); diff --git a/Core/MemMapFunctions.cpp b/Core/MemMapFunctions.cpp index 112ae7093c7f..7ffc80dc9a14 100644 --- a/Core/MemMapFunctions.cpp +++ b/Core/MemMapFunctions.cpp @@ -123,7 +123,7 @@ inline void WriteToHardware(u32 address, const T data) { *(T*)GetPointerUnchecked(address) = data; } else { // In jit, we only flush PC when bIgnoreBadMemAccess is off. 
- if (g_Config.iCpuCore != CPU_CORE_INTERPRETER && g_Config.bIgnoreBadMemAccess) { + if (g_Config.iCpuCore == CPU_CORE_JIT && g_Config.bIgnoreBadMemAccess) { WARN_LOG(MEMMAP, "WriteToHardware: Invalid address %08x", address); } else { WARN_LOG(MEMMAP, "WriteToHardware: Invalid address %08x PC %08x LR %08x", address, currentMIPS->pc, currentMIPS->r[MIPS_REG_RA]); From ed0a0378d788006287091f51178aa7e4a1f285ba Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 22:27:20 +0200 Subject: [PATCH 21/77] Another buildfix --- Common/CommonFuncs.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/Common/CommonFuncs.h b/Common/CommonFuncs.h index c26b696afbce..92e15bfc6f73 100644 --- a/Common/CommonFuncs.h +++ b/Common/CommonFuncs.h @@ -20,30 +20,27 @@ #include "base/compat.h" #include "CommonTypes.h" -#if defined(IOS) || defined(MIPS) -#include -#endif - template struct CompileTimeAssert; template<> struct CompileTimeAssert {}; -#ifndef _WIN32 +#if !defined(_WIN32) #include #include #if defined(_M_IX86) || defined(_M_X86) - #define Crash() {asm ("int $3");} +#include +#define Crash() {asm ("int $3");} #else - #define Crash() {kill(getpid(), SIGINT);} +#define Crash() {kill(getpid(), SIGINT);} #endif #define ARRAYSIZE(A) (sizeof(A)/sizeof((A)[0])) inline u32 __rotl(u32 x, int shift) { - shift &= 31; - if (!shift) return x; - return (x << shift) | (x >> (32 - shift)); + shift &= 31; + if (!shift) return x; + return (x << shift) | (x >> (32 - shift)); } inline u64 __rotl64(u64 x, unsigned int shift){ From 52517ab609b8d3c940d532406cfe131278417243 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 22:32:08 +0200 Subject: [PATCH 22/77] Fix the fix --- Common/CommonFuncs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Common/CommonFuncs.h b/Common/CommonFuncs.h index 92e15bfc6f73..a533e2df6ad7 100644 --- a/Common/CommonFuncs.h +++ b/Common/CommonFuncs.h @@ -29,9 +29,9 @@ template<> struct CompileTimeAssert {}; #include #if defined(_M_IX86) || defined(_M_X86) -#include #define Crash() {asm ("int $3");} #else +#include #define Crash() {kill(getpid(), SIGINT);} #endif From 5dbac165f434ac0f1699c752e5768ea47655c207 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 14:06:42 -0700 Subject: [PATCH 23/77] Qt, gcc, and Symbian buildfixes. 
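The gcc/Symbian side of this mostly avoids C++11-only std::vector members (data(), emplace_back()) that those toolchains reject, falling back to &v[0] and push_back(). A stand-alone illustration of the substitution, assuming a pre-C++11 compiler (the helper is hypothetical, not part of this change):

#include <cstring>
#include <stdint.h>
#include <vector>

// &v[0] stands in for v.data(); it is only valid for a non-empty vector, hence the guard.
static void CopyPool(uint32_t *dst, const std::vector<uint32_t> &src) {
	if (!src.empty())
		memcpy(dst, &src[0], src.size() * sizeof(uint32_t));
}
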
--- Core/MIPS/IR/IRJit.h | 8 ++++---- Core/MIPS/IR/IRRegCache.cpp | 1 + Qt/mainwindow.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 65da0f8e26a1..e9881f6a6e0b 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -57,10 +57,10 @@ class IRBlock { void SetInstructions(const std::vector &inst, const std::vector &constants) { instr_ = new IRInst[inst.size()]; numInstructions_ = (u16)inst.size(); - memcpy(instr_, inst.data(), sizeof(IRInst) * inst.size()); + memcpy(instr_, &inst[0], sizeof(IRInst) * inst.size()); const_ = new u32[constants.size()]; numConstants_ = (u16)constants.size(); - memcpy(const_, constants.data(), sizeof(u32) * constants.size()); + memcpy(const_, &constants[0], sizeof(u32) * constants.size()); } const IRInst *GetInstructions() const { return instr_; } @@ -85,13 +85,13 @@ class IRBlockCache { void InvalidateICache(u32 addess, u32 length); int GetNumBlocks() const { return (int)blocks_.size(); } int AllocateBlock(int emAddr) { - blocks_.emplace_back(IRBlock(emAddr)); + blocks_.push_back(IRBlock(emAddr)); size_ = (int)blocks_.size(); return (int)blocks_.size() - 1; } IRBlock *GetBlock(int i) { if (i >= 0 && i < size_) { - return blocks_.data() + i; + return &blocks_[i]; } else { return nullptr; } diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index 09aeeb9c9026..aa0aab21acf6 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -1,3 +1,4 @@ +#include #include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/IR/IRInst.h" diff --git a/Qt/mainwindow.h b/Qt/mainwindow.h index bf8bdf2a30f9..ae201054aa1f 100644 --- a/Qt/mainwindow.h +++ b/Qt/mainwindow.h @@ -87,7 +87,7 @@ private slots: // Options // Core - void dynarecAct() { g_Config.bJit = !g_Config.bJit; } + void dynarecAct() { g_Config.iCpuCore = g_Config.iCpuCore == CPU_CORE_INTERPRETER ? CPU_CORE_JIT : CPU_CORE_INTERPRETER; } void vertexDynarecAct() { g_Config.bVertexDecoderJit = !g_Config.bVertexDecoderJit; } void fastmemAct() { g_Config.bFastMemory = !g_Config.bFastMemory; } void ignoreIllegalAct() { g_Config.bIgnoreBadMemAccess = !g_Config.bIgnoreBadMemAccess; } From d4480d50fdad77788e7c0fc717a7c312b79ff6b2 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 22:54:07 +0200 Subject: [PATCH 24/77] jit-ir: Less instructions cause flushing in constant propagation. --- Core/MIPS/IR/IRPassSimplify.cpp | 36 ++++++++++++++++++++++++++++++++- Core/MIPS/IR/IRRegCache.cpp | 6 ++++++ Core/MIPS/IR/IRRegCache.h | 1 + 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 9110ae55f739..efa8c6cdcaa5 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -62,7 +62,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { bool symmetric = true; if (out.GetConstants().size() > 128) { // Avoid causing a constant explosion. 
- goto doDefault; + goto doDefaultAndFlush; } switch (inst.op) { case IROp::SetConst: @@ -134,6 +134,39 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::Mult: + case IROp::MultU: + gpr.MapInIn(inst.src1, inst.src2); + goto doDefault; + + case IROp::MovZ: + case IROp::MovNZ: + gpr.MapInInIn(inst.dest, inst.src1, inst.src2); + goto doDefault; + + case IROp::Min: + case IROp::Max: + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + goto doDefault; + + case IROp::Clz: + case IROp::BSwap16: + case IROp::BSwap32: + case IROp::Ext16to32: + case IROp::Ext8to32: + gpr.MapDirtyIn(inst.dest, inst.src1); + goto doDefault; + + case IROp::MfHi: + case IROp::MfLo: + gpr.MapDirty(inst.dest); + goto doDefault; + + case IROp::MtHi: + case IROp::MtLo: + gpr.MapIn(inst.src1); + goto doDefault; + case IROp::Store8: case IROp::Store16: case IROp::Store32: @@ -175,6 +208,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::ExitToConstIfLtZ: default: { + doDefaultAndFlush: gpr.FlushAll(); doDefault: // Remap constants to the new reality diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index aa0aab21acf6..63ae135878a1 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -45,6 +45,12 @@ void IRRegCache::MapInIn(int rs, int rt) { Flush(rt); } +void IRRegCache::MapInInIn(int rd, int rs, int rt) { + Flush(rd); + Flush(rs); + Flush(rt); +} + void IRRegCache::MapDirtyIn(int rd, int rs) { if (rs != rd) { Discard(rd); diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h index 68570f50acf5..9fcdab8b1e85 100644 --- a/Core/MIPS/IR/IRRegCache.h +++ b/Core/MIPS/IR/IRRegCache.h @@ -35,6 +35,7 @@ class IRRegCache { void MapDirty(int rd); void MapIn(int rd); void MapInIn(int rs, int rt); + void MapInInIn(int rd, int rs, int rt); void MapDirtyIn(int rd, int rs); void MapDirtyInIn(int rd, int rs, int rt); From c7e4658b6d9fa0fc1ea1f6116767c7f091f35686 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 May 2016 23:25:47 +0200 Subject: [PATCH 25/77] More constant propagation --- Core/MIPS/IR/IRPassSimplify.cpp | 38 ++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index efa8c6cdcaa5..bceff578e258 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -157,6 +157,15 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { gpr.MapDirtyIn(inst.dest, inst.src1); goto doDefault; + case IROp::FMovFromGPR: + if (gpr.IsImm(inst.src1)) { + out.Write(IROp::SetConstF, inst.dest, out.AddConstant(gpr.GetImm(inst.src1))); + } else { + gpr.MapIn(inst.src1); + goto doDefault; + } + break; + case IROp::MfHi: case IROp::MfLo: gpr.MapDirty(inst.dest); @@ -174,11 +183,18 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { gpr.MapIn(inst.dest); out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); } else { - // Just pass through, no excessive flushing gpr.MapInIn(inst.dest, inst.src1); goto doDefault; } break; + case IROp::StoreFloat: + if (gpr.IsImm(inst.src1)) { + out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); + } else { + gpr.MapIn(inst.src1); + goto doDefault; + } + break; case IROp::Load8: case IROp::Load8Ext: @@ -193,6 +209,26 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { goto doDefault; } break; + case IROp::LoadFloat: + if (gpr.IsImm(inst.src1)) { + 
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); + } else { + gpr.MapIn(inst.src1); + goto doDefault; + } + break; + + // FP-only instructions don't need to flush immediates. + case IROp::FAdd: + case IROp::FMul: + case IROp::FDiv: + case IROp::FSub: + case IROp::FNeg: + case IROp::FAbs: + case IROp::FSqrt: + case IROp::FMov: + out.Write(inst); + break; case IROp::Syscall: case IROp::Interpret: From d19174b52b82b56f9f35265b1c5c7f319dbd9a28 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 14:56:43 -0700 Subject: [PATCH 26/77] jit-ir: Skip const flush on downcount op. This allows discarding more unused constants. --- Core/MIPS/IR/IRPassSimplify.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index bceff578e258..886ea35a174f 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -218,6 +218,10 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::Downcount: + out.Write(inst); + break; + // FP-only instructions don't need to flush immediates. case IROp::FAdd: case IROp::FMul: From 0d7f15116761922d60dab15e6f4524bac9f9d23b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 16:08:35 -0700 Subject: [PATCH 27/77] jit-ir: Add a utility func for applying passes. --- Core/MIPS/IR/IRInst.h | 11 +++++++++++ Core/MIPS/IR/IRJit.cpp | 9 ++++----- Core/MIPS/IR/IRPassSimplify.cpp | 25 +++++++++++++++++++++++++ Core/MIPS/IR/IRPassSimplify.h | 2 ++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index a25996590607..def7185d7a2e 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -243,6 +243,17 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c // Each IR block gets a constant pool. class IRWriter { public: + IRWriter &operator =(const IRWriter &w) { + insts_ = w.insts_; + constPool_ = w.constPool_; + return *this; + } + IRWriter &operator =(IRWriter &&w) { + insts_ = std::move(w.insts_); + constPool_ = std::move(w.constPool_); + return *this; + } + void Write(IROp op, u8 dst = 0, u8 src1 = 0, u8 src2 = 0); void Write(IRInst inst) { insts_.push_back(inst); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 668da61a362f..a7223bcc317a 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -248,15 +248,14 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { ir.Simplify(); IRWriter simplified; - IRWriter *code = &ir; if (true) { - if (PropagateConstants(ir, simplified)) + static const IRPassFunc passes[] = { + &PropagateConstants, + }; + if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) logBlocks = 1; code = &simplified; - // Some blocks in tekken generate curious numbers of constants after propagation. 
- //if (ir.GetConstants().size() >= 64) - // logBlocks = 1; } b->SetInstructions(code->GetInstructions(), code->GetConstants()); diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 886ea35a174f..7f2e765b58c7 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -51,6 +51,31 @@ IROp ArithToArithConst(IROp op) { } } +bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out) { + if (c == 1) { + return passes[0](in, out); + } + + bool logBlocks = false; + + IRWriter temp[2]; + const IRWriter *nextIn = ∈ + IRWriter *nextOut = &temp[1]; + for (size_t i = 0; i < c - 1; ++i) { + if (passes[i](*nextIn, *nextOut)) { + logBlocks = true; + } + + temp[0] = std::move(temp[1]); + nextIn = &temp[0]; + } + + if (passes[c - 1](*nextIn, out)) { + logBlocks = true; + } + + return logBlocks; +} bool PropagateConstants(const IRWriter &in, IRWriter &out) { IRRegCache gpr(&out); diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index 5a57be1cfae4..efba1749eb97 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -5,5 +5,7 @@ // Dumb example of a simplification pass that can't add or remove instructions. void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool); +typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out); +bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out); bool PropagateConstants(const IRWriter &in, IRWriter &out); \ No newline at end of file From d09f3a22a8d288aa1feab6b8b037cead76b1c7ca Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 16:19:48 -0700 Subject: [PATCH 28/77] jit-ir: Propagate single-op constants. --- Core/MIPS/IR/IRPassSimplify.cpp | 45 +++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 7f2e765b58c7..f6002fb46ab9 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -37,6 +37,28 @@ u32 Evaluate(u32 a, u32 b, IROp op) { } } +u32 Evaluate(u32 a, IROp op) { + switch (op) { + case IROp::Not: return ~a; + case IROp::Neg: return -(s32)a; + case IROp::BSwap16: return ((a & 0xFF00FF00) >> 8) | ((a & 0x00FF00FF) << 8); + case IROp::BSwap32: return swap32(a); + case IROp::Ext8to32: return (u32)(s32)(s8)(u8)a; + case IROp::Ext16to32: return (u32)(s32)(s16)(u16)a; + case IROp::Clz: { + int x = 31; + int count = 0; + while (x >= 0 && !(a & (1 << x))) { + count++; + x--; + } + return count; + } + default: + return -1; + } +} + IROp ArithToArithConst(IROp op) { switch (op) { case IROp::Add: return IROp::AddConst; @@ -121,6 +143,21 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::Neg: + case IROp::Not: + case IROp::BSwap16: + case IROp::BSwap32: + case IROp::Ext8to32: + case IROp::Ext16to32: + case IROp::Clz: + if (gpr.IsImm(inst.src1)) { + gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), inst.op)); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + goto doDefault; + } + break; + case IROp::AddConst: case IROp::SubConst: case IROp::AndConst: @@ -174,14 +211,6 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); goto doDefault; - case IROp::Clz: - case IROp::BSwap16: - case IROp::BSwap32: - case IROp::Ext16to32: - case IROp::Ext8to32: - gpr.MapDirtyIn(inst.dest, inst.src1); - goto doDefault; - case IROp::FMovFromGPR: if 
(gpr.IsImm(inst.src1)) { out.Write(IROp::SetConstF, inst.dest, out.AddConstant(gpr.GetImm(inst.src1))); From a22ff68e9ee0735811dfe95b34eafe65e53108fc Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 18:48:05 -0700 Subject: [PATCH 29/77] jit-ir: Skip flushing when updating PC. --- Core/MIPS/IR/IRPassSimplify.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index f6002fb46ab9..3ea09d11c52e 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -273,7 +273,16 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::Downcount: - out.Write(inst); + case IROp::SetPCConst: + goto doDefault; + + case IROp::SetPC: + if (gpr.IsImm(inst.src1)) { + out.Write(IROp::SetPCConst, out.AddConstant(gpr.GetImm(inst.src1))); + } else { + gpr.MapIn(inst.src1); + goto doDefault; + } break; // FP-only instructions don't need to flush immediates. From a1b4b5170c0295fe454e5d757278978358e5ba7c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 18:59:32 -0700 Subject: [PATCH 30/77] jit-ir: Propagate constants even for overlaps. --- Core/MIPS/IR/IRPassSimplify.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 3ea09d11c52e..02aea8fff8d2 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -126,17 +126,26 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::Xor: if (gpr.IsImm(inst.src1) && gpr.IsImm(inst.src2)) { gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), gpr.GetImm(inst.src2), inst.op)); - } else if (gpr.IsImm(inst.src2) && inst.src1 != inst.src2 && inst.dest != inst.src2) { + } else if (gpr.IsImm(inst.src2)) { + const u32 imm2 = gpr.GetImm(inst.src2); gpr.MapDirtyIn(inst.dest, inst.src1); - if (gpr.GetImm(inst.src2) == 0 && (inst.op == IROp::Add || inst.op == IROp::Or)) { + if (imm2 == 0 && (inst.op == IROp::Add || inst.op == IROp::Or)) { + // Add / Or with zero is just a Mov. if (inst.dest != inst.src1) out.Write(IROp::Mov, inst.dest, inst.src1); } else { - out.Write(ArithToArithConst(inst.op), inst.dest, inst.src1, out.AddConstant(gpr.GetImm(inst.src2))); + out.Write(ArithToArithConst(inst.op), inst.dest, inst.src1, out.AddConstant(imm2)); } - } else if (symmetric && gpr.IsImm(inst.src1) && inst.src1 != inst.src2 && inst.dest != inst.src2) { + } else if (symmetric && gpr.IsImm(inst.src1)) { + const u32 imm1 = gpr.GetImm(inst.src1); gpr.MapDirtyIn(inst.dest, inst.src2); - out.Write(ArithToArithConst(inst.op), inst.dest, inst.src2, out.AddConstant(gpr.GetImm(inst.src1))); + if (imm1 == 0 && (inst.op == IROp::Add || inst.op == IROp::Or)) { + // Add / Or with zero is just a Mov. + if (inst.dest != inst.src2) + out.Write(IROp::Mov, inst.dest, inst.src2); + } else { + out.Write(ArithToArithConst(inst.op), inst.dest, inst.src2, out.AddConstant(imm1)); + } } else { gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); goto doDefault; From 5221a02db4188f6c27cae682beaada8a04dad84b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 19:11:58 -0700 Subject: [PATCH 31/77] jit-ir: Propagate constants for shifts. This might optimize away an IRTEMP_0 in such cases. 
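The rewrite, lightly condensed from the hunk below (the fallback path that maps both inputs and defers to the generic case is omitted): when the shift-amount register holds a known value, the variable shift becomes its immediate form, and a shift by zero collapses into a plain Mov.

	case IROp::Shl:
	case IROp::Shr:
	case IROp::Ror:
	case IROp::Sar:
		if (gpr.IsImm(inst.src1) && gpr.IsImm(inst.src2)) {
			// Both operands known: fold the shift away entirely.
			gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), gpr.GetImm(inst.src2), inst.op));
		} else if (gpr.IsImm(inst.src2)) {
			// Only the amount is known: switch to the *Imm form of the same op.
			const u8 sa = gpr.GetImm(inst.src2) & 31;  // MIPS uses the low 5 bits
			gpr.MapDirtyIn(inst.dest, inst.src1);
			if (sa == 0) {
				if (inst.dest != inst.src1)
					out.Write(IROp::Mov, inst.dest, inst.src1);
			} else {
				out.Write(ShiftToShiftImm(inst.op), inst.dest, inst.src1, sa);
			}
		}
		break;
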
--- Core/MIPS/IR/IRCompALU.cpp | 6 +++--- Core/MIPS/IR/IRPassSimplify.cpp | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 82053dc63fa3..4fbec417ba71 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -182,13 +182,13 @@ void IRJit::Comp_RType3(MIPSOpcode op) { } } -void IRJit::CompShiftImm(MIPSOpcode op, IROp shiftOpConst, int sa) { +void IRJit::CompShiftImm(MIPSOpcode op, IROp shiftOpImm, int sa) { MIPSGPReg rd = _RD; MIPSGPReg rt = _RT; - ir.Write(shiftOpConst, rd, rt, sa); + ir.Write(shiftOpImm, rd, rt, sa); } -void IRJit::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpConst) { +void IRJit::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpImm) { MIPSGPReg rd = _RD; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 02aea8fff8d2..6a6df0e5b17a 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -73,6 +73,17 @@ IROp ArithToArithConst(IROp op) { } } +IROp ShiftToShiftImm(IROp op) { + switch (op) { + case IROp::Shl: return IROp::ShlImm; + case IROp::Shr: return IROp::ShrImm; + case IROp::Ror: return IROp::RorImm; + case IROp::Sar: return IROp::SarImm; + default: + return (IROp)-1; + } +} + bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out) { if (c == 1) { return passes[0](in, out); @@ -182,6 +193,27 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::Shl: + case IROp::Shr: + case IROp::Ror: + case IROp::Sar: + if (gpr.IsImm(inst.src1) && gpr.IsImm(inst.src2)) { + gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), gpr.GetImm(inst.src2), inst.op)); + } else if (gpr.IsImm(inst.src2)) { + const u8 sa = gpr.GetImm(inst.src2) & 31; + gpr.MapDirtyIn(inst.dest, inst.src1); + if (sa == 0) { + if (inst.dest != inst.src1) + out.Write(IROp::Mov, inst.dest, inst.src1); + } else { + out.Write(ShiftToShiftImm(inst.op), inst.dest, inst.src1, sa); + } + } else { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + goto doDefault; + } + break; + case IROp::ShlImm: case IROp::ShrImm: case IROp::RorImm: From 6bd31ecb272409d5742df7c5dff4dfb87a56dad3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 21:25:34 -0700 Subject: [PATCH 32/77] jit-ir: Flush consts better for a few f/v ops. 
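What makes these cases work is that FPU/VFPU side state is addressed in the same register-index space the constant tracker already understands (IRREG_FPCOND, plus the IRREG_VPFU_CTRL_BASE block added below), so it takes part in propagation just like a GPR. Excerpt from the hunk below, with an explanatory comment added:

	case IROp::FpCondToReg:
		if (gpr.IsImm(IRREG_FPCOND)) {
			// The condition bit is tracked like any other register; a known
			// value simply turns the destination into a constant as well.
			gpr.SetImm(inst.dest, gpr.GetImm(IRREG_FPCOND));
		} else {
			gpr.MapDirtyIn(inst.dest, IRREG_FPCOND);
			out.Write(inst);
		}
		break;
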
--- Core/MIPS/IR/IRInst.h | 2 ++ Core/MIPS/IR/IRPassSimplify.cpp | 47 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index def7185d7a2e..a3739898b893 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -213,6 +213,8 @@ enum { IRTEMP_RHS, // Reserved for use in branches // Hacky way to get to other state + IRREG_VPFU_CTRL_BASE = 208, + IRREG_VPFU_CC = 211, IRREG_LO = 226, // offset of lo in MIPSState / 4 IRREG_HI = 227, IRREG_FCR31 = 228, diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 6a6df0e5b17a..a6a939cd469d 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -261,6 +261,23 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::VMovFromGPR: + if (gpr.IsImm(inst.src1)) { + out.Write(IROp::SetConstV, inst.dest, out.AddConstant(gpr.GetImm(inst.src1))); + } else { + gpr.MapIn(inst.src1); + goto doDefault; + } + break; + + case IROp::FMovToGPR: + gpr.MapDirty(inst.dest); + goto doDefault; + + case IROp::VMovToGPR: + gpr.MapDirty(inst.dest); + goto doDefault; + case IROp::MfHi: case IROp::MfLo: gpr.MapDirty(inst.dest); @@ -283,6 +300,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; case IROp::StoreFloat: + case IROp::StoreFloatV: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); } else { @@ -305,6 +323,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; case IROp::LoadFloat: + case IROp::LoadFloatV: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); } else { @@ -335,9 +354,37 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::FAbs: case IROp::FSqrt: case IROp::FMov: + case IROp::FRound: + case IROp::FTrunc: + case IROp::FCeil: + case IROp::FFloor: + case IROp::FCvtSW: + out.Write(inst); + break; + + case IROp::SetCtrlVFPU: + goto doDefault; + + case IROp::FCvtWS: + // TODO: Actually, this should just use the currently set rounding mode. + // Move up with FCvtSW when that's implemented. + gpr.MapIn(IRREG_FCR31); out.Write(inst); break; + case IROp::FpCondToReg: + if (gpr.IsImm(IRREG_FPCOND)) { + gpr.SetImm(inst.dest, gpr.GetImm(IRREG_FPCOND)); + } else { + gpr.MapDirtyIn(inst.dest, IRREG_FPCOND); + out.Write(inst); + } + break; + + case IROp::VfpuCtrlToReg: + gpr.MapDirtyIn(inst.dest, IRREG_VPFU_CTRL_BASE + inst.src1); + goto doDefault; + case IROp::Syscall: case IROp::Interpret: case IROp::ExitToConst: From 7ce923d01f17926bcec8cc1b9f5217ad832f4397 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 22:11:53 -0700 Subject: [PATCH 33/77] jit-ir: Fix SetConstV. Oops. 
--- Core/MIPS/IR/IRInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 0c3c66188c9d..9b0c9eb1eb6b 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -21,7 +21,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); break; case IROp::SetConstV: - memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); + memcpy(&mips->v[voffset[inst->dest]], &constPool[inst->src1], 4); break; case IROp::Add: mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; From f638477b9a4007ce7d8dcdecf44a4e8db56f4f48 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 May 2016 22:17:39 -0700 Subject: [PATCH 34/77] jit-ir: Add the rest to PropagateConstants. Just for completeness. --- Core/MIPS/IR/IRPassSimplify.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index a6a939cd469d..3285ff194d89 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -126,6 +126,9 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::SetConst: gpr.SetImm(inst.dest, constants[inst.src1]); break; + case IROp::SetConstF: + case IROp::SetConstV: + goto doDefault; case IROp::Sub: case IROp::Slt: @@ -381,6 +384,22 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::ZeroFpCond: + case IROp::FCmpUnordered: + case IROp::FCmpEqual: + case IROp::FCmpEqualUnordered: + case IROp::FCmpLessOrdered: + case IROp::FCmpLessUnordered: + case IROp::FCmpLessEqualOrdered: + case IROp::FCmpLessEqualUnordered: + gpr.MapDirty(IRREG_FPCOND); + goto doDefault; + + case IROp::RestoreRoundingMode: + case IROp::ApplyRoundingMode: + case IROp::UpdateRoundingMode: + goto doDefault; + case IROp::VfpuCtrlToReg: gpr.MapDirtyIn(inst.dest, IRREG_VPFU_CTRL_BASE + inst.src1); goto doDefault; From f6d245f3c4a5ce32fc978652ac05578deff3858e Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 9 May 2016 00:13:01 -0700 Subject: [PATCH 35/77] jit-ir: Remove redundant simplify pass. This is just doing the same thing as the const folding pass, really. 
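The removed pass only strength-reduced the degenerate AddConst forms: adding zero, and adding an immediate to the zero register. Constant folding already produces both rewrites whenever it knows an operand, so the in-place pass just duplicated work. Roughly, with illustrative stand-in types:

    #include <cstdint>

    enum class SketchOp : uint8_t { AddConst, Mov, SetConst };
    struct SketchInst { SketchOp op; uint8_t dest, src1; uint32_t imm; };

    // Both rewrites below also fall out of PropagateConstants on its own.
    SketchInst FoldAddConst(uint8_t dest, uint8_t src1, uint32_t imm) {
        if (imm == 0)
            return {SketchOp::Mov, dest, src1, 0};      // x + 0 == x
        if (src1 == 0)                                  // MIPS r0 is hardwired to zero
            return {SketchOp::SetConst, dest, 0, imm};  // 0 + imm == imm
        return {SketchOp::AddConst, dest, src1, imm};   // nothing to simplify
    }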
--- Core/MIPS/IR/IRInst.cpp | 4 ---- Core/MIPS/IR/IRInst.h | 2 -- Core/MIPS/IR/IRJit.cpp | 2 -- Core/MIPS/IR/IRPassSimplify.cpp | 19 +------------------ Core/MIPS/IR/IRPassSimplify.h | 3 --- 5 files changed, 1 insertion(+), 29 deletions(-) diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 66ee7561f2cb..6b3231ce6d9c 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -143,10 +143,6 @@ int IRWriter::AddConstantFloat(float value) { return AddConstant(val); } -void IRWriter::Simplify() { - SimplifyInPlace(&insts_[0], (int)insts_.size(), constPool_.data()); -} - const char *GetGPRName(int r) { if (r < 32) { return currentDebugMIPS->GetRegName(0, r); diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index a3739898b893..77d71ed91534 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -270,8 +270,6 @@ class IRWriter { constPool_.clear(); } - void Simplify(); - const std::vector &GetInstructions() const { return insts_; } const std::vector &GetConstants() const { return constPool_; } diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index a7223bcc317a..76b58161d3e2 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -245,8 +245,6 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { } } - ir.Simplify(); - IRWriter simplified; IRWriter *code = &ir; if (true) { diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 3285ff194d89..b296a5b3a343 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -1,24 +1,7 @@ +#include "Common/Log.h" #include "Core/MIPS/IR/IRPassSimplify.h" #include "Core/MIPS/IR/IRRegCache.h" -void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool) { - for (int i = 0; i < count; i++) { - switch (inst[i].op) { - case IROp::AddConst: - if (constPool[inst[i].src2] == 0) - inst[i].op = IROp::Mov; - else if (inst[i].src1 == 0) { - inst[i].op = IROp::SetConst; - inst[i].src1 = inst[i].src2; - } - break; - default: - break; - } - } -} - - u32 Evaluate(u32 a, u32 b, IROp op) { switch (op) { case IROp::Add: case IROp::AddConst: return a + b; diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index efba1749eb97..5bf3f53fb9eb 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -2,9 +2,6 @@ #include "Core/MIPS/IR/IRInst.h" -// Dumb example of a simplification pass that can't add or remove instructions. -void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool); - typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out); bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out); From ccb8f8d77e5fef95b2cde188b1f303d40ac466af Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 9 May 2016 00:36:38 -0700 Subject: [PATCH 36/77] jit-ir: Fix replacement hooks. 
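With this fix, a full (non-hook) replacement ends its block from the IR side: the frontend writes out the pending downcount and exits through RA, instead of the interpreter returning RA directly. A rough sketch of the emitted tail, using a stand-in writer type rather than the real IRWriter:

    #include <cstdint>
    #include <vector>

    enum class SketchOp : uint8_t { Downcount, ExitToReg };

    // Stand-in for IRWriter::Write(IROp, u8, u8, u8).
    struct SketchWriter {
        struct Inst { SketchOp op; uint8_t a, b, c; };
        std::vector<Inst> insts;
        void Write(SketchOp op, uint8_t a = 0, uint8_t b = 0, uint8_t c = 0) {
            insts.push_back({op, a, b, c});
        }
    };

    // Charge the cycles accumulated while compiling, then exit through RA.
    // The 16-bit cycle count is split across the two 8-bit operand slots.
    void EndBlockAfterReplacement(SketchWriter &ir, int downcountAmount, uint8_t raReg) {
        ir.Write(SketchOp::Downcount, 0, downcountAmount & 0xFF, downcountAmount >> 8);
        ir.Write(SketchOp::ExitToReg, raReg, 0, 0);
    }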
--- Core/MIPS/IR/IRInterpreter.cpp | 2 +- Core/MIPS/IR/IRJit.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 9b0c9eb1eb6b..03b06c77e129 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -384,7 +384,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c const ReplacementTableEntry *f = GetReplacementFunc(funcIndex); int cycles = f->replaceFunc(); mips->downcount -= cycles; - return mips->r[MIPS_REG_RA]; + break; } case IROp::Break: diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 76b58161d3e2..e5bddd6c8984 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -340,6 +340,8 @@ void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this); } else { ApplyRoundingMode(); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + ir.Write(IROp::ExitToReg, MIPS_REG_RA, 0, 0); js.compiling = false; } } else { From eb6551d72a71288477abdef92a7aebf7ba5dba59 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 9 May 2016 01:10:04 -0700 Subject: [PATCH 37/77] jit-ir: Correct downcount handling. Oops, was wrong - already accounted for delay slots. Clear so we don't double count when emitting a syscall. Fixes FF4 utility msg flickering. --- Core/MIPS/IR/IRCompBranch.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index e2d6c99c8523..acfdfaffe2e1 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -66,8 +66,9 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { MIPSOpcode delaySlotOp = GetOffsetInstruction(1); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); - int dcAmount = js.downcountAmount + 1; + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + js.downcountAmount = 0; MIPSGPReg lhs = rs; MIPSGPReg rhs = rt; @@ -109,8 +110,9 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool MIPSOpcode delaySlotOp = GetOffsetInstruction(1); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); - int dcAmount = js.downcountAmount + 1; + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + js.downcountAmount = 0; MIPSGPReg lhs = rs; if (!delaySlotIsNice) { // if likely, we don't need this @@ -184,8 +186,9 @@ void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { if (!likely) CompileDelaySlot(); - int dcAmount = js.downcountAmount + 1; + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + js.downcountAmount = 0; FlushAll(); // Not taken @@ -223,8 +226,9 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { logBlocks = 1; ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC); - int dcAmount = js.downcountAmount + 1; + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + js.downcountAmount = 0; // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. 
@@ -271,8 +275,9 @@ void IRJit::Comp_Jump(MIPSOpcode op) { u32 off = _IMM26 << 2; u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off; - int dcAmount = js.downcountAmount + 1; + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + js.downcountAmount = 0; // Might be a stubbed address or something? if (!Memory::IsValidAddress(targetAddr)) { @@ -320,8 +325,9 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { if (andLink && rs == rd) delaySlotIsNice = false; - int dcAmount = js.downcountAmount + 1; + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + js.downcountAmount = 0; int destReg; if (IsSyscall(delaySlotOp)) { @@ -363,18 +369,18 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { } void IRJit::Comp_Syscall(MIPSOpcode op) { - // If we're in a delay slot, this is off by one. - const int offset = js.inDelaySlot ? -1 : 0; RestoreRoundingMode(); - js.downcountAmount = -offset; - int dcAmount = js.downcountAmount + 1; + // Note: If we're in a delay slot, this is off by one compared to the interpreter. + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); + js.downcountAmount = 0; FlushAll(); ir.Write(IROp::Syscall, 0, ir.AddConstant(op.encoding)); + // TODO: This never happens because of Syscall exiting. ApplyRoundingMode(); js.compiling = false; } From f50617d67924b32c6193648cc74551d9bb93db84 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 May 2016 00:37:08 +0200 Subject: [PATCH 38/77] Skip const flush on set float constant --- Core/MIPS/IR/IRPassSimplify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index b296a5b3a343..929eeed9759f 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -427,4 +427,4 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } } return logBlocks; -} \ No newline at end of file +} From 6e44e97ffa48ffa8fbe0e4896e6696fb0851fb7d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 May 2016 19:25:51 +0200 Subject: [PATCH 39/77] Refactor prep: Split JitInterface into MIPSFrontendInterface and JitInterface --- Core/HLE/ReplaceTables.cpp | 2 +- Core/MIPS/ARM/ArmJit.h | 2 +- Core/MIPS/ARM64/Arm64Jit.h | 2 +- Core/MIPS/IR/IRJit.cpp | 4 ++- Core/MIPS/IR/IRJit.h | 2 +- Core/MIPS/IR/IRPassSimplify.cpp | 4 +++ Core/MIPS/JitCommon/JitCommon.h | 46 +++++++++++++++++++-------------- Core/MIPS/MIPS/MipsJit.h | 3 ++- Core/MIPS/MIPSTables.cpp | 5 ++-- Core/MIPS/MIPSTables.h | 4 +-- Core/MIPS/x86/Jit.h | 2 +- 11 files changed, 44 insertions(+), 32 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index 06b5392d9d51..47a8b2d2ad6e 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -1109,7 +1109,7 @@ static int Hook_omertachinmokunookitethelegacy_download_frame() { return 0; } -#define JITFUNC(f) (&MIPSComp::JitInterface::f) +#define JITFUNC(f) (&MIPSComp::MIPSFrontendInterface::f) // Can either replace with C functions or functions emitted in Asm/ArmAsm. 
static const ReplacementTableEntry entries[] = { diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index efdde624bf36..7ec62e04c87a 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -33,7 +33,7 @@ namespace MIPSComp { -class ArmJit : public ArmGen::ARMXCodeBlock, public JitInterface { +class ArmJit : public ArmGen::ARMXCodeBlock, public JitInterface, public MIPSFrontendInterface { public: ArmJit(MIPSState *mips); virtual ~ArmJit(); diff --git a/Core/MIPS/ARM64/Arm64Jit.h b/Core/MIPS/ARM64/Arm64Jit.h index e341df3e7989..4aec5feccacb 100644 --- a/Core/MIPS/ARM64/Arm64Jit.h +++ b/Core/MIPS/ARM64/Arm64Jit.h @@ -33,7 +33,7 @@ namespace MIPSComp { -class Arm64Jit : public Arm64Gen::ARM64CodeBlock, public JitInterface { +class Arm64Jit : public Arm64Gen::ARM64CodeBlock, public JitInterface, public MIPSFrontendInterface { public: Arm64Jit(MIPSState *mips); virtual ~Arm64Jit(); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index e5bddd6c8984..ee5f5c8206b9 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -254,6 +254,8 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) logBlocks = 1; code = &simplified; + if (ir.GetInstructions().size() >= 24) + logBlocks = 1; } b->SetInstructions(code->GetInstructions(), code->GetConstants()); @@ -408,4 +410,4 @@ MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) { return b->GetOriginalFirstOp(); } -} // namespace MIPSComp \ No newline at end of file +} // namespace MIPSComp diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index e9881f6a6e0b..76a27d1a4bef 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -101,7 +101,7 @@ class IRBlockCache { std::vector blocks_; }; -class IRJit : public JitInterface { +class IRJit : public JitInterface, public MIPSFrontendInterface{ public: IRJit(MIPSState *mips); virtual ~IRJit(); diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 929eeed9759f..0e5353ff5717 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -121,6 +121,10 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::And: case IROp::Or: case IROp::Xor: + // Regularize, for the add/or check below. 
+ if (symmetric && inst.src2 == inst.dest && inst.src1 != inst.src2) { + std::swap(inst.src1, inst.src2); + } if (gpr.IsImm(inst.src1) && gpr.IsImm(inst.src2)) { gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), gpr.GetImm(inst.src2), inst.op)); } else if (gpr.IsImm(inst.src2)) { diff --git a/Core/MIPS/JitCommon/JitCommon.h b/Core/MIPS/JitCommon/JitCommon.h index e27707ea3558..62a91a30c209 100644 --- a/Core/MIPS/JitCommon/JitCommon.h +++ b/Core/MIPS/JitCommon/JitCommon.h @@ -42,27 +42,11 @@ class MIPSState; namespace MIPSComp { void JitAt(); - class JitInterface { + class MIPSFrontendInterface { public: - virtual ~JitInterface() {} + virtual ~MIPSFrontendInterface() {} - virtual bool DescribeCodePtr(const u8 *ptr, std::string &name) = 0; - virtual const u8 *GetDispatcher() const = 0; - virtual JitBlockCache *GetBlockCache() = 0; - virtual void InvalidateCache() = 0; - virtual void InvalidateCacheAt(u32 em_address, int length = 4) = 0; - virtual void DoState(PointerWrap &p) = 0; - virtual void DoDummyState(PointerWrap &p) = 0; - virtual void RunLoopUntil(u64 globalticks) = 0; - virtual void Compile(u32 em_address) = 0; - virtual void ClearCache() = 0; virtual void EatPrefix() = 0; - virtual MIPSOpcode GetOriginalOp(MIPSOpcode op) = 0; - - // Block linking. This may need to work differently for whole-function JITs and stuff - // like that. - virtual void LinkBlock(u8 *exitPoint, const u8 *entryPoint) = 0; - virtual void UnlinkBlock(u8 *checkedEntry, u32 originalAddress) = 0; virtual void Comp_Generic(MIPSOpcode op) = 0; virtual void Comp_RunBlock(MIPSOpcode op) = 0; @@ -132,8 +116,30 @@ namespace MIPSComp { virtual int Replace_fabsf() = 0; }; - typedef void (JitInterface::*MIPSCompileFunc)(MIPSOpcode opcode); - typedef int (JitInterface::*MIPSReplaceFunc)(); + class JitInterface { + public: + virtual ~JitInterface() {} + + virtual bool DescribeCodePtr(const u8 *ptr, std::string &name) = 0; + virtual const u8 *GetDispatcher() const = 0; + virtual JitBlockCache *GetBlockCache() = 0; + virtual void InvalidateCache() = 0; + virtual void InvalidateCacheAt(u32 em_address, int length = 4) = 0; + virtual void DoState(PointerWrap &p) = 0; + virtual void DoDummyState(PointerWrap &p) = 0; + virtual void RunLoopUntil(u64 globalticks) = 0; + virtual void Compile(u32 em_address) = 0; + virtual void ClearCache() = 0; + virtual MIPSOpcode GetOriginalOp(MIPSOpcode op) = 0; + + // Block linking. This may need to work differently for whole-function JITs and stuff + // like that. 
+ virtual void LinkBlock(u8 *exitPoint, const u8 *entryPoint) = 0; + virtual void UnlinkBlock(u8 *checkedEntry, u32 originalAddress) = 0; + }; + + typedef void (MIPSFrontendInterface::*MIPSCompileFunc)(MIPSOpcode opcode); + typedef int (MIPSFrontendInterface::*MIPSReplaceFunc)(); extern JitInterface *jit; diff --git a/Core/MIPS/MIPS/MipsJit.h b/Core/MIPS/MIPS/MipsJit.h index 1fcb6faea6fd..8c5dbf9bd380 100644 --- a/Core/MIPS/MIPS/MipsJit.h +++ b/Core/MIPS/MIPS/MipsJit.h @@ -20,6 +20,7 @@ #include "Common/MipsEmitter.h" using namespace MIPSGen; +#include "Core/MIPS/JitCommon/JitCommon.h" #include "Core/MIPS/JitCommon/JitState.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" #include "../MIPSVFPUUtils.h" @@ -31,7 +32,7 @@ using namespace MIPSGen; namespace MIPSComp { -class MipsJit : public MIPSGen::MIPSCodeBlock, public JitInterface +class MipsJit : public MIPSGen::MIPSCodeBlock, public JitInterface, public MIPSFrontendInterface { public: MipsJit(MIPSState *mips); diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index f0b51db0c0df..76e260b377fb 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -82,7 +82,7 @@ struct MIPSInstruction { #define ENCODING(a) {a} #define INSTR(name, comp, dis, inter, flags) {Instruc, name, comp, dis, inter, MIPSInfo(flags)} -#define JITFUNC(f) (&JitInterface::f) +#define JITFUNC(f) (&MIPSFrontendInterface::f) using namespace MIPSDis; using namespace MIPSInt; @@ -912,7 +912,7 @@ const MIPSInstruction *MIPSGetInstruction(MIPSOpcode op) { return instr; } -void MIPSCompileOp(MIPSOpcode op, MIPSComp::JitInterface *jit) { +void MIPSCompileOp(MIPSOpcode op, MIPSComp::MIPSFrontendInterface *jit) { if (op == 0) return; const MIPSInstruction *instr = MIPSGetInstruction(op); @@ -923,7 +923,6 @@ void MIPSCompileOp(MIPSOpcode op, MIPSComp::JitInterface *jit) { } else { ERROR_LOG_REPORT(CPU,"MIPSCompileOp %08x failed",op.encoding); } - if (info & OUT_EAT_PREFIX) jit->EatPrefix(); } else { diff --git a/Core/MIPS/MIPSTables.h b/Core/MIPS/MIPSTables.h index 39c8c06753d5..3987aa3e87f7 100644 --- a/Core/MIPS/MIPSTables.h +++ b/Core/MIPS/MIPSTables.h @@ -112,10 +112,10 @@ typedef void (CDECL *MIPSDisFunc)(MIPSOpcode opcode, char *out); typedef void (CDECL *MIPSInterpretFunc)(MIPSOpcode opcode); namespace MIPSComp { - class JitInterface; + class MIPSFrontendInterface; } -void MIPSCompileOp(MIPSOpcode op, MIPSComp::JitInterface *jit); +void MIPSCompileOp(MIPSOpcode op, MIPSComp::MIPSFrontendInterface *jit); void MIPSDisAsm(MIPSOpcode op, u32 pc, char *out, bool tabsToSpaces = false); MIPSInfo MIPSGetInfo(MIPSOpcode op); void MIPSInterpret(MIPSOpcode op); //only for those rare ones diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index a6f44443311a..4206f4d307a5 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -46,7 +46,7 @@ struct RegCacheState { FPURegCacheState fpr; }; -class Jit : public Gen::XCodeBlock, public JitInterface { +class Jit : public Gen::XCodeBlock, public JitInterface, public MIPSFrontendInterface { public: Jit(MIPSState *mips); virtual ~Jit(); From e806c369b2f3b18ea6c8095ddbfe920fc4427b20 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 May 2016 19:57:18 +0200 Subject: [PATCH 40/77] Separate the IR frontend from the IR "Jit" --- Core/MIPS/IR/IRCompALU.cpp | 20 +++--- Core/MIPS/IR/IRCompBranch.cpp | 25 ++++--- Core/MIPS/IR/IRCompFPU.cpp | 10 +-- Core/MIPS/IR/IRCompLoadStore.cpp | 6 +- Core/MIPS/IR/IRCompVFPU.cpp | 82 +++++++++++----------- Core/MIPS/IR/IRJit.cpp | 86 +++++++++++++----------- 
Core/MIPS/IR/IRJit.h | 112 +++++++++++++------------------ 7 files changed, 162 insertions(+), 179 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 4fbec417ba71..6500d1b0df11 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -45,7 +45,7 @@ using namespace MIPSAnalyst; namespace MIPSComp { -void IRJit::Comp_IType(MIPSOpcode op) { +void IRFrontend::Comp_IType(MIPSOpcode op) { CONDITIONAL_DISABLE; s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension @@ -87,7 +87,7 @@ void IRJit::Comp_IType(MIPSOpcode op) { } } -void IRJit::Comp_RType2(MIPSOpcode op) { +void IRFrontend::Comp_RType2(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rs = _RS; @@ -110,7 +110,7 @@ void IRJit::Comp_RType2(MIPSOpcode op) { } } -void IRJit::Comp_RType3(MIPSOpcode op) { +void IRFrontend::Comp_RType3(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rt = _RT; @@ -182,13 +182,13 @@ void IRJit::Comp_RType3(MIPSOpcode op) { } } -void IRJit::CompShiftImm(MIPSOpcode op, IROp shiftOpImm, int sa) { +void IRFrontend::CompShiftImm(MIPSOpcode op, IROp shiftOpImm, int sa) { MIPSGPReg rd = _RD; MIPSGPReg rt = _RT; ir.Write(shiftOpImm, rd, rt, sa); } -void IRJit::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpImm) { +void IRFrontend::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpImm) { MIPSGPReg rd = _RD; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; @@ -196,7 +196,7 @@ void IRJit::CompShiftVar(MIPSOpcode op, IROp shiftOp, IROp shiftOpImm) { ir.Write(shiftOp, rd, rt, IRTEMP_0); } -void IRJit::Comp_ShiftType(MIPSOpcode op) { +void IRFrontend::Comp_ShiftType(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rs = _RS; MIPSGPReg rd = _RD; @@ -221,7 +221,7 @@ void IRJit::Comp_ShiftType(MIPSOpcode op) { } } -void IRJit::Comp_Special3(MIPSOpcode op) { +void IRFrontend::Comp_Special3(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rs = _RS; MIPSGPReg rt = _RT; @@ -260,7 +260,7 @@ void IRJit::Comp_Special3(MIPSOpcode op) { } -void IRJit::Comp_Allegrex(MIPSOpcode op) { +void IRFrontend::Comp_Allegrex(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rt = _RT; MIPSGPReg rd = _RD; @@ -284,7 +284,7 @@ void IRJit::Comp_Allegrex(MIPSOpcode op) { } } -void IRJit::Comp_Allegrex2(MIPSOpcode op) { +void IRFrontend::Comp_Allegrex2(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rt = _RT; MIPSGPReg rd = _RD; @@ -305,7 +305,7 @@ void IRJit::Comp_Allegrex2(MIPSOpcode op) { } } -void IRJit::Comp_MulDivType(MIPSOpcode op) { +void IRFrontend::Comp_MulDivType(MIPSOpcode op) { CONDITIONAL_DISABLE; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index acfdfaffe2e1..3dda003b562d 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -53,7 +53,7 @@ namespace MIPSComp { using namespace Arm64Gen; -void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { +void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); return; @@ -98,7 +98,7 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { js.compiling = false; } -void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely) { +void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", 
GetCompilerPC(), js.blockStart); return; @@ -135,7 +135,7 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool js.compiling = false; } -void IRJit::Comp_RelBranch(MIPSOpcode op) { +void IRFrontend::Comp_RelBranch(MIPSOpcode op) { // The CC flags here should be opposite of the actual branch becuase they skip the branching action. switch (op >> 26) { case 4: BranchRSRTComp(op, IRComparison::NotEqual, false); break;//beq @@ -156,7 +156,7 @@ void IRJit::Comp_RelBranch(MIPSOpcode op) { } } -void IRJit::Comp_RelBranchRI(MIPSOpcode op) { +void IRFrontend::Comp_RelBranchRI(MIPSOpcode op) { switch ((op >> 16) & 0x1F) { case 0: BranchRSZeroComp(op, IRComparison::GreaterEqual, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz case 1: BranchRSZeroComp(op, IRComparison::Less, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez @@ -173,7 +173,7 @@ void IRJit::Comp_RelBranchRI(MIPSOpcode op) { } // If likely is set, discard the branch slot if NOT taken. -void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { +void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); return; @@ -201,7 +201,7 @@ void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { js.compiling = false; } -void IRJit::Comp_FPUBranch(MIPSOpcode op) { +void IRFrontend::Comp_FPUBranch(MIPSOpcode op) { switch((op >> 16) & 0x1f) { case 0: BranchFPFlag(op, IRComparison::NotEqual, false); break; // bc1f case 1: BranchFPFlag(op, IRComparison::Equal, false); break; // bc1t @@ -214,7 +214,7 @@ void IRJit::Comp_FPUBranch(MIPSOpcode op) { } // If likely is set, discard the branch slot if NOT taken. 
-void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { +void IRFrontend::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); return; @@ -223,7 +223,6 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 targetAddr = GetCompilerPC() + offset + 4; MIPSOpcode delaySlotOp = GetOffsetInstruction(1); - logBlocks = 1; ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC); int dcAmount = js.downcountAmount; @@ -257,7 +256,7 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { js.compiling = false; } -void IRJit::Comp_VBranch(MIPSOpcode op) { +void IRFrontend::Comp_VBranch(MIPSOpcode op) { switch ((op >> 16) & 3) { case 0: BranchVFPUFlag(op, IRComparison::NotEqual, false); break; // bvf case 1: BranchVFPUFlag(op, IRComparison::Equal, false); break; // bvt @@ -266,7 +265,7 @@ void IRJit::Comp_VBranch(MIPSOpcode op) { } } -void IRJit::Comp_Jump(MIPSOpcode op) { +void IRFrontend::Comp_Jump(MIPSOpcode op) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); return; @@ -311,7 +310,7 @@ void IRJit::Comp_Jump(MIPSOpcode op) { js.compiling = false; } -void IRJit::Comp_JumpReg(MIPSOpcode op) { +void IRFrontend::Comp_JumpReg(MIPSOpcode op) { if (js.inDelaySlot) { ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart); return; @@ -368,7 +367,7 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { js.compiling = false; } -void IRJit::Comp_Syscall(MIPSOpcode op) { +void IRFrontend::Comp_Syscall(MIPSOpcode op) { RestoreRoundingMode(); // Note: If we're in a delay slot, this is off by one compared to the interpreter. 
@@ -385,7 +384,7 @@ void IRJit::Comp_Syscall(MIPSOpcode op) { js.compiling = false; } -void IRJit::Comp_Break(MIPSOpcode op) { +void IRFrontend::Comp_Break(MIPSOpcode op) { ir.Write(IROp::Break); js.compiling = false; } diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index b0ff42cf261c..1ca4a08e96ac 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -54,7 +54,7 @@ namespace MIPSComp { -void IRJit::Comp_FPU3op(MIPSOpcode op) { +void IRFrontend::Comp_FPU3op(MIPSOpcode op) { CONDITIONAL_DISABLE; int ft = _FT; @@ -72,7 +72,7 @@ void IRJit::Comp_FPU3op(MIPSOpcode op) { } } -void IRJit::Comp_FPULS(MIPSOpcode op) { +void IRFrontend::Comp_FPULS(MIPSOpcode op) { CONDITIONAL_DISABLE; s32 offset = _IMM16; int ft = _FT; @@ -93,7 +93,7 @@ void IRJit::Comp_FPULS(MIPSOpcode op) { } } -void IRJit::Comp_FPUComp(MIPSOpcode op) { +void IRFrontend::Comp_FPUComp(MIPSOpcode op) { DISABLE; // IROps not yet implemented int opc = op & 0xF; @@ -136,7 +136,7 @@ void IRJit::Comp_FPUComp(MIPSOpcode op) { ir.Write(irOp, fs, ft); } -void IRJit::Comp_FPU2op(MIPSOpcode op) { +void IRFrontend::Comp_FPU2op(MIPSOpcode op) { CONDITIONAL_DISABLE; int fs = _FS; @@ -192,7 +192,7 @@ void IRJit::Comp_FPU2op(MIPSOpcode op) { } } -void IRJit::Comp_mxc1(MIPSOpcode op) { +void IRFrontend::Comp_mxc1(MIPSOpcode op) { CONDITIONAL_DISABLE; int fs = _FS; diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index 4e702a544f2a..41c76a1a7d83 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -65,11 +65,11 @@ #define DISABLE { Comp_Generic(op); return; } namespace MIPSComp { - void IRJit::Comp_ITypeMemLR(MIPSOpcode op, bool load) { + void IRFrontend::Comp_ITypeMemLR(MIPSOpcode op, bool load) { DISABLE; } - void IRJit::Comp_ITypeMem(MIPSOpcode op) { + void IRFrontend::Comp_ITypeMem(MIPSOpcode op) { CONDITIONAL_DISABLE; int offset = (signed short)(op & 0xFFFF); @@ -124,7 +124,7 @@ namespace MIPSComp { } } - void IRJit::Comp_Cache(MIPSOpcode op) { + void IRFrontend::Comp_Cache(MIPSOpcode op) { // int imm = (s16)(op & 0xFFFF); // int rs = _RS; // int addr = R(rs) + imm; diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 1f2623ac67e3..e6f5ca3a8757 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -51,7 +51,7 @@ namespace MIPSComp { - void IRJit::Comp_VPFX(MIPSOpcode op) { + void IRFrontend::Comp_VPFX(MIPSOpcode op) { CONDITIONAL_DISABLE; int data = op & 0xFFFFF; int regnum = (op >> 24) & 3; @@ -74,7 +74,7 @@ namespace MIPSComp { } } - void IRJit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { + void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { if (prefix == 0xE4) return; @@ -128,7 +128,7 @@ namespace MIPSComp { } } - void IRJit::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) { + void IRFrontend::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) { _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); GetVectorRegs(regs, sz, vectorReg); @@ -143,7 +143,7 @@ namespace MIPSComp { } } - void IRJit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { + void IRFrontend::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); if (!js.prefixD) return; @@ -176,11 +176,11 @@ namespace MIPSComp { */ } - void IRJit::Comp_SV(MIPSOpcode op) { + void IRFrontend::Comp_SV(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_SVQ(MIPSOpcode op) { + void IRFrontend::Comp_SVQ(MIPSOpcode op) { int imm = (signed short)(op & 
0xFFFC); int vt = (((op >> 16) & 0x1f)) | ((op & 1) << 5); MIPSGPReg rs = _RS; @@ -215,37 +215,37 @@ namespace MIPSComp { } } - void IRJit::Comp_VVectorInit(MIPSOpcode op) { + void IRFrontend::Comp_VVectorInit(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VIdt(MIPSOpcode op) { + void IRFrontend::Comp_VIdt(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VMatrixInit(MIPSOpcode op) { + void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VHdp(MIPSOpcode op) { + void IRFrontend::Comp_VHdp(MIPSOpcode op) { DISABLE; } static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f }; - void IRJit::Comp_Vhoriz(MIPSOpcode op) { + void IRFrontend::Comp_Vhoriz(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VDot(MIPSOpcode op) { + void IRFrontend::Comp_VDot(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VecDo3(MIPSOpcode op) { + void IRFrontend::Comp_VecDo3(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VV2Op(MIPSOpcode op) { + void IRFrontend::Comp_VV2Op(MIPSOpcode op) { CONDITIONAL_DISABLE; // Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) { @@ -254,19 +254,19 @@ namespace MIPSComp { DISABLE; } - void IRJit::Comp_Vi2f(MIPSOpcode op) { + void IRFrontend::Comp_Vi2f(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vh2f(MIPSOpcode op) { + void IRFrontend::Comp_Vh2f(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vf2i(MIPSOpcode op) { + void IRFrontend::Comp_Vf2i(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Mftv(MIPSOpcode op) { + void IRFrontend::Comp_Mftv(MIPSOpcode op) { int imm = op & 0xFF; MIPSGPReg rt = _RT; switch ((op >> 21) & 0x1f) { @@ -275,7 +275,6 @@ namespace MIPSComp { if (rt != 0) { if (imm < 128) { //R(rt) = VI(imm); ir.Write(IROp::VMovToGPR, rt, imm); - logBlocks = 1; } else { DISABLE; } @@ -285,7 +284,6 @@ namespace MIPSComp { case 7: // mtv if (imm < 128) { ir.Write(IROp::VMovFromGPR, imm, rt); - logBlocks = 1; } else { DISABLE; } @@ -296,93 +294,93 @@ namespace MIPSComp { } } - void IRJit::Comp_Vmfvc(MIPSOpcode op) { + void IRFrontend::Comp_Vmfvc(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vmtvc(MIPSOpcode op) { + void IRFrontend::Comp_Vmtvc(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vmmov(MIPSOpcode op) { + void IRFrontend::Comp_Vmmov(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VScl(MIPSOpcode op) { + void IRFrontend::Comp_VScl(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vmmul(MIPSOpcode op) { + void IRFrontend::Comp_Vmmul(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vmscl(MIPSOpcode op) { + void IRFrontend::Comp_Vmscl(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vtfm(MIPSOpcode op) { + void IRFrontend::Comp_Vtfm(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VCrs(MIPSOpcode op) { + void IRFrontend::Comp_VCrs(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VDet(MIPSOpcode op) { + void IRFrontend::Comp_VDet(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vi2x(MIPSOpcode op) { + void IRFrontend::Comp_Vi2x(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vx2i(MIPSOpcode op) { + void IRFrontend::Comp_Vx2i(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_VCrossQuat(MIPSOpcode op) { + void IRFrontend::Comp_VCrossQuat(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vcmp(MIPSOpcode op) { + void IRFrontend::Comp_Vcmp(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vcmov(MIPSOpcode op) { + void IRFrontend::Comp_Vcmov(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Viim(MIPSOpcode op) { + void IRFrontend::Comp_Viim(MIPSOpcode op) { DISABLE; 
} - void IRJit::Comp_Vfim(MIPSOpcode op) { + void IRFrontend::Comp_Vfim(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vcst(MIPSOpcode op) { + void IRFrontend::Comp_Vcst(MIPSOpcode op) { DISABLE; } // Very heavily used by FF:CC. Should be replaced by a fast approximation instead of // calling the math library. - void IRJit::Comp_VRot(MIPSOpcode op) { + void IRFrontend::Comp_VRot(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vsgn(MIPSOpcode op) { + void IRFrontend::Comp_Vsgn(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vocp(MIPSOpcode op) { + void IRFrontend::Comp_Vocp(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_ColorConv(MIPSOpcode op) { + void IRFrontend::Comp_ColorConv(MIPSOpcode op) { DISABLE; } - void IRJit::Comp_Vbfy(MIPSOpcode op) { + void IRFrontend::Comp_Vbfy(MIPSOpcode op) { DISABLE; } } diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index ee5f5c8206b9..9ed4f2eb70de 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -42,11 +42,7 @@ namespace MIPSComp { -IRJit::IRJit(MIPSState *mips) : mips_(mips) { - logBlocks = 0; - dontLogBlocks = 0; - js.startDefaultPrefix = mips_->HasDefaultPrefix(); - js.currentRoundingFunc = convertS0ToSCRATCH1[0]; +IRJit::IRJit(MIPSState *mips) : mips_(mips), frontend_(mips->HasDefaultPrefix()) { u32 size = 128 * 1024; // blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); InitIR(); @@ -55,7 +51,14 @@ IRJit::IRJit(MIPSState *mips) : mips_(mips) { IRJit::~IRJit() { } -void IRJit::DoState(PointerWrap &p) { +IRFrontend::IRFrontend(bool startDefaultPrefix) { + logBlocks = 0; + dontLogBlocks = 0; + js.startDefaultPrefix = startDefaultPrefix; + // js.currentRoundingFunc = convertS0ToSCRATCH1[0]; +} + +void IRFrontend::DoState(PointerWrap &p) { auto s = p.Section("Jit", 1, 2); if (!s) return; @@ -67,10 +70,10 @@ void IRJit::DoState(PointerWrap &p) { } else { js.hasSetRounding = 1; } +} - if (p.GetMode() == PointerWrap::MODE_READ) { - js.currentRoundingFunc = convertS0ToSCRATCH1[(mips_->fcr31) & 3]; - } +void IRJit::DoState(PointerWrap &p) { + frontend_.DoState(p); } // This is here so the savestate matches between jit and non-jit. 
@@ -87,11 +90,11 @@ void IRJit::DoDummyState(PointerWrap &p) { } } -void IRJit::FlushAll() { +void IRFrontend::FlushAll() { FlushPrefixV(); } -void IRJit::FlushPrefixV() { +void IRFrontend::FlushPrefixV() { if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) { ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_SPREFIX, ir.AddConstant(js.prefixS)); js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY); @@ -121,7 +124,7 @@ void IRJit::InvalidateCacheAt(u32 em_address, int length) { blocks_.InvalidateICache(em_address, length); } -void IRJit::EatInstruction(MIPSOpcode op) { +void IRFrontend::EatInstruction(MIPSOpcode op) { MIPSInfo info = MIPSGetInfo(op); if (info & DELAYSLOT) { ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op."); @@ -135,23 +138,15 @@ void IRJit::EatInstruction(MIPSOpcode op) { js.downcountAmount += MIPSGetInstructionCycleEstimate(op); } -void IRJit::CompileDelaySlot() { +void IRFrontend::CompileDelaySlot() { js.inDelaySlot = true; MIPSOpcode op = GetOffsetInstruction(1); MIPSCompileOp(op, this); js.inDelaySlot = false; } -void IRJit::Compile(u32 em_address) { - PROFILE_THIS_SCOPE("jitc"); - - int block_num = blocks_.AllocateBlock(em_address); - IRBlock *b = blocks_.GetBlock(block_num); - DoJit(em_address, b); - b->Finalize(block_num); // Overwrites the first instruction - +bool IRFrontend::CheckRounding() { bool cleanSlate = false; - if (js.hasSetRounding && !js.lastSetRounding) { WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); // Won't loop, since hasSetRounding is only ever set to 1. @@ -161,16 +156,27 @@ void IRJit::Compile(u32 em_address) { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + WARN_LOG(JIT, "An uneaten prefix at end of block"); js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. js.startDefaultPrefix = false; - // TODO ARM64: This crashes. - //cleanSlate = true; + // TODO: Make sure this works. + // cleanSlate = true; } - if (cleanSlate) { + return cleanSlate; +} + +void IRJit::Compile(u32 em_address) { + PROFILE_THIS_SCOPE("jitc"); + + int block_num = blocks_.AllocateBlock(em_address); + IRBlock *b = blocks_.GetBlock(block_num); + frontend_.DoJit(em_address, b); + b->Finalize(block_num); // Overwrites the first instruction + + if (frontend_.CheckRounding()) { // Our assumptions are all wrong so it's clean-slate time. 
ClearCache(); Compile(em_address); @@ -208,18 +214,18 @@ void IRJit::RunLoopUntil(u64 globalticks) { // RestoreRoundingMode(true); } -u32 IRJit::GetCompilerPC() { +u32 IRFrontend::GetCompilerPC() { return js.compilerPC; } -MIPSOpcode IRJit::GetOffsetInstruction(int offset) { +MIPSOpcode IRFrontend::GetOffsetInstruction(int offset) { return Memory::Read_Instruction(GetCompilerPC() + 4 * offset); } -void IRJit::DoJit(u32 em_address, IRBlock *b) { +void IRFrontend::DoJit(u32 em_address, IRBlock *b) { js.cancel = false; - js.blockStart = mips_->pc; - js.compilerPC = mips_->pc; + js.blockStart = em_address; + js.compilerPC = em_address; js.lastContinuedPC = 0; js.initialBlockSize = 0; js.nextExit = 0; @@ -262,7 +268,7 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { if (logBlocks > 0 && dontLogBlocks == 0) { char temp2[256]; - ILOG("=============== mips %d %08x ===============", blocks_.GetNumBlocks(), em_address); + ILOG("=============== mips %08x ===============", em_address); for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { temp2[0] = 0; MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp2, true); @@ -301,7 +307,7 @@ bool IRJit::DescribeCodePtr(const u8 *ptr, std::string &name) { return false; } -void IRJit::Comp_RunBlock(MIPSOpcode op) { +void IRFrontend::Comp_RunBlock(MIPSOpcode op) { // This shouldn't be necessary, the dispatcher should catch us before we get here. ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); } @@ -319,7 +325,7 @@ bool IRJit::ReplaceJalTo(u32 dest) { return false; } -void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { +void IRFrontend::Comp_ReplacementFunc(MIPSOpcode op) { int index = op.encoding & MIPS_EMUHACK_VALUE_MASK; const ReplacementTableEntry *entry = GetReplacementFunc(index); @@ -351,7 +357,7 @@ void IRJit::Comp_ReplacementFunc(MIPSOpcode op) { } } -void IRJit::Comp_Generic(MIPSOpcode op) { +void IRFrontend::Comp_Generic(MIPSOpcode op) { FlushAll(); ir.Write(IROp::Interpret, 0, ir.AddConstant(op.encoding)); const MIPSInfo info = MIPSGetInfo(op); @@ -363,7 +369,7 @@ void IRJit::Comp_Generic(MIPSOpcode op) { } // Destroys SCRATCH2 -void IRJit::RestoreRoundingMode(bool force) { +void IRFrontend::RestoreRoundingMode(bool force) { // If the game has never set an interesting rounding mode, we can safely skip this. if (force || js.hasSetRounding) { ir.Write(IROp::RestoreRoundingMode); @@ -371,7 +377,7 @@ void IRJit::RestoreRoundingMode(bool force) { } // Destroys SCRATCH1 and SCRATCH2 -void IRJit::ApplyRoundingMode(bool force) { +void IRFrontend::ApplyRoundingMode(bool force) { // If the game has never set an interesting rounding mode, we can safely skip this. 
if (force || js.hasSetRounding) { ir.Write(IROp::ApplyRoundingMode); @@ -379,14 +385,14 @@ void IRJit::ApplyRoundingMode(bool force) { } // Destroys SCRATCH1 and SCRATCH2 -void IRJit::UpdateRoundingMode() { +void IRFrontend::UpdateRoundingMode() { ir.Write(IROp::UpdateRoundingMode); } -void IRJit::Comp_DoNothing(MIPSOpcode op) { +void IRFrontend::Comp_DoNothing(MIPSOpcode op) { } -int IRJit::Replace_fabsf() { +int IRFrontend::Replace_fabsf() { Crash(); return 0; } diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 76a27d1a4bef..3947136ea93f 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -101,28 +101,11 @@ class IRBlockCache { std::vector blocks_; }; -class IRJit : public JitInterface, public MIPSFrontendInterface{ +class IRFrontend : public MIPSFrontendInterface { public: - IRJit(MIPSState *mips); - virtual ~IRJit(); - - void DoState(PointerWrap &p) override; - void DoDummyState(PointerWrap &p) override; - - const JitOptions &GetJitOptions() { return jo; } - - // Compiled ops should ignore delay slots - // the compiler will take care of them by itself - // OR NOT + IRFrontend(bool startDefaultPrefix); void Comp_Generic(MIPSOpcode op) override; - void RunLoopUntil(u64 globalticks) override; - - void Compile(u32 em_address) override; // Compiles a block at current MIPS PC - void DoJit(u32 em_address, IRBlock *b); - - bool DescribeCodePtr(const u8 *ptr, std::string &name) override; - void Comp_RunBlock(MIPSOpcode op) override; void Comp_ReplacementFunc(MIPSOpcode op) override; @@ -195,25 +178,17 @@ class IRJit : public JitInterface, public MIPSFrontendInterface{ void Comp_Vbfy(MIPSOpcode op) override; int Replace_fabsf(); + void DoState(PointerWrap &p); + bool CheckRounding(); // returns true if we need a do-over + void DoJit(u32 em_address, IRBlock *b); - // Not using a regular block cache. - JitBlockCache *GetBlockCache() override { return nullptr; } - MIPSOpcode GetOriginalOp(MIPSOpcode op) override; - - void ClearCache(); - void InvalidateCache(); - void InvalidateCacheAt(u32 em_address, int length = 4); +private: + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void UpdateRoundingMode(); void EatPrefix() { js.EatPrefix(); } - const u8 *GetDispatcher() const override { - return dispatcher; - } - - void LinkBlock(u8 *exitPoint, const u8 *checkedEntry) override; - void UnlinkBlock(u8 *checkedEntry, u32 originalAddress) override; - -private: void FlushAll(); void FlushPrefixV(); @@ -222,12 +197,6 @@ class IRJit : public JitInterface, public MIPSFrontendInterface{ void EatInstruction(MIPSOpcode op); MIPSOpcode GetOffsetInstruction(int offset); - void RestoreRoundingMode(bool force = false); - void ApplyRoundingMode(bool force = false); - void UpdateRoundingMode(); - - bool ReplaceJalTo(u32 dest); - // Utility compilation functions void BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely); void BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely); @@ -255,44 +224,55 @@ class IRJit : public JitInterface, public MIPSFrontendInterface{ // Utils void Comp_ITypeMemLR(MIPSOpcode op, bool load); - JitOptions jo; + // State JitState js; - - IRBlockCache blocks_; - - MIPSState *mips_; + IRWriter ir; int dontLogBlocks; int logBlocks; +}; - IRWriter ir; +class IRJit : public JitInterface { +public: + IRJit(MIPSState *mips); + virtual ~IRJit(); - // where to write branch-likely trampolines. 
not used atm - // u32 blTrampolines_; - // int blTrampolineCount_; + void DoState(PointerWrap &p) override; + void DoDummyState(PointerWrap &p) override; -public: - // Code pointers - const u8 *enterDispatcher; + const JitOptions &GetJitOptions() { return jo; } - const u8 *outerLoop; - const u8 *outerLoopPCInSCRATCH1; - const u8 *dispatcherCheckCoreState; - const u8 *dispatcherPCInSCRATCH1; - const u8 *dispatcher; - const u8 *dispatcherNoCheck; + void RunLoopUntil(u64 globalticks) override; - const u8 *breakpointBailout; + void Compile(u32 em_address) override; // Compiles a block at current MIPS PC - const u8 *saveStaticRegisters; - const u8 *loadStaticRegisters; + bool DescribeCodePtr(const u8 *ptr, std::string &name) override; + // Not using a regular block cache. + JitBlockCache *GetBlockCache() override { return nullptr; } + MIPSOpcode GetOriginalOp(MIPSOpcode op) override; + + void ClearCache(); + void InvalidateCache(); + void InvalidateCacheAt(u32 em_address, int length = 4); + + const u8 *GetDispatcher() const override { return nullptr; } + + void LinkBlock(u8 *exitPoint, const u8 *checkedEntry) override; + void UnlinkBlock(u8 *checkedEntry, u32 originalAddress) override; + +private: + bool ReplaceJalTo(u32 dest); + + JitOptions jo; + + IRFrontend frontend_; + IRBlockCache blocks_; - const u8 *restoreRoundingMode; - const u8 *applyRoundingMode; - const u8 *updateRoundingMode; + MIPSState *mips_; - // Indexed by FPCR FZ:RN bits for convenience. Uses SCRATCH2. - const u8 *convertS0ToSCRATCH1[8]; + // where to write branch-likely trampolines. not used atm + // u32 blTrampolines_; + // int blTrampolineCount_; }; } // namespace MIPSComp From e711a47a7526bc2e3324be6e4158e5d2caa8fde7 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 May 2016 20:05:06 +0200 Subject: [PATCH 41/77] Complete the separation of the IR compiler frontend from the "Jit" --- CMakeLists.txt | 2 + Core/Core.vcxproj | 2 + Core/Core.vcxproj.filters | 6 + Core/MIPS/IR/IRFrontend.cpp | 288 ++++++++++++++++++++++++++++++++++++ Core/MIPS/IR/IRFrontend.h | 144 ++++++++++++++++++ Core/MIPS/IR/IRJit.cpp | 253 ------------------------------- Core/MIPS/IR/IRJit.h | 133 +---------------- android/jni/Android.mk | 1 + 8 files changed, 444 insertions(+), 385 deletions(-) create mode 100644 Core/MIPS/IR/IRFrontend.cpp create mode 100644 Core/MIPS/IR/IRFrontend.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 655d8f7e1306..7ce5de115b88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1084,6 +1084,8 @@ set(CoreExtra ${CoreExtra} Core/MIPS/IR/IRCompFPU.cpp Core/MIPS/IR/IRCompLoadStore.cpp Core/MIPS/IR/IRCompVFPU.cpp + Core/MIPS/IR/IRFrontend.cpp + Core/MIPS/IR/IRFrontend.h Core/MIPS/IR/IRInst.cpp Core/MIPS/IR/IRInst.h Core/MIPS/IR/IRInterpreter.cpp diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index 561d83d2d36e..60d9c7c66c21 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -187,6 +187,7 @@ + @@ -518,6 +519,7 @@ + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 0fc92ec2fad1..600070d1e8d4 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -670,6 +670,9 @@ MIPS\IR + + MIPS\IR + @@ -1230,6 +1233,9 @@ MIPS\IR + + MIPS\IR + diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp new file mode 100644 index 000000000000..94a7ebe1056d --- /dev/null +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -0,0 +1,288 @@ +// Copyright (c) 2012- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "base/logging.h" + +#include "Common/ChunkFile.h" +#include "Core/Reporting.h" +#include "Core/MemMap.h" + +#include "Core/MIPS/MIPSTables.h" +#include "Core/HLE/ReplaceTables.h" + +#include "Core/MIPS/IR/IRFrontend.h" +#include "Core/MIPS/IR/IRRegCache.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRPassSimplify.h" +#include "Core/MIPS/IR/IRInterpreter.h" + +namespace MIPSComp { + +IRFrontend::IRFrontend(bool startDefaultPrefix) { + logBlocks = 0; + dontLogBlocks = 0; + js.startDefaultPrefix = startDefaultPrefix; + // js.currentRoundingFunc = convertS0ToSCRATCH1[0]; +} + +void IRFrontend::DoState(PointerWrap &p) { + auto s = p.Section("Jit", 1, 2); + if (!s) + return; + + p.Do(js.startDefaultPrefix); + if (s >= 2) { + p.Do(js.hasSetRounding); + js.lastSetRounding = 0; + } else { + js.hasSetRounding = 1; + } +} + +void IRFrontend::FlushAll() { + FlushPrefixV(); +} + +void IRFrontend::FlushPrefixV() { + if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) { + ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_SPREFIX, ir.AddConstant(js.prefixS)); + js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) { + ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_TPREFIX, ir.AddConstant(js.prefixT)); + js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) { + ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_DPREFIX, ir.AddConstant(js.prefixD)); + js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY); + } +} + +void IRFrontend::EatInstruction(MIPSOpcode op) { + MIPSInfo info = MIPSGetInfo(op); + if (info & DELAYSLOT) { + ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op."); + } + if (js.inDelaySlot) { + ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot."); + } + + js.numInstructions++; + js.compilerPC += 4; + js.downcountAmount += MIPSGetInstructionCycleEstimate(op); +} + +void IRFrontend::CompileDelaySlot() { + js.inDelaySlot = true; + MIPSOpcode op = GetOffsetInstruction(1); + MIPSCompileOp(op, this); + js.inDelaySlot = false; +} + +bool IRFrontend::CheckRounding() { + bool cleanSlate = false; + if (js.hasSetRounding && !js.lastSetRounding) { + WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); + // Won't loop, since hasSetRounding is only ever set to 1. + js.lastSetRounding = js.hasSetRounding; + cleanSlate = true; + } + + // Drat. The VFPU hit an uneaten prefix at the end of a block. + if (js.startDefaultPrefix && js.MayHavePrefix()) { + WARN_LOG(JIT, "An uneaten prefix at end of block"); + js.LogPrefix(); + + // Let's try that one more time. We won't get back here because we toggled the value. 
+ js.startDefaultPrefix = false; + // TODO: Make sure this works. + // cleanSlate = true; + } + + return cleanSlate; +} + + +void IRFrontend::Comp_ReplacementFunc(MIPSOpcode op) { + int index = op.encoding & MIPS_EMUHACK_VALUE_MASK; + + const ReplacementTableEntry *entry = GetReplacementFunc(index); + if (!entry) { + ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding); + return; + } + + if (entry->flags & REPFLAG_DISABLED) { + MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this); + } else if (entry->replaceFunc) { + FlushAll(); + RestoreRoundingMode(); + ir.Write(IROp::SetPCConst, 0, ir.AddConstant(GetCompilerPC())); + ir.Write(IROp::CallReplacement, 0, ir.AddConstant(index)); + + if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { + // Compile the original instruction at this address. We ignore cycles for hooks. + ApplyRoundingMode(); + MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this); + } else { + ApplyRoundingMode(); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + ir.Write(IROp::ExitToReg, MIPS_REG_RA, 0, 0); + js.compiling = false; + } + } else { + ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name); + } +} + +void IRFrontend::Comp_Generic(MIPSOpcode op) { + FlushAll(); + ir.Write(IROp::Interpret, 0, ir.AddConstant(op.encoding)); + const MIPSInfo info = MIPSGetInfo(op); + if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) { + // If it does eat them, it'll happen in MIPSCompileOp(). + if ((info & OUT_EAT_PREFIX) == 0) + js.PrefixUnknown(); + } +} + +// Destroys SCRATCH2 +void IRFrontend::RestoreRoundingMode(bool force) { + // If the game has never set an interesting rounding mode, we can safely skip this. + if (force || js.hasSetRounding) { + ir.Write(IROp::RestoreRoundingMode); + } +} + +// Destroys SCRATCH1 and SCRATCH2 +void IRFrontend::ApplyRoundingMode(bool force) { + // If the game has never set an interesting rounding mode, we can safely skip this. 
+ if (force || js.hasSetRounding) { + ir.Write(IROp::ApplyRoundingMode); + } +} + +// Destroys SCRATCH1 and SCRATCH2 +void IRFrontend::UpdateRoundingMode() { + ir.Write(IROp::UpdateRoundingMode); +} + +void IRFrontend::Comp_DoNothing(MIPSOpcode op) { +} + +int IRFrontend::Replace_fabsf() { + Crash(); + return 0; +} + +u32 IRFrontend::GetCompilerPC() { + return js.compilerPC; +} + +MIPSOpcode IRFrontend::GetOffsetInstruction(int offset) { + return Memory::Read_Instruction(GetCompilerPC() + 4 * offset); +} + +void IRFrontend::DoJit(u32 em_address, IRBlock *b) { + js.cancel = false; + js.blockStart = em_address; + js.compilerPC = em_address; + js.lastContinuedPC = 0; + js.initialBlockSize = 0; + js.nextExit = 0; + js.downcountAmount = 0; + js.curBlock = nullptr; + js.compiling = true; + js.inDelaySlot = false; + js.PrefixStart(); + ir.Clear(); + + js.numInstructions = 0; + while (js.compiling) { + MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC()); + js.downcountAmount += MIPSGetInstructionCycleEstimate(inst); + MIPSCompileOp(inst, this); + js.compilerPC += 4; + js.numInstructions++; + + if (ir.GetConstants().size() > 64) { + // Need to break the block + ir.Write(IROp::ExitToConst, ir.AddConstant(js.compilerPC)); + js.compiling = false; + } + } + + IRWriter simplified; + IRWriter *code = &ir; + if (true) { + static const IRPassFunc passes[] = { + &PropagateConstants, + }; + if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) + logBlocks = 1; + code = &simplified; + if (ir.GetInstructions().size() >= 24) + logBlocks = 1; + } + + b->SetInstructions(code->GetInstructions(), code->GetConstants()); + + if (logBlocks > 0 && dontLogBlocks == 0) { + char temp2[256]; + ILOG("=============== mips %08x ===============", em_address); + for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { + temp2[0] = 0; + MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp2, true); + ILOG("M: %08x %s", cpc, temp2); + } + } + + if (logBlocks > 0 && dontLogBlocks == 0) { + ILOG("=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); + for (int i = 0; i < ir.GetInstructions().size(); i++) { + char buf[256]; + DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); + ILOG("%s", buf); + } + ILOG("=============== end ================="); + } + + if (logBlocks > 0 && dontLogBlocks == 0) { + ILOG("=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); + for (int i = 0; i < code->GetInstructions().size(); i++) { + char buf[256]; + DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], code->GetConstants().data()); + ILOG("%s", buf); + } + ILOG("=============== end ================="); + } + + if (logBlocks > 0) + logBlocks--; + if (dontLogBlocks > 0) + dontLogBlocks--; +} + +void IRFrontend::Comp_RunBlock(MIPSOpcode op) { + // This shouldn't be necessary, the dispatcher should catch us before we get here. 
+ ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); +} + + +} // namespace \ No newline at end of file diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h new file mode 100644 index 000000000000..c5854dffdf7e --- /dev/null +++ b/Core/MIPS/IR/IRFrontend.h @@ -0,0 +1,144 @@ +#pragma once + +#include "Common/CommonTypes.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/MIPS/MIPSVFPUUtils.h" +#include "Core/MIPS/IR/IRInst.h" + +namespace MIPSComp { + +class IRBlock; + +class IRFrontend : public MIPSFrontendInterface { +public: + IRFrontend(bool startDefaultPrefix); + void Comp_Generic(MIPSOpcode op) override; + + void Comp_RunBlock(MIPSOpcode op) override; + void Comp_ReplacementFunc(MIPSOpcode op) override; + + // Ops + void Comp_ITypeMem(MIPSOpcode op) override; + void Comp_Cache(MIPSOpcode op) override; + + void Comp_RelBranch(MIPSOpcode op) override; + void Comp_RelBranchRI(MIPSOpcode op) override; + void Comp_FPUBranch(MIPSOpcode op) override; + void Comp_FPULS(MIPSOpcode op) override; + void Comp_FPUComp(MIPSOpcode op) override; + void Comp_Jump(MIPSOpcode op) override; + void Comp_JumpReg(MIPSOpcode op) override; + void Comp_Syscall(MIPSOpcode op) override; + void Comp_Break(MIPSOpcode op) override; + + void Comp_IType(MIPSOpcode op) override; + void Comp_RType2(MIPSOpcode op) override; + void Comp_RType3(MIPSOpcode op) override; + void Comp_ShiftType(MIPSOpcode op) override; + void Comp_Allegrex(MIPSOpcode op) override; + void Comp_Allegrex2(MIPSOpcode op) override; + void Comp_VBranch(MIPSOpcode op) override; + void Comp_MulDivType(MIPSOpcode op) override; + void Comp_Special3(MIPSOpcode op) override; + + void Comp_FPU3op(MIPSOpcode op) override; + void Comp_FPU2op(MIPSOpcode op) override; + void Comp_mxc1(MIPSOpcode op) override; + + void Comp_DoNothing(MIPSOpcode op) override; + + void Comp_SV(MIPSOpcode op) override; + void Comp_SVQ(MIPSOpcode op) override; + void Comp_VPFX(MIPSOpcode op) override; + void Comp_VVectorInit(MIPSOpcode op) override; + void Comp_VMatrixInit(MIPSOpcode op) override; + void Comp_VDot(MIPSOpcode op) override; + void Comp_VecDo3(MIPSOpcode op) override; + void Comp_VV2Op(MIPSOpcode op) override; + void Comp_Mftv(MIPSOpcode op) override; + void Comp_Vmfvc(MIPSOpcode op) override; + void Comp_Vmtvc(MIPSOpcode op) override; + void Comp_Vmmov(MIPSOpcode op) override; + void Comp_VScl(MIPSOpcode op) override; + void Comp_Vmmul(MIPSOpcode op) override; + void Comp_Vmscl(MIPSOpcode op) override; + void Comp_Vtfm(MIPSOpcode op) override; + void Comp_VHdp(MIPSOpcode op) override; + void Comp_VCrs(MIPSOpcode op) override; + void Comp_VDet(MIPSOpcode op) override; + void Comp_Vi2x(MIPSOpcode op) override; + void Comp_Vx2i(MIPSOpcode op) override; + void Comp_Vf2i(MIPSOpcode op) override; + void Comp_Vi2f(MIPSOpcode op) override; + void Comp_Vh2f(MIPSOpcode op) override; + void Comp_Vcst(MIPSOpcode op) override; + void Comp_Vhoriz(MIPSOpcode op) override; + void Comp_VRot(MIPSOpcode op) override; + void Comp_VIdt(MIPSOpcode op) override; + void Comp_Vcmp(MIPSOpcode op) override; + void Comp_Vcmov(MIPSOpcode op) override; + void Comp_Viim(MIPSOpcode op) override; + void Comp_Vfim(MIPSOpcode op) override; + void Comp_VCrossQuat(MIPSOpcode op) override; + void Comp_Vsgn(MIPSOpcode op) override; + void Comp_Vocp(MIPSOpcode op) override; + void Comp_ColorConv(MIPSOpcode op) override; + void Comp_Vbfy(MIPSOpcode op) override; + + int Replace_fabsf(); + void DoState(PointerWrap &p); + bool 
CheckRounding(); // returns true if we need a do-over + void DoJit(u32 em_address, IRBlock *b); + +private: + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void UpdateRoundingMode(); + + void EatPrefix() { js.EatPrefix(); } + + void FlushAll(); + void FlushPrefixV(); + + u32 GetCompilerPC(); + void CompileDelaySlot(); + void EatInstruction(MIPSOpcode op); + MIPSOpcode GetOffsetInstruction(int offset); + + // Utility compilation functions + void BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely); + void BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely); + void BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely); + void BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely); + + // Utilities to reduce duplicated code + void CompShiftImm(MIPSOpcode op, IROp shiftType, int sa); + void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst); + + void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz); + void ApplyPrefixD(const u8 *vregs, VectorSize sz); + void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixS, sz); + } + void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixT, sz); + } + void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); + + // Utils + void Comp_ITypeMemLR(MIPSOpcode op, bool load); + + // State + JitState js; + IRWriter ir; + + int dontLogBlocks; + int logBlocks; +}; + +} // namespace \ No newline at end of file diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 9ed4f2eb70de..94ab72bfd98a 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -18,7 +18,6 @@ #include "base/logging.h" #include "profiler/profiler.h" #include "Common/ChunkFile.h" -#include "Common/CPUDetect.h" #include "Common/StringUtils.h" #include "Core/Reporting.h" @@ -32,7 +31,6 @@ #include "Core/MIPS/MIPSCodeUtils.h" #include "Core/MIPS/MIPSInt.h" #include "Core/MIPS/MIPSTables.h" -#include "Core/HLE/ReplaceTables.h" #include "Core/HLE/sceKernelMemory.h" #include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/IR/IRJit.h" @@ -51,27 +49,6 @@ IRJit::IRJit(MIPSState *mips) : mips_(mips), frontend_(mips->HasDefaultPrefix()) IRJit::~IRJit() { } -IRFrontend::IRFrontend(bool startDefaultPrefix) { - logBlocks = 0; - dontLogBlocks = 0; - js.startDefaultPrefix = startDefaultPrefix; - // js.currentRoundingFunc = convertS0ToSCRATCH1[0]; -} - -void IRFrontend::DoState(PointerWrap &p) { - auto s = p.Section("Jit", 1, 2); - if (!s) - return; - - p.Do(js.startDefaultPrefix); - if (s >= 2) { - p.Do(js.hasSetRounding); - js.lastSetRounding = 0; - } else { - js.hasSetRounding = 1; - } -} - void IRJit::DoState(PointerWrap &p) { frontend_.DoState(p); } @@ -90,27 +67,6 @@ void IRJit::DoDummyState(PointerWrap &p) { } } -void IRFrontend::FlushAll() { - FlushPrefixV(); -} - -void IRFrontend::FlushPrefixV() { - if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) { - ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_SPREFIX, ir.AddConstant(js.prefixS)); - js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY); - } - - if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) { - ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_TPREFIX, ir.AddConstant(js.prefixT)); - js.prefixTFlag = (JitState::PrefixState) 
(js.prefixTFlag & ~JitState::PREFIX_DIRTY); - } - - if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) { - ir.Write(IROp::SetCtrlVFPU, VFPU_CTRL_DPREFIX, ir.AddConstant(js.prefixD)); - js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY); - } -} - void IRJit::ClearCache() { ILOG("IRJit: Clearing the cache!"); blocks_.Clear(); @@ -124,50 +80,6 @@ void IRJit::InvalidateCacheAt(u32 em_address, int length) { blocks_.InvalidateICache(em_address, length); } -void IRFrontend::EatInstruction(MIPSOpcode op) { - MIPSInfo info = MIPSGetInfo(op); - if (info & DELAYSLOT) { - ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op."); - } - if (js.inDelaySlot) { - ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot."); - } - - js.numInstructions++; - js.compilerPC += 4; - js.downcountAmount += MIPSGetInstructionCycleEstimate(op); -} - -void IRFrontend::CompileDelaySlot() { - js.inDelaySlot = true; - MIPSOpcode op = GetOffsetInstruction(1); - MIPSCompileOp(op, this); - js.inDelaySlot = false; -} - -bool IRFrontend::CheckRounding() { - bool cleanSlate = false; - if (js.hasSetRounding && !js.lastSetRounding) { - WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); - // Won't loop, since hasSetRounding is only ever set to 1. - js.lastSetRounding = js.hasSetRounding; - cleanSlate = true; - } - - // Drat. The VFPU hit an uneaten prefix at the end of a block. - if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block"); - js.LogPrefix(); - - // Let's try that one more time. We won't get back here because we toggled the value. - js.startDefaultPrefix = false; - // TODO: Make sure this works. - // cleanSlate = true; - } - - return cleanSlate; -} - void IRJit::Compile(u32 em_address) { PROFILE_THIS_SCOPE("jitc"); @@ -214,104 +126,11 @@ void IRJit::RunLoopUntil(u64 globalticks) { // RestoreRoundingMode(true); } -u32 IRFrontend::GetCompilerPC() { - return js.compilerPC; -} - -MIPSOpcode IRFrontend::GetOffsetInstruction(int offset) { - return Memory::Read_Instruction(GetCompilerPC() + 4 * offset); -} - -void IRFrontend::DoJit(u32 em_address, IRBlock *b) { - js.cancel = false; - js.blockStart = em_address; - js.compilerPC = em_address; - js.lastContinuedPC = 0; - js.initialBlockSize = 0; - js.nextExit = 0; - js.downcountAmount = 0; - js.curBlock = nullptr; - js.compiling = true; - js.inDelaySlot = false; - js.PrefixStart(); - ir.Clear(); - - js.numInstructions = 0; - while (js.compiling) { - MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC()); - js.downcountAmount += MIPSGetInstructionCycleEstimate(inst); - MIPSCompileOp(inst, this); - js.compilerPC += 4; - js.numInstructions++; - - if (ir.GetConstants().size() > 64) { - // Need to break the block - ir.Write(IROp::ExitToConst, ir.AddConstant(js.compilerPC)); - js.compiling = false; - } - } - - IRWriter simplified; - IRWriter *code = &ir; - if (true) { - static const IRPassFunc passes[] = { - &PropagateConstants, - }; - if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) - logBlocks = 1; - code = &simplified; - if (ir.GetInstructions().size() >= 24) - logBlocks = 1; - } - - b->SetInstructions(code->GetInstructions(), code->GetConstants()); - - if (logBlocks > 0 && dontLogBlocks == 0) { - char temp2[256]; - ILOG("=============== mips %08x ===============", em_address); - for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { - temp2[0] = 0; - MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp2, true); - 
ILOG("M: %08x %s", cpc, temp2); - } - } - - if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); - for (int i = 0; i < ir.GetInstructions().size(); i++) { - char buf[256]; - DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); - ILOG("%s", buf); - } - ILOG("=============== end ================="); - } - - if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); - for (int i = 0; i < code->GetInstructions().size(); i++) { - char buf[256]; - DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], code->GetConstants().data()); - ILOG("%s", buf); - } - ILOG("=============== end ================="); - } - - if (logBlocks > 0) - logBlocks--; - if (dontLogBlocks > 0) - dontLogBlocks--; -} - bool IRJit::DescribeCodePtr(const u8 *ptr, std::string &name) { // Used in disassembly viewer. return false; } -void IRFrontend::Comp_RunBlock(MIPSOpcode op) { - // This shouldn't be necessary, the dispatcher should catch us before we get here. - ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); -} - void IRJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) { Crash(); } @@ -325,78 +144,6 @@ bool IRJit::ReplaceJalTo(u32 dest) { return false; } -void IRFrontend::Comp_ReplacementFunc(MIPSOpcode op) { - int index = op.encoding & MIPS_EMUHACK_VALUE_MASK; - - const ReplacementTableEntry *entry = GetReplacementFunc(index); - if (!entry) { - ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding); - return; - } - - if (entry->flags & REPFLAG_DISABLED) { - MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this); - } else if (entry->replaceFunc) { - FlushAll(); - RestoreRoundingMode(); - ir.Write(IROp::SetPCConst, 0, ir.AddConstant(GetCompilerPC())); - ir.Write(IROp::CallReplacement, 0, ir.AddConstant(index)); - - if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { - // Compile the original instruction at this address. We ignore cycles for hooks. - ApplyRoundingMode(); - MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this); - } else { - ApplyRoundingMode(); - ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); - ir.Write(IROp::ExitToReg, MIPS_REG_RA, 0, 0); - js.compiling = false; - } - } else { - ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name); - } -} - -void IRFrontend::Comp_Generic(MIPSOpcode op) { - FlushAll(); - ir.Write(IROp::Interpret, 0, ir.AddConstant(op.encoding)); - const MIPSInfo info = MIPSGetInfo(op); - if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) { - // If it does eat them, it'll happen in MIPSCompileOp(). - if ((info & OUT_EAT_PREFIX) == 0) - js.PrefixUnknown(); - } -} - -// Destroys SCRATCH2 -void IRFrontend::RestoreRoundingMode(bool force) { - // If the game has never set an interesting rounding mode, we can safely skip this. - if (force || js.hasSetRounding) { - ir.Write(IROp::RestoreRoundingMode); - } -} - -// Destroys SCRATCH1 and SCRATCH2 -void IRFrontend::ApplyRoundingMode(bool force) { - // If the game has never set an interesting rounding mode, we can safely skip this. 
- if (force || js.hasSetRounding) { - ir.Write(IROp::ApplyRoundingMode); - } -} - -// Destroys SCRATCH1 and SCRATCH2 -void IRFrontend::UpdateRoundingMode() { - ir.Write(IROp::UpdateRoundingMode); -} - -void IRFrontend::Comp_DoNothing(MIPSOpcode op) { -} - -int IRFrontend::Replace_fabsf() { - Crash(); - return 0; -} - void IRBlockCache::Clear() { blocks_.clear(); } diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 3947136ea93f..aa026b0bd8d5 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -20,11 +20,11 @@ #include #include "Common/CPUDetect.h" -#include "Core/MIPS/JitCommon/JitState.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" #include "Core/MIPS/JitCommon/JitCommon.h" #include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/IR/IRInst.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/MIPSVFPUUtils.h" #ifndef offsetof @@ -101,137 +101,6 @@ class IRBlockCache { std::vector blocks_; }; -class IRFrontend : public MIPSFrontendInterface { -public: - IRFrontend(bool startDefaultPrefix); - void Comp_Generic(MIPSOpcode op) override; - - void Comp_RunBlock(MIPSOpcode op) override; - void Comp_ReplacementFunc(MIPSOpcode op) override; - - // Ops - void Comp_ITypeMem(MIPSOpcode op) override; - void Comp_Cache(MIPSOpcode op) override; - - void Comp_RelBranch(MIPSOpcode op) override; - void Comp_RelBranchRI(MIPSOpcode op) override; - void Comp_FPUBranch(MIPSOpcode op) override; - void Comp_FPULS(MIPSOpcode op) override; - void Comp_FPUComp(MIPSOpcode op) override; - void Comp_Jump(MIPSOpcode op) override; - void Comp_JumpReg(MIPSOpcode op) override; - void Comp_Syscall(MIPSOpcode op) override; - void Comp_Break(MIPSOpcode op) override; - - void Comp_IType(MIPSOpcode op) override; - void Comp_RType2(MIPSOpcode op) override; - void Comp_RType3(MIPSOpcode op) override; - void Comp_ShiftType(MIPSOpcode op) override; - void Comp_Allegrex(MIPSOpcode op) override; - void Comp_Allegrex2(MIPSOpcode op) override; - void Comp_VBranch(MIPSOpcode op) override; - void Comp_MulDivType(MIPSOpcode op) override; - void Comp_Special3(MIPSOpcode op) override; - - void Comp_FPU3op(MIPSOpcode op) override; - void Comp_FPU2op(MIPSOpcode op) override; - void Comp_mxc1(MIPSOpcode op) override; - - void Comp_DoNothing(MIPSOpcode op) override; - - void Comp_SV(MIPSOpcode op) override; - void Comp_SVQ(MIPSOpcode op) override; - void Comp_VPFX(MIPSOpcode op) override; - void Comp_VVectorInit(MIPSOpcode op) override; - void Comp_VMatrixInit(MIPSOpcode op) override; - void Comp_VDot(MIPSOpcode op) override; - void Comp_VecDo3(MIPSOpcode op) override; - void Comp_VV2Op(MIPSOpcode op) override; - void Comp_Mftv(MIPSOpcode op) override; - void Comp_Vmfvc(MIPSOpcode op) override; - void Comp_Vmtvc(MIPSOpcode op) override; - void Comp_Vmmov(MIPSOpcode op) override; - void Comp_VScl(MIPSOpcode op) override; - void Comp_Vmmul(MIPSOpcode op) override; - void Comp_Vmscl(MIPSOpcode op) override; - void Comp_Vtfm(MIPSOpcode op) override; - void Comp_VHdp(MIPSOpcode op) override; - void Comp_VCrs(MIPSOpcode op) override; - void Comp_VDet(MIPSOpcode op) override; - void Comp_Vi2x(MIPSOpcode op) override; - void Comp_Vx2i(MIPSOpcode op) override; - void Comp_Vf2i(MIPSOpcode op) override; - void Comp_Vi2f(MIPSOpcode op) override; - void Comp_Vh2f(MIPSOpcode op) override; - void Comp_Vcst(MIPSOpcode op) override; - void Comp_Vhoriz(MIPSOpcode op) override; - void Comp_VRot(MIPSOpcode op) override; - void Comp_VIdt(MIPSOpcode op) override; - void Comp_Vcmp(MIPSOpcode op) override; - void Comp_Vcmov(MIPSOpcode 
op) override; - void Comp_Viim(MIPSOpcode op) override; - void Comp_Vfim(MIPSOpcode op) override; - void Comp_VCrossQuat(MIPSOpcode op) override; - void Comp_Vsgn(MIPSOpcode op) override; - void Comp_Vocp(MIPSOpcode op) override; - void Comp_ColorConv(MIPSOpcode op) override; - void Comp_Vbfy(MIPSOpcode op) override; - - int Replace_fabsf(); - void DoState(PointerWrap &p); - bool CheckRounding(); // returns true if we need a do-over - void DoJit(u32 em_address, IRBlock *b); - -private: - void RestoreRoundingMode(bool force = false); - void ApplyRoundingMode(bool force = false); - void UpdateRoundingMode(); - - void EatPrefix() { js.EatPrefix(); } - - void FlushAll(); - void FlushPrefixV(); - - u32 GetCompilerPC(); - void CompileDelaySlot(); - void EatInstruction(MIPSOpcode op); - MIPSOpcode GetOffsetInstruction(int offset); - - // Utility compilation functions - void BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely); - void BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely); - void BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely); - void BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely); - - // Utilities to reduce duplicated code - void CompShiftImm(MIPSOpcode op, IROp shiftType, int sa); - void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst); - - void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz); - void ApplyPrefixD(const u8 *vregs, VectorSize sz); - void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { - _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); - GetVectorRegs(regs, sz, vectorReg); - ApplyPrefixST(regs, js.prefixS, sz); - } - void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { - _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); - GetVectorRegs(regs, sz, vectorReg); - ApplyPrefixST(regs, js.prefixT, sz); - } - void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); - - // Utils - void Comp_ITypeMemLR(MIPSOpcode op, bool load); - - // State - JitState js; - IRWriter ir; - - int dontLogBlocks; - int logBlocks; -}; - class IRJit : public JitInterface { public: IRJit(MIPSState *mips); diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 92a10e800c63..115b96997252 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -157,6 +157,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/Core/MIPS/MIPSVFPUUtils.cpp.arm \ $(SRC)/Core/MIPS/MIPSCodeUtils.cpp.arm \ $(SRC)/Core/MIPS/MIPSDebugInterface.cpp \ + $(SRC)/Core/MIPS/IR/IRFrontend.cpp \ $(SRC)/Core/MIPS/IR/IRJit.cpp \ $(SRC)/Core/MIPS/IR/IRCompALU.cpp \ $(SRC)/Core/MIPS/IR/IRCompBranch.cpp \ From 28087a6088c41c5cb7a9f56515f59b8ae38a741b Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 May 2016 20:18:22 +0200 Subject: [PATCH 42/77] IRFrontend shouldn't know about IRBlock --- Core/MIPS/IR/IRFrontend.cpp | 5 +++-- Core/MIPS/IR/IRFrontend.h | 5 ++--- Core/MIPS/IR/IRJit.cpp | 6 +++++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 94a7ebe1056d..bf8e5d573a01 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -199,7 +199,7 @@ MIPSOpcode IRFrontend::GetOffsetInstruction(int offset) { return Memory::Read_Instruction(GetCompilerPC() + 4 * offset); } -void IRFrontend::DoJit(u32 em_address, IRBlock *b) { +void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::vector &constants) { js.cancel = false; js.blockStart = em_address; js.compilerPC = em_address; @@ -241,7 +241,8 @@ void 
IRFrontend::DoJit(u32 em_address, IRBlock *b) { logBlocks = 1; } - b->SetInstructions(code->GetInstructions(), code->GetConstants()); + instructions = code->GetInstructions(); + constants = code->GetConstants(); if (logBlocks > 0 && dontLogBlocks == 0) { char temp2[256]; diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index c5854dffdf7e..9b8db0c76a04 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -8,8 +8,6 @@ namespace MIPSComp { -class IRBlock; - class IRFrontend : public MIPSFrontendInterface { public: IRFrontend(bool startDefaultPrefix); @@ -89,7 +87,8 @@ class IRFrontend : public MIPSFrontendInterface { int Replace_fabsf(); void DoState(PointerWrap &p); bool CheckRounding(); // returns true if we need a do-over - void DoJit(u32 em_address, IRBlock *b); + + void DoJit(u32 em_address, std::vector &instructions, std::vector &constants); private: void RestoreRoundingMode(bool force = false); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 94ab72bfd98a..661543a9d0df 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -85,7 +85,11 @@ void IRJit::Compile(u32 em_address) { int block_num = blocks_.AllocateBlock(em_address); IRBlock *b = blocks_.GetBlock(block_num); - frontend_.DoJit(em_address, b); + + std::vector instructions; + std::vector constants; + frontend_.DoJit(em_address, instructions, constants); + b->SetInstructions(instructions, constants); b->Finalize(block_num); // Overwrites the first instruction if (frontend_.CheckRounding()) { From a5d5c5ce2b589e4beb08f3ca61daad1e41bc10d5 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 May 2016 22:40:59 +0200 Subject: [PATCH 43/77] Do the voffset remapping before the IR. This will let us easily add some virtual VFPU registers for the IR to the end, plus it's slightly faster. --- Core/MIPS/IR/IRCompVFPU.cpp | 20 ++++++++++---------- Core/MIPS/IR/IRFrontend.cpp | 1 - Core/MIPS/IR/IRInterpreter.cpp | 10 +++++----- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index e6f5ca3a8757..a2a3295c6e27 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -192,20 +192,20 @@ namespace MIPSComp { case 54: //lv.q { // TODO: Add vector load/store instruction to the IR - ir.Write(IROp::LoadFloatV, vregs[0], rs, ir.AddConstant(imm)); - ir.Write(IROp::LoadFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::LoadFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::LoadFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + ir.Write(IROp::LoadFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm)); + ir.Write(IROp::LoadFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::LoadFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::LoadFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12)); } break; case 62: //sv.q { // CC might be set by slow path below, so load regs first. 
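Patch 43 above moves the voffset[] lookup from run time to compile time: the frontend writes the already-remapped index into the IR operand, so the interpreter can index v[] directly, and the commit message notes this also leaves room to append virtual VFPU temporaries past the end. A minimal before/after sketch; the 4-entry remap table here is made up, the real voffset[] covers all 128 VFPU registers:

#include <cstdint>
#include <cstdio>

// Made-up remap table: logical VFPU register -> index into the flat v[] array.
// The real voffset[] has 128 entries and encodes the PSP's register ordering.
static const uint8_t kVoffset[4] = {2, 0, 3, 1};

struct Inst { uint8_t dest; };
static float v[4] = {10.f, 11.f, 12.f, 13.f};

// Before: the IR stores the logical register, so the interpreter remaps on every execution.
float ExecRemapAtRuntime(Inst in) { return v[kVoffset[in.dest]]; }

// After: the frontend bakes the remap into the operand, so execution is a direct index.
Inst EmitRemapped(uint8_t logicalReg) { return Inst{kVoffset[logicalReg]}; }
float ExecDirect(Inst in) { return v[in.dest]; }

int main() {
    Inst slow{1};                    // logical reg 1, remapped at runtime
    Inst fast = EmitRemapped(1);     // logical reg 1, remapped at compile time
    printf("%f %f\n", ExecRemapAtRuntime(slow), ExecDirect(fast));  // both read v[0]
    return 0;
}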
- ir.Write(IROp::StoreFloatV, vregs[0], rs, ir.AddConstant(imm)); - ir.Write(IROp::StoreFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::StoreFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::StoreFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + ir.Write(IROp::StoreFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm)); + ir.Write(IROp::StoreFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::StoreFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::StoreFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12)); } break; @@ -274,7 +274,7 @@ namespace MIPSComp { // rt = 0, imm = 255 appears to be used as a CPU interlock by some games. if (rt != 0) { if (imm < 128) { //R(rt) = VI(imm); - ir.Write(IROp::VMovToGPR, rt, imm); + ir.Write(IROp::VMovToGPR, rt, voffset[imm]); } else { DISABLE; } @@ -283,7 +283,7 @@ namespace MIPSComp { case 7: // mtv if (imm < 128) { - ir.Write(IROp::VMovFromGPR, imm, rt); + ir.Write(IROp::VMovFromGPR, voffset[imm], rt); } else { DISABLE; } diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index bf8e5d573a01..95ce9d9c4b02 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -26,7 +26,6 @@ #include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" -#include "Core/MIPS/IR/IRJit.h" #include "Core/MIPS/IR/IRPassSimplify.h" #include "Core/MIPS/IR/IRInterpreter.h" diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 03b06c77e129..54f20edb4a76 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -21,7 +21,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); break; case IROp::SetConstV: - memcpy(&mips->v[voffset[inst->dest]], &constPool[inst->src1], 4); + memcpy(&mips->v[inst->dest], &constPool[inst->src1], 4); break; case IROp::Add: mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; @@ -88,7 +88,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); break; case IROp::LoadFloatV: - mips->v[voffset[inst->dest]] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); + mips->v[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); break; case IROp::Store8: @@ -104,7 +104,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); break; case IROp::StoreFloatV: - Memory::WriteUnchecked_Float(mips->v[voffset[inst->src3]], mips->r[inst->src1] + constPool[inst->src2]); + Memory::WriteUnchecked_Float(mips->v[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); break; case IROp::ShlImm: @@ -314,10 +314,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; case IROp::VMovFromGPR: - memcpy(&mips->v[voffset[inst->dest]], &mips->r[inst->src1], 4); + memcpy(&mips->v[inst->dest], &mips->r[inst->src1], 4); break; case IROp::VMovToGPR: - memcpy(&mips->r[inst->dest], &mips->v[voffset[inst->src1]], 4); + memcpy(&mips->r[inst->dest], &mips->v[inst->src1], 4); break; case IROp::ExitToConst: From 558bb197c795d3caca1f6f52111a980ce2569c2b Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 May 2016 23:47:56 +0200 Subject: [PATCH 44/77] More VFPU --- 
Core/MIPS/IR/IRCompALU.cpp | 4 +- Core/MIPS/IR/IRCompBranch.cpp | 2 +- Core/MIPS/IR/IRCompFPU.cpp | 2 +- Core/MIPS/IR/IRCompLoadStore.cpp | 2 +- Core/MIPS/IR/IRCompVFPU.cpp | 64 +++++++++++++++++++++++--------- Core/MIPS/IR/IRFrontend.cpp | 4 +- Core/MIPS/IR/IRInst.cpp | 2 + Core/MIPS/IR/IRInst.h | 11 ++++-- Core/MIPS/IR/IRInterpreter.cpp | 27 ++++++++++++++ Core/MIPS/IR/IRPassSimplify.cpp | 4 +- Core/MIPS/MIPS.h | 21 ++++++----- 11 files changed, 104 insertions(+), 39 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 6500d1b0df11..8ffa632affd2 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -19,11 +19,9 @@ #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSCodeUtils.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Common/CPUDetect.h" -using namespace MIPSAnalyst; - #define _RS MIPS_GET_RS(op) #define _RT MIPS_GET_RT(op) #define _RD MIPS_GET_RD(op) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 3dda003b562d..76833bf32906 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -28,7 +28,7 @@ #include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/MIPSTables.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" #include "Common/Arm64Emitter.h" diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 1ca4a08e96ac..068a58013a87 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -21,7 +21,7 @@ #include "Core/MIPS/MIPSCodeUtils.h" #include "Core/MIPS/MIPSTables.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" #include "Common/CPUDetect.h" diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index 41c76a1a7d83..b890f4ff6808 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -42,7 +42,7 @@ #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/MIPSCodeUtils.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" #define _RS MIPS_GET_RS(op) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index a2a3295c6e27..2bb96e754942 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -27,7 +27,7 @@ #include "Core/Config.h" #include "Core/Reporting.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. 
@@ -50,6 +50,15 @@ #define _IMM26 (op & 0x03FFFFFF) namespace MIPSComp { + static void ApplyVoffset(u8 regs[4], int count) { + for (int i = 0; i < count; i++) { + regs[i] = voffset[regs[i]]; + } + } + + static bool IsConsecutive4(const u8 regs[4]) { + return (regs[1] == regs[0] + 1 && regs[2] == regs[1] + 1 && regs[3] == regs[2] + 1); + } void IRFrontend::Comp_VPFX(MIPSOpcode op) { CONDITIONAL_DISABLE; @@ -177,7 +186,21 @@ namespace MIPSComp { } void IRFrontend::Comp_SV(MIPSOpcode op) { - DISABLE; + s32 offset = (signed short)(op & 0xFFFC); + int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5); + MIPSGPReg rs = _RS; + switch (op >> 26) { + case 50: //lv.s + ir.Write(IROp::LoadFloatV, voffset[vt], rs, ir.AddConstant(offset)); + break; + + case 58: //sv.s + ir.Write(IROp::StoreFloatV, voffset[vt], rs, ir.AddConstant(offset)); + break; + + default: + DISABLE; + } } void IRFrontend::Comp_SVQ(MIPSOpcode op) { @@ -187,27 +210,32 @@ namespace MIPSComp { u8 vregs[4]; GetVectorRegs(vregs, V_Quad, vt); + ApplyVoffset(vregs, 4); // Translate to memory order switch (op >> 26) { case 54: //lv.q - { - // TODO: Add vector load/store instruction to the IR - ir.Write(IROp::LoadFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm)); - ir.Write(IROp::LoadFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::LoadFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::LoadFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12)); - } - break; + if (IsConsecutive4(vregs)) { + ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm)); + } else { + // Let's not even bother with "vertical" loads for now. + ir.Write(IROp::LoadFloatV, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::LoadFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::LoadFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::LoadFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + } + break; case 62: //sv.q - { - // CC might be set by slow path below, so load regs first. - ir.Write(IROp::StoreFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm)); - ir.Write(IROp::StoreFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::StoreFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::StoreFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12)); - } - break; + if (IsConsecutive4(vregs)) { + ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm)); + } else { + // Let's not even bother with "vertical" stores for now. 
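The IsConsecutive4() check above is what makes the single LoadVec4/StoreVec4 legal: only when the four remapped registers are adjacent in v[] can one 16-byte vector copy replace four scalar ones; otherwise the frontend falls back to per-lane loads and stores. A small illustrative sketch of that decision plus interpreter-side execution; unaligned intrinsics are used to keep this standalone example safe, whereas the patch uses the aligned forms and relies on the alignment of the real buffers:

#include <cstdint>
#include <cstring>
#ifdef _M_SSE
#include <emmintrin.h>
#endif

static bool IsConsecutive4(const uint8_t regs[4]) {
    return regs[1] == regs[0] + 1 &&
           regs[2] == regs[1] + 1 &&
           regs[3] == regs[2] + 1;
}

// Copy four floats from "guest memory" (here just a buffer) into VFPU regs:
// one vector op when the destination lanes are contiguous, otherwise four
// scalar copies -- the same split as LoadVec4 vs. LoadFloat in the patch.
static void LoadQuad(float *v, const uint8_t regs[4], const float *mem) {
    if (IsConsecutive4(regs)) {
#ifdef _M_SSE
        _mm_storeu_ps(&v[regs[0]], _mm_loadu_ps(mem));
#else
        memcpy(&v[regs[0]], mem, 4 * sizeof(float));
#endif
    } else {
        for (int i = 0; i < 4; i++)
            v[regs[i]] = mem[i];
    }
}

int main() {
    float v[8] = {};
    const float mem[4] = {1.f, 2.f, 3.f, 4.f};
    const uint8_t contiguous[4] = {0, 1, 2, 3};
    const uint8_t scattered[4]  = {4, 6, 5, 7};
    LoadQuad(v, contiguous, mem);   // single 16-byte copy
    LoadQuad(v, scattered, mem);    // falls back to per-element copies
    return (v[0] == 1.f && v[4] == 1.f) ? 0 : 1;
}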
+ ir.Write(IROp::StoreFloatV, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::StoreFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::StoreFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::StoreFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + } + break; default: DISABLE; diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 95ce9d9c4b02..3b13978b43bf 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -236,8 +236,8 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) logBlocks = 1; code = &simplified; - if (ir.GetInstructions().size() >= 24) - logBlocks = 1; + //if (ir.GetInstructions().size() >= 24) + // logBlocks = 1; } instructions = code->GetInstructions(); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 6b3231ce6d9c..d82e72ccdb86 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -60,11 +60,13 @@ static const IRMeta irMeta[] = { { IROp::Load32, "Load32", "GGC" }, { IROp::LoadFloat, "LoadFloat", "FGC" }, { IROp::LoadFloatV, "LoadFloatV", "VGC" }, + { IROp::LoadVec4, "LoadVec4", "VGC" }, { IROp::Store8, "Store8", "GGC" }, { IROp::Store16, "Store16", "GGC" }, { IROp::Store32, "Store32", "GGC" }, { IROp::StoreFloat, "StoreFloat", "FGC" }, { IROp::StoreFloatV, "StoreFloatV", "VGC" }, + { IROp::StoreVec4, "StoreVec4", "VGC" }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 77d71ed91534..70f0e0ff6e82 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -90,12 +90,14 @@ enum class IROp : u8 { Load32, LoadFloat, LoadFloatV, + LoadVec4, Store8, Store16, Store32, StoreFloat, StoreFloatV, + StoreVec4, Ext8to32, Ext16to32, @@ -212,13 +214,16 @@ enum { IRTEMP_LHS, // Reserved for use in branches IRTEMP_RHS, // Reserved for use in branches + // 16 float temps for vector S and T prefixes and things like that. 
+ // IRVTEMP_0 = 208 - 64, // -64 to be relative to v[0] + // Hacky way to get to other state - IRREG_VPFU_CTRL_BASE = 208, - IRREG_VPFU_CC = 211, + IRREG_VFPU_CTRL_BASE = 208, + IRREG_VFPU_CC = 211, IRREG_LO = 226, // offset of lo in MIPSState / 4 IRREG_HI = 227, IRREG_FCR31 = 228, - IRREG_FPCOND = 229 + IRREG_FPCOND = 229, }; struct IRMeta { diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 54f20edb4a76..63e0bd533ef4 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -1,3 +1,7 @@ +#ifdef _M_SSE +#include +#endif + #include "Core/MemMap.h" #include "Core/HLE/HLE.h" #include "Core/HLE/ReplaceTables.h" @@ -107,6 +111,29 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c Memory::WriteUnchecked_Float(mips->v[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); break; + case IROp::LoadVec4: + { + u32 base = mips->r[inst->src1] + constPool[inst->src2]; +#if defined(_M_SSE) + _mm_store_ps(&mips->v[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base))); +#else + for (int i = 0; i < 4; i++) + mips->v[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i); +#endif + break; + } + case IROp::StoreVec4: + { + u32 base = mips->r[inst->src1] + constPool[inst->src2]; +#if defined(_M_SSE) + _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->v[inst->dest])); +#else + for (int i = 0; i < 4; i++) + Memory::WriteUnchecked_Float(mips->v[inst->dest + i], base + 4 * i); +#endif + break; + } + case IROp::ShlImm: mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; break; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 0e5353ff5717..50bfca890357 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -291,6 +291,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::StoreFloat: case IROp::StoreFloatV: + case IROp::StoreVec4: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); } else { @@ -314,6 +315,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::LoadFloat: case IROp::LoadFloatV: + case IROp::LoadVec4: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); } else { @@ -388,7 +390,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { goto doDefault; case IROp::VfpuCtrlToReg: - gpr.MapDirtyIn(inst.dest, IRREG_VPFU_CTRL_BASE + inst.src1); + gpr.MapDirtyIn(inst.dest, IRREG_VFPU_CTRL_BASE + inst.src1); goto doDefault; case IROp::Syscall: diff --git a/Core/MIPS/MIPS.h b/Core/MIPS/MIPS.h index bbc9952c4dc1..d3a01f1bde31 100644 --- a/Core/MIPS/MIPS.h +++ b/Core/MIPS/MIPS.h @@ -86,6 +86,7 @@ enum MIPSGPReg { MIPS_REG_RA=31, // Not real regs, just for convenience/jit mapping. + // NOTE: These are not the same as the offsets the IR has to use! MIPS_REG_HI = 32, MIPS_REG_LO = 33, MIPS_REG_FPCOND = 34, @@ -155,7 +156,7 @@ class MIPSState void DoState(PointerWrap &p); - // MUST start with r and be followed by f! + // MUST start with r and be followed by f, v, and t! u32 r[32]; union { float f[32]; @@ -166,23 +167,25 @@ class MIPSState float v[128]; u32 vi[128]; }; - // Used for temporary variables by IR Interpreter. + + // Register-allocated JIT Temps don't get flushed so we don't reserve space for them. 
+ // However, the IR interpreter needs some temps that can stick around between ops. // Can be indexed through r[] using indices 192+. - u32 t[16]; + u32 t[16]; //192 + // float vt[16]; //208 TODO: VFPU temp - // Temps don't get flushed so we don't reserve space for them. // If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code. - u32 vfpuCtrl[16]; + u32 vfpuCtrl[16]; // 208 // ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct. - u32 padLoHi; + u32 padLoHi; // 224 union { struct { - u32 pc; + u32 pc; //225 - u32 lo; // offset 192 + 16 + 16 + 1 + 1 - u32 hi; + u32 lo; //226 + u32 hi; //227 u32 fcr31; //fpu control register u32 fpcond; // cache the cond flag of fcr31 (& 1 << 23) From 45efcda6b1cd16f167e9310b78b91dd487e682e8 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 10 May 2016 21:50:08 +0200 Subject: [PATCH 45/77] IR: Some more VFPU --- Core/MIPS/IR/IRCompVFPU.cpp | 84 ++++++++++++++++++++++++++++++--- Core/MIPS/IR/IRInst.cpp | 21 +++++++++ Core/MIPS/IR/IRInst.h | 21 +++++++++ Core/MIPS/IR/IRInterpreter.cpp | 38 +++++++++++++++ Core/MIPS/IR/IRJit.cpp | 6 +-- Core/MIPS/IR/IRPassSimplify.cpp | 20 +++++++- Core/MIPS/MIPSVFPUUtils.h | 14 +++++- Core/MIPS/x86/CompVFPU.cpp | 5 +- 8 files changed, 195 insertions(+), 14 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 2bb96e754942..e67b93cdddbd 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -16,6 +16,7 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include + #include "math/math_util.h" #include "Core/MemMap.h" @@ -57,7 +58,9 @@ namespace MIPSComp { } static bool IsConsecutive4(const u8 regs[4]) { - return (regs[1] == regs[0] + 1 && regs[2] == regs[1] + 1 && regs[3] == regs[2] + 1); + return regs[1] == regs[0] + 1 && + regs[2] == regs[1] + 1 && + regs[3] == regs[2] + 1; } void IRFrontend::Comp_VPFX(MIPSOpcode op) { @@ -244,15 +247,79 @@ namespace MIPSComp { } void IRFrontend::Comp_VVectorInit(MIPSOpcode op) { - DISABLE; + if (!js.HasNoPrefix()) + DISABLE; + + VectorSize sz = GetVecSize(op); + int type = (op >> 16) & 0xF; + int vd = _VD; + + if (sz == 4 && IsVectorColumn(vd)) { + u8 dregs[4]; + GetVectorRegs(dregs, sz, vd); + ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); + } else if (sz == 1) { + ir.Write(IROp::SetConstV, voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f)); + } else { + DISABLE; + } } void IRFrontend::Comp_VIdt(MIPSOpcode op) { - DISABLE; + if (!js.HasNoPrefix()) + DISABLE; + + int vd = _VD; + VectorSize sz = GetVecSize(op); + if (sz != V_Quad) + DISABLE; + + if (!IsVectorColumn(vd)) + DISABLE; + + u8 dregs[4]; + GetVectorRegs(dregs, sz, vd); + int row = vd & 3; + Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row); + ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)init); } void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) { - DISABLE; + MatrixSize sz = GetMtxSize(op); + if (sz != M_4x4) { + DISABLE; + } + + // Not really about trying here, it will work if enabled. + VectorSize vsz = GetVectorSize(sz); + u8 vecs[4]; + int vd = _VD; + if (IsMatrixTransposed(vd)) { + // All outputs are transpositionally symmetric, so should be fine. + vd = TransposeMatrixReg(vd); + } + GetMatrixColumns(vd, M_4x4, vecs); + for (int i = 0; i < 4; i++) { + u8 vec[4]; + GetVectorRegs(vec, vsz, vecs[i]); + // As they are columns, they will be nicely consecutive. 
+ Vec4Init init; + switch ((op >> 16) & 0xF) { + case 3: + init = Vec4Init((int)Vec4Init::Set_1000 + i); + break; + case 6: + init = Vec4Init::AllZERO; + break; + case 7: + init = Vec4Init::AllONE; + break; + default: + return; + } + ir.Write(IROp::InitVec4, voffset[vec[0]], (int)init); + } + return; } void IRFrontend::Comp_VHdp(MIPSOpcode op) { @@ -275,7 +342,7 @@ namespace MIPSComp { void IRFrontend::Comp_VV2Op(MIPSOpcode op) { CONDITIONAL_DISABLE; - // Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure + // Eliminate silly no-op VMOVs, common in Wipeout Pure if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) { return; } @@ -379,7 +446,12 @@ namespace MIPSComp { } void IRFrontend::Comp_Viim(MIPSOpcode op) { - DISABLE; + if (!js.HasNoPrefix()) + DISABLE; + + u8 dreg = _VT; + s32 imm = (s32)(s16)(u16)(op & 0xFFFF); + ir.Write(IROp::SetConstV, voffset[dreg], ir.AddConstantFloat((float)imm)); } void IRFrontend::Comp_Vfim(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index d82e72ccdb86..e9bc55ab7844 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -85,9 +85,18 @@ static const IRMeta irMeta[] = { { IROp::FMovToGPR, "FMovToGPR", "GF" }, { IROp::VMovFromGPR, "VMovFromGPR", "VG" }, { IROp::VMovToGPR, "VMovToGPR", "GV" }, + { IROp::InitVec4, "InitVec4", "Vv"}, { IROp::FpCondToReg, "FpCondToReg", "G" }, { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" }, { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, + + { IROp::VSin, "VSin", "VV" }, + { IROp::VCos, "VCos", "VV" }, + { IROp::VSqrt, "VSqrt", "VV" }, + { IROp::VRSqrt, "VRSqrt", "VV" }, + { IROp::VRecip, "VRecip", "VV" }, + { IROp::VAsin, "VAsin", "VV" }, + { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, { IROp::ExitToConst, "Exit", "C" }, @@ -177,6 +186,15 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co "RCX6", "RCX7", }; + static const char *initVec4Names[8] = { + "[0 0 0 0]", + "[1 1 1 1]", + "[-1 -1 -1 -1]", + "[1 0 0 0]", + "[0 1 0 0]", + "[0 0 1 0]", + "[0 0 0 1]", + }; switch (type) { case 'G': @@ -197,6 +215,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co case 'T': snprintf(buf, bufSize, "%s", vfpuCtrlNames[param]); break; + case 'v': + snprintf(buf, bufSize, "%s", initVec4Names[param]); + break; case '_': case '\0': buf[0] = 0; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 70f0e0ff6e82..e2c0f6644a33 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -144,6 +144,16 @@ enum class IROp : u8 { VMovFromGPR, VMovToGPR, + InitVec4, + + // Slow special functions. Used on singles. + VSin, + VCos, + VSqrt, + VRSqrt, + VRecip, + VAsin, + // Fake/System instructions Interpret, @@ -181,6 +191,17 @@ enum IRComparison { Bad, }; +// Some common vec4 constants. 
+enum class Vec4Init { + AllZERO, + AllONE, + AllMinusONE, + Set_1000, + Set_0100, + Set_0010, + Set_0001, +}; + // Hm, unused inline IRComparison Invert(IRComparison comp) { switch (comp) { diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 63e0bd533ef4..2a601bb8f8a1 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -6,6 +6,7 @@ #include "Core/HLE/HLE.h" #include "Core/HLE/ReplaceTables.h" #include "Core/MIPS/MIPSTables.h" +#include "Core/MIPS/MIPSVFPUUtils.h" #include "math/math_util.h" #include "Common/CommonTypes.h" @@ -14,6 +15,16 @@ #include "Core/MIPS/IR/IRInst.h" #include "Core/MIPS/IR/IRInterpreter.h" +alignas(16) float vec4InitValues[8][4] = { + { 0.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, 1.0f, 1.0f }, + { -1.0f, -1.0f, -1.0f, -1.0f }, + { 1.0f, 0.0f, 0.0f, 0.0f }, + { 0.0f, 1.0f, 0.0f, 0.0f }, + { 0.0f, 0.0f, 1.0f, 0.0f }, + { 0.0f, 0.0f, 0.0f, 1.0f }, +}; + u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) { const IRInst *end = inst + count; while (inst != end) { @@ -134,6 +145,33 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; } + case IROp::InitVec4: +#if defined(_M_SSE) + _mm_store_ps(&mips->v[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); +#else + memcpy(&mips->v[inst->dest + i], vec4InitValues[inst->src1], 4 * sizeof(float)); +#endif + break; + + case IROp::VSin: + mips->v[inst->dest] = vfpu_sin(mips->v[inst->src1]); + break; + case IROp::VCos: + mips->v[inst->dest] = vfpu_cos(mips->v[inst->src1]); + break; + case IROp::VSqrt: + mips->v[inst->dest] = sqrtf(mips->v[inst->src1]); + break; + case IROp::VRSqrt: + mips->v[inst->dest] = 1.0f / sqrtf(mips->v[inst->src1]); + break; + case IROp::VRecip: + mips->v[inst->dest] = 1.0f / mips->v[inst->src1]; + break; + case IROp::VAsin: + mips->v[inst->dest] = vfpu_asin(mips->v[inst->src1]); + break; + case IROp::ShlImm: mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; break; diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 661543a9d0df..fb490268559d 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -114,9 +114,9 @@ void IRJit::RunLoopUntil(u64 globalticks) { } while (mips_->downcount >= 0) { u32 inst = Memory::ReadUnchecked_U32(mips_->pc); - u32 opcode = inst >> 24; - u32 data = inst & 0xFFFFFF; - if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) { + u32 opcode = inst & 0xFF000000; + if (opcode == MIPS_EMUHACK_OPCODE) { + u32 data = inst & 0xFFFFFF; IRBlock *block = blocks_.GetBlock(data); mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions()); } else { diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 50bfca890357..d7c93593f9ab 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -340,8 +340,13 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { // FP-only instructions don't need to flush immediates. 
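The reworked RunLoopUntil() above dispatches compiled blocks through the instruction stream itself: Finalize() overwrites the first word of a block with an emuhack whose top 8 bits are the tag and whose low 24 bits are the block number, and the dispatcher masks those back out. A tiny sketch of that encode/decode; the tag value below is a placeholder for the real MIPS_EMUHACK_OPCODE constant:

#include <cassert>
#include <cstdint>

// Placeholder tag; the real value is MIPS_EMUHACK_OPCODE from the codebase.
static const uint32_t kEmuhackOpcode = 0x68000000;

// What Finalize() conceptually does: overwrite the block's first instruction
// so the dispatcher can find the block again by index.
uint32_t EncodeBlockLink(uint32_t blockNum) {
    assert(blockNum <= 0x00FFFFFF);           // only 24 bits available
    return kEmuhackOpcode | blockNum;
}

// The dispatch test from RunLoopUntil(): top byte selects, low 24 bits index.
bool DecodeBlockLink(uint32_t inst, uint32_t *blockNum) {
    if ((inst & 0xFF000000) != kEmuhackOpcode)
        return false;                         // a normal MIPS instruction, interpret/compile it
    *blockNum = inst & 0x00FFFFFF;
    return true;
}

int main() {
    uint32_t n = 0;
    uint32_t word = EncodeBlockLink(1234);
    return (DecodeBlockLink(word, &n) && n == 1234) ? 0 : 1;
}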
case IROp::FAdd: case IROp::FMul: - case IROp::FDiv: + // Regularize, to help x86 backends (add.s r0, r1, r0 -> add.s r0, r0, r1) + if (inst.src2 == inst.dest && inst.src1 != inst.src2) + std::swap(inst.src1, inst.src2); + out.Write(inst); + break; case IROp::FSub: + case IROp::FDiv: case IROp::FNeg: case IROp::FAbs: case IROp::FSqrt: @@ -373,6 +378,19 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; + case IROp::InitVec4: + out.Write(inst); + break; + + case IROp::VSin: + case IROp::VCos: + case IROp::VSqrt: + case IROp::VRSqrt: + case IROp::VRecip: + case IROp::VAsin: + out.Write(inst); + break; + case IROp::ZeroFpCond: case IROp::FCmpUnordered: case IROp::FCmpEqual: diff --git a/Core/MIPS/MIPSVFPUUtils.h b/Core/MIPS/MIPSVFPUUtils.h index bb8403217f3d..7f6ada0fa2c6 100644 --- a/Core/MIPS/MIPSVFPUUtils.h +++ b/Core/MIPS/MIPSVFPUUtils.h @@ -45,6 +45,10 @@ inline float vfpu_cos(float angle) { return cosf(angle); } +inline float vfpu_asin(float angle) { + return asinf(angle) / M_PI_2; +} + inline void vfpu_sincos(float angle, float &sine, float &cosine) { angle -= floorf(angle * 0.25f) * 4.f; angle *= (float)M_PI_2; @@ -127,7 +131,15 @@ int GetNumVectorElements(VectorSize sz); int GetMatrixSide(MatrixSize sz); const char *GetVectorNotation(int reg, VectorSize size); const char *GetMatrixNotation(int reg, MatrixSize size); - +inline bool IsMatrixTransposed(int matrixReg) { + return (matrixReg >> 5) & 1; +} +inline bool IsVectorColumn(int vectorReg) { + return !((vectorReg >> 5) & 1); +} +inline int TransposeMatrixReg(int matrixReg) { + return matrixReg ^ 0x20; +} int GetVectorOverlap(int reg1, VectorSize size1, int reg2, VectorSize size2); float Float16ToFloat32(unsigned short l); diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index fcc51c364659..749967f53a61 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -101,8 +101,7 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { for (int i = 0; i < n; i++) origV[i] = vregs[i]; - for (int i = 0; i < n; i++) - { + for (int i = 0; i < n; i++) { int regnum = (prefix >> (i*2)) & 3; int abs = (prefix >> (8+i)) & 1; int negate = (prefix >> (16+i)) & 1; @@ -2142,7 +2141,7 @@ void CosOnly(SinCosArg angle) { } void ASinScaled(SinCosArg angle) { - sincostemp[0] = asinf(angle) / M_PI_2; + sincostemp[0] = vfpu_asin(angle); } void SinCosNegSin(SinCosArg angle) { From db1d1ff9fdfdfaa1bf382060308629eab82aee9d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 10 May 2016 22:55:27 +0200 Subject: [PATCH 46/77] IR: Merge the FPU and VFPU instruction sets, no reason to keep them apart --- Core/MIPS/IR/IRCompVFPU.cpp | 38 ++++++++++++------------ Core/MIPS/IR/IRInst.cpp | 32 +++++++++------------ Core/MIPS/IR/IRInst.h | 16 ++++------- Core/MIPS/IR/IRInterpreter.cpp | 51 +++++++++++---------------------- Core/MIPS/IR/IRPassSimplify.cpp | 33 +++++---------------- 5 files changed, 62 insertions(+), 108 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index e67b93cdddbd..8f35cbef86fe 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -50,10 +50,12 @@ #define _IMM16 (signed short)(op & 0xFFFF) #define _IMM26 (op & 0x03FFFFFF) +const int vfpuBase = 32; // skip the FP registers + namespace MIPSComp { static void ApplyVoffset(u8 regs[4], int count) { for (int i = 0; i < count; i++) { - regs[i] = voffset[regs[i]]; + regs[i] = vfpuBase + voffset[regs[i]]; } } @@ -194,11 +196,11 @@ namespace MIPSComp { MIPSGPReg rs = _RS; 
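Patch 46 above collapses the FPU and VFPU into one float index space: operands 0-31 are f[], and vfpuBase (32) plus voffset[] reaches v[], which the MIPSState comment ("MUST start with r and be followed by f, v, and t!") guarantees sits directly after f[]. The IR temps and VFPU control words then live at the fixed indices noted in MIPS.h and the IRREG_* constants (192, 208, 226, ...). A simplified mirror of that layout, only to check the index arithmetic; this is not the real MIPSState and ignores its unions and alignment attributes:

#include <cstddef>
#include <cstdint>

// Simplified stand-in for MIPSState: 4-byte fields only, in the order the IR relies on.
struct MiniMIPSState {
    uint32_t r[32];         // GPRs: IR indices 0-31
    float    f[32];         // FPU regs: float indices 0-31
    float    v[128];        // VFPU regs, reached as f[vfpuBase + voffset[n]] with vfpuBase == 32
    uint32_t t[16];         // IR temps, indexed through r[] at 192+
    uint32_t vfpuCtrl[16];  // IRREG_VFPU_CTRL_BASE == 208
    uint32_t padLoHi;       // 224
    uint32_t pc;            // 225
    uint32_t lo;            // IRREG_LO == 226
    uint32_t hi;            // IRREG_HI == 227
    uint32_t fcr31;         // IRREG_FCR31 == 228
    uint32_t fpcond;        // IRREG_FPCOND == 229
};

static_assert((offsetof(MiniMIPSState, v) - offsetof(MiniMIPSState, f)) / 4 == 32,
              "vfpuBase: v[] starts 32 floats after f[]");
static_assert(offsetof(MiniMIPSState, t) / 4 == 192, "temps at index 192");
static_assert(offsetof(MiniMIPSState, vfpuCtrl) / 4 == 208, "VFPU ctrl base at 208");
static_assert(offsetof(MiniMIPSState, lo) / 4 == 226, "lo at 226");
static_assert(offsetof(MiniMIPSState, hi) / 4 == 227, "hi at 227");

int main() { return 0; }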
switch (op >> 26) { case 50: //lv.s - ir.Write(IROp::LoadFloatV, voffset[vt], rs, ir.AddConstant(offset)); + ir.Write(IROp::LoadFloat, vfpuBase + voffset[vt], rs, ir.AddConstant(offset)); break; case 58: //sv.s - ir.Write(IROp::StoreFloatV, voffset[vt], rs, ir.AddConstant(offset)); + ir.Write(IROp::StoreFloat, vfpuBase + voffset[vt], rs, ir.AddConstant(offset)); break; default: @@ -221,10 +223,10 @@ namespace MIPSComp { ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm)); } else { // Let's not even bother with "vertical" loads for now. - ir.Write(IROp::LoadFloatV, vregs[0], rs, ir.AddConstant(imm)); - ir.Write(IROp::LoadFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::LoadFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::LoadFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + ir.Write(IROp::LoadFloat, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::LoadFloat, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::LoadFloat, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::LoadFloat, vregs[3], rs, ir.AddConstant(imm + 12)); } break; @@ -233,10 +235,10 @@ namespace MIPSComp { ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm)); } else { // Let's not even bother with "vertical" stores for now. - ir.Write(IROp::StoreFloatV, vregs[0], rs, ir.AddConstant(imm)); - ir.Write(IROp::StoreFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::StoreFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::StoreFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + ir.Write(IROp::StoreFloat, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::StoreFloat, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::StoreFloat, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::StoreFloat, vregs[3], rs, ir.AddConstant(imm + 12)); } break; @@ -257,9 +259,9 @@ namespace MIPSComp { if (sz == 4 && IsVectorColumn(vd)) { u8 dregs[4]; GetVectorRegs(dregs, sz, vd); - ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); + ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); } else if (sz == 1) { - ir.Write(IROp::SetConstV, voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f)); + ir.Write(IROp::SetConstF, vfpuBase + voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f)); } else { DISABLE; } @@ -281,7 +283,7 @@ namespace MIPSComp { GetVectorRegs(dregs, sz, vd); int row = vd & 3; Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row); - ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)init); + ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)init); } void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) { @@ -317,7 +319,7 @@ namespace MIPSComp { default: return; } - ir.Write(IROp::InitVec4, voffset[vec[0]], (int)init); + ir.Write(IROp::InitVec4, vfpuBase + voffset[vec[0]], (int)init); } return; } @@ -369,7 +371,7 @@ namespace MIPSComp { // rt = 0, imm = 255 appears to be used as a CPU interlock by some games. 
if (rt != 0) { if (imm < 128) { //R(rt) = VI(imm); - ir.Write(IROp::VMovToGPR, rt, voffset[imm]); + ir.Write(IROp::FMovToGPR, rt, vfpuBase + voffset[imm]); } else { DISABLE; } @@ -378,7 +380,7 @@ namespace MIPSComp { case 7: // mtv if (imm < 128) { - ir.Write(IROp::VMovFromGPR, voffset[imm], rt); + ir.Write(IROp::FMovFromGPR, vfpuBase + voffset[imm], rt); } else { DISABLE; } @@ -451,7 +453,7 @@ namespace MIPSComp { u8 dreg = _VT; s32 imm = (s32)(s16)(u16)(op & 0xFFFF); - ir.Write(IROp::SetConstV, voffset[dreg], ir.AddConstantFloat((float)imm)); + ir.Write(IROp::SetConstF, vfpuBase + voffset[dreg], ir.AddConstantFloat((float)imm)); } void IRFrontend::Comp_Vfim(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index e9bc55ab7844..1f80be40b217 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -6,7 +6,6 @@ static const IRMeta irMeta[] = { { IROp::SetConst, "SetConst", "GC" }, { IROp::SetConstF, "SetConstF", "FC" }, - { IROp::SetConstV, "SetConstV", "VC" }, { IROp::Mov, "Mov", "GG" }, { IROp::Add, "Add", "GGG" }, { IROp::Sub, "Sub", "GGG" }, @@ -59,14 +58,12 @@ static const IRMeta irMeta[] = { { IROp::Load16Ext, "Load16Ext", "GGC" }, { IROp::Load32, "Load32", "GGC" }, { IROp::LoadFloat, "LoadFloat", "FGC" }, - { IROp::LoadFloatV, "LoadFloatV", "VGC" }, - { IROp::LoadVec4, "LoadVec4", "VGC" }, + { IROp::LoadVec4, "LoadVec4", "FGC" }, { IROp::Store8, "Store8", "GGC" }, { IROp::Store16, "Store16", "GGC" }, { IROp::Store32, "Store32", "GGC" }, { IROp::StoreFloat, "StoreFloat", "FGC" }, - { IROp::StoreFloatV, "StoreFloatV", "VGC" }, - { IROp::StoreVec4, "StoreVec4", "VGC" }, + { IROp::StoreVec4, "StoreVec4", "FGC" }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, @@ -83,19 +80,17 @@ static const IRMeta irMeta[] = { { IROp::FCvtSW, "FCvtSW", "FF" }, { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, { IROp::FMovToGPR, "FMovToGPR", "GF" }, - { IROp::VMovFromGPR, "VMovFromGPR", "VG" }, - { IROp::VMovToGPR, "VMovToGPR", "GV" }, - { IROp::InitVec4, "InitVec4", "Vv"}, + { IROp::InitVec4, "InitVec4", "Fv"}, { IROp::FpCondToReg, "FpCondToReg", "G" }, { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" }, { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, - { IROp::VSin, "VSin", "VV" }, - { IROp::VCos, "VCos", "VV" }, - { IROp::VSqrt, "VSqrt", "VV" }, - { IROp::VRSqrt, "VRSqrt", "VV" }, - { IROp::VRecip, "VRecip", "VV" }, - { IROp::VAsin, "VAsin", "VV" }, + { IROp::FSin, "FSin", "FF" }, + { IROp::FCos, "FCos", "FF" }, + { IROp::FSqrt, "FSqrt", "FF" }, + { IROp::FRSqrt, "FRSqrt", "FF" }, + { IROp::FRecip, "FRecip", "FF" }, + { IROp::FAsin, "FAsin", "FF" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, @@ -201,7 +196,11 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co snprintf(buf, bufSize, "%s", GetGPRName(param)); break; case 'F': - snprintf(buf, bufSize, "f%d", param); + if (param >= 32) { + snprintf(buf, bufSize, "v%d", param - 32); + } else { + snprintf(buf, bufSize, "f%d", param); + } break; case 'C': snprintf(buf, bufSize, "%08x", constPool[param]); @@ -209,9 +208,6 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co case 'I': snprintf(buf, bufSize, "%02x", param); break; - case 'V': - snprintf(buf, bufSize, "v%d", param); - break; case 'T': snprintf(buf, bufSize, "%s", vfpuCtrlNames[param]); break; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index e2c0f6644a33..1c033b0ce376 100644 --- a/Core/MIPS/IR/IRInst.h +++ 
b/Core/MIPS/IR/IRInst.h @@ -18,7 +18,6 @@ enum class IROp : u8 { SetConst, SetConstF, - SetConstV, Mov, @@ -89,14 +88,12 @@ enum class IROp : u8 { Load16Ext, Load32, LoadFloat, - LoadFloatV, LoadVec4, Store8, Store16, Store32, StoreFloat, - StoreFloatV, StoreVec4, Ext8to32, @@ -141,18 +138,15 @@ enum class IROp : u8 { UpdateRoundingMode, SetCtrlVFPU, - VMovFromGPR, - VMovToGPR, InitVec4, // Slow special functions. Used on singles. - VSin, - VCos, - VSqrt, - VRSqrt, - VRecip, - VAsin, + FSin, + FCos, + FRSqrt, + FRecip, + FAsin, // Fake/System instructions Interpret, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 2a601bb8f8a1..6c71682c18a2 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -35,9 +35,6 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::SetConstF: memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4); break; - case IROp::SetConstV: - memcpy(&mips->v[inst->dest], &constPool[inst->src1], 4); - break; case IROp::Add: mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; break; @@ -102,9 +99,6 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::LoadFloat: mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); break; - case IROp::LoadFloatV: - mips->v[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); - break; case IROp::Store8: Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); @@ -118,18 +112,15 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::StoreFloat: Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); break; - case IROp::StoreFloatV: - Memory::WriteUnchecked_Float(mips->v[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); - break; case IROp::LoadVec4: { u32 base = mips->r[inst->src1] + constPool[inst->src2]; #if defined(_M_SSE) - _mm_store_ps(&mips->v[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base))); + _mm_store_ps(&mips->f[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base))); #else for (int i = 0; i < 4; i++) - mips->v[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i); + mips->f[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i); #endif break; } @@ -137,39 +128,36 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c { u32 base = mips->r[inst->src1] + constPool[inst->src2]; #if defined(_M_SSE) - _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->v[inst->dest])); + _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->f[inst->dest])); #else for (int i = 0; i < 4; i++) - Memory::WriteUnchecked_Float(mips->v[inst->dest + i], base + 4 * i); + Memory::WriteUnchecked_Float(mips->f[inst->dest + i], base + 4 * i); #endif break; } case IROp::InitVec4: #if defined(_M_SSE) - _mm_store_ps(&mips->v[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); + _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); #else - memcpy(&mips->v[inst->dest + i], vec4InitValues[inst->src1], 4 * sizeof(float)); + memcpy(&mips->f[inst->dest + i], vec4InitValues[inst->src1], 4 * sizeof(float)); #endif break; - case IROp::VSin: - mips->v[inst->dest] = vfpu_sin(mips->v[inst->src1]); - break; - case IROp::VCos: - mips->v[inst->dest] = 
vfpu_cos(mips->v[inst->src1]); + case IROp::FSin: + mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]); break; - case IROp::VSqrt: - mips->v[inst->dest] = sqrtf(mips->v[inst->src1]); + case IROp::FCos: + mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]); break; - case IROp::VRSqrt: - mips->v[inst->dest] = 1.0f / sqrtf(mips->v[inst->src1]); + case IROp::FRSqrt: + mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]); break; - case IROp::VRecip: - mips->v[inst->dest] = 1.0f / mips->v[inst->src1]; + case IROp::FRecip: + mips->f[inst->dest] = 1.0f / mips->f[inst->src1]; break; - case IROp::VAsin: - mips->v[inst->dest] = vfpu_asin(mips->v[inst->src1]); + case IROp::FAsin: + mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]); break; case IROp::ShlImm: @@ -378,13 +366,6 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); break; - case IROp::VMovFromGPR: - memcpy(&mips->v[inst->dest], &mips->r[inst->src1], 4); - break; - case IROp::VMovToGPR: - memcpy(&mips->r[inst->dest], &mips->v[inst->src1], 4); - break; - case IROp::ExitToConst: return constPool[inst->dest]; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index d7c93593f9ab..e846c8420a1c 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -110,7 +110,6 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { gpr.SetImm(inst.dest, constants[inst.src1]); break; case IROp::SetConstF: - case IROp::SetConstV: goto doDefault; case IROp::Sub: @@ -251,23 +250,10 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; - case IROp::VMovFromGPR: - if (gpr.IsImm(inst.src1)) { - out.Write(IROp::SetConstV, inst.dest, out.AddConstant(gpr.GetImm(inst.src1))); - } else { - gpr.MapIn(inst.src1); - goto doDefault; - } - break; - case IROp::FMovToGPR: gpr.MapDirty(inst.dest); goto doDefault; - case IROp::VMovToGPR: - gpr.MapDirty(inst.dest); - goto doDefault; - case IROp::MfHi: case IROp::MfLo: gpr.MapDirty(inst.dest); @@ -290,7 +276,6 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; case IROp::StoreFloat: - case IROp::StoreFloatV: case IROp::StoreVec4: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); @@ -314,7 +299,6 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; case IROp::LoadFloat: - case IROp::LoadFloatV: case IROp::LoadVec4: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); @@ -345,17 +329,23 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { std::swap(inst.src1, inst.src2); out.Write(inst); break; + case IROp::FSub: case IROp::FDiv: case IROp::FNeg: case IROp::FAbs: - case IROp::FSqrt: case IROp::FMov: case IROp::FRound: case IROp::FTrunc: case IROp::FCeil: case IROp::FFloor: case IROp::FCvtSW: + case IROp::FSin: + case IROp::FCos: + case IROp::FSqrt: + case IROp::FRSqrt: + case IROp::FRecip: + case IROp::FAsin: out.Write(inst); break; @@ -382,15 +372,6 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { out.Write(inst); break; - case IROp::VSin: - case IROp::VCos: - case IROp::VSqrt: - case IROp::VRSqrt: - case IROp::VRecip: - case IROp::VAsin: - out.Write(inst); - break; - case IROp::ZeroFpCond: case IROp::FCmpUnordered: case IROp::FCmpEqual: From b3dd36982f7a92596705f777e5bb42ef7c5eeb57 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 10 May 
2016 23:14:26 +0200 Subject: [PATCH 47/77] Prefix prep --- Core/MIPS/IR/IRCompVFPU.cpp | 26 +++++++++----------------- Core/MIPS/IR/IRInst.cpp | 2 ++ Core/MIPS/IR/IRInst.h | 3 +++ Core/MIPS/IR/IRInterpreter.cpp | 7 +++++++ 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 8f35cbef86fe..2346473786e8 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -152,17 +152,21 @@ namespace MIPSComp { int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { // Hopefully this is rare, we'll just write it into a reg we drop. - //if (js.VfpuWriteMask(i)) - // regs[i] = fpr.GetTempV(); + if (js.VfpuWriteMask(i)) + regs[i] = fpr.GetTempV(); } } + inline int GetDSat(int prefix, int i) { + return (prefix >> (i * 2)) & 3; + } + + // "D" prefix is really a post process. No need to allocate a temporary register. void IRFrontend::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); if (!js.prefixD) return; - /* int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { if (js.VfpuWriteMask(i)) @@ -171,23 +175,11 @@ namespace MIPSComp { int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) { // clamped = x < 0 ? (x > 1 ? 1 : x) : x [0, 1] - fpr.MapRegV(vregs[i], MAP_DIRTY); - - fp.MOVI2F(S0, 0.0f, SCRATCH1); - fp.MOVI2F(S1, 1.0f, SCRATCH1); - fp.FMIN(fpr.V(vregs[i]), fpr.V(vregs[i]), S1); - fp.FMAX(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); + ir.Write(IROp::FSat0_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]); } else if (sat == 3) { - // clamped = x < -1 ? (x > 1 ? 1 : x) : x [-1, 1] - fpr.MapRegV(vregs[i], MAP_DIRTY); - - fp.MOVI2F(S0, -1.0f, SCRATCH1); - fp.MOVI2F(S1, 1.0f, SCRATCH1); - fp.FMIN(fpr.V(vregs[i]), fpr.V(vregs[i]), S1); - fp.FMAX(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); + ir.Write(IROp::FSatMinus1_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]); } } - */ } void IRFrontend::Comp_SV(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 1f80be40b217..469c97cf5818 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -78,6 +78,8 @@ static const IRMeta irMeta[] = { { IROp::FFloor, "FFloor", "FF" }, { IROp::FCvtWS, "FCvtWS", "FF" }, { IROp::FCvtSW, "FCvtSW", "FF" }, + { IROp::FSat0_1, "FSat(0 - 1)", "FF" }, + { IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" }, { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, { IROp::FMovToGPR, "FMovToGPR", "GF" }, { IROp::InitVec4, "InitVec4", "Fv"}, diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 1c033b0ce376..322ef2386d3d 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -120,6 +120,9 @@ enum class IROp : u8 { FMovFromGPR, FMovToGPR, + FSat0_1, + FSatMinus1_1, + FpCondToReg, VfpuCtrlToReg, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 6c71682c18a2..f77216ef0c37 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -303,6 +303,13 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::FNeg: mips->f[inst->dest] = -mips->f[inst->src1]; break; + case IROp::FSat0_1: + mips->f[inst->dest] = clamp_value(mips->f[inst->src1], 0.0f, 1.0f); + break; + case IROp::FSatMinus1_1: + mips->f[inst->dest] = clamp_value(mips->f[inst->src1], -1.0f, 1.0f); + break; + case IROp::FpCondToReg: mips->r[inst->dest] = mips->fpcond; break; From 219548b8e28627cebcb14f550a1f7287643a5796 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 11 
May 2016 00:16:07 +0200 Subject: [PATCH 48/77] Prefix prep --- Core/MIPS/IR/IRCompVFPU.cpp | 78 +++++++++++++++++++++------------- Core/MIPS/IR/IRFrontend.h | 16 +++---- Core/MIPS/IR/IRInst.cpp | 19 ++++++--- Core/MIPS/IR/IRInst.h | 17 ++++++-- Core/MIPS/IR/IRInterpreter.cpp | 9 ++++ Core/MIPS/MIPS.h | 15 ++++--- 6 files changed, 96 insertions(+), 58 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 2346473786e8..c9d0083d2eb3 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -88,7 +88,7 @@ namespace MIPSComp { } } - void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { + void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg) { if (prefix == 0xE4) return; @@ -109,13 +109,9 @@ namespace MIPSComp { if (!constants && regnum == i && !abs && !negate) continue; - /* // This puts the value into a temp reg, so we won't write the modified value back. - vregs[i] = fpr.GetTempV(); + vregs[i] = tempReg + i; if (!constants) { - fpr.MapDirtyInV(vregs[i], origV[regnum]); - fpr.SpillLockV(vregs[i]); - // Prefix may say "z, z, z, z" but if this is a pair, we force to x. // TODO: But some ops seem to use const 0 instead? if (regnum >= n) { @@ -124,36 +120,58 @@ namespace MIPSComp { } if (abs) { - fp.FABS(fpr.V(vregs[i]), fpr.V(origV[regnum])); + ir.Write(IROp::FAbs, vregs[i], origV[regnum]); if (negate) - fp.FNEG(fpr.V(vregs[i]), fpr.V(vregs[i])); + ir.Write(IROp::FNeg, vregs[i], vregs[i]); } else { if (negate) - fp.FNEG(fpr.V(vregs[i]), fpr.V(origV[regnum])); + ir.Write(IROp::FNeg, vregs[i], origV[regnum]); else - fp.FMOV(fpr.V(vregs[i]), fpr.V(origV[regnum])); + ir.Write(IROp::FMov, vregs[i], origV[regnum]); } } else { - fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT); - fpr.SpillLockV(vregs[i]); - fp.MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs << 2)], SCRATCH1, (bool)negate); + if (negate) { + ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(-constantArray[regnum + (abs << 2)])); + } else { + ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(constantArray[regnum + (abs << 2)])); + } } - */ } } + void IRFrontend::GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg) { + ::GetVectorRegs(regs, N, vectorReg); + ApplyVoffset(regs, N); + } + + void IRFrontend::GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg) { + ::GetMatrixRegs(regs, N, matrixReg); + // TODO + } + + void IRFrontend::GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); + ::GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixS, sz, IRVTEMP_PFX_S); + } + void IRFrontend::GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); + ::GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixT, sz, IRVTEMP_PFX_T); + } + void IRFrontend::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) { _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); GetVectorRegs(regs, sz, vectorReg); + int n = GetNumVectorElements(sz); if (js.prefixD == 0) return; - int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { - // Hopefully this is rare, we'll just write it into a reg we drop. + // Hopefully this is rare, we'll just write it into a dumping ground reg. 
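Taken together with GetDSat() and the clamp_value() interpreter cases added in the previous patch, the D prefix boils down to two effects: write-masked lanes are redirected to scratch registers (IRVTEMP_PFX_D, used just below), and the per-lane saturation bits clamp the written value. Below is a scalar model of the saturation part only; NaN behaviour of the real hardware is not modeled.

#include <algorithm>
#include <cstdio>

// Scalar model of the destination-prefix saturation emitted as FSat0_1 / FSatMinus1_1.
// The two-bit field per lane is extracted exactly like GetDSat(); modes 0 and 2 are left
// unclamped in this sketch.
static float ApplyDSatModel(float x, int dprefix, int lane) {
    int sat = (dprefix >> (lane * 2)) & 3;
    if (sat == 1)
        return std::min(std::max(x, 0.0f), 1.0f);   // [0, 1]
    if (sat == 3)
        return std::min(std::max(x, -1.0f), 1.0f);  // [-1, 1]
    return x;
}

int main() {
    printf("%g %g\n", ApplyDSatModel(1.5f, 0x1, 0), ApplyDSatModel(-2.0f, 0x3, 0));  // 1 -1
    return 0;
}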
if (js.VfpuWriteMask(i)) - regs[i] = fpr.GetTempV(); + regs[i] = IRVTEMP_PFX_D + i; } } @@ -171,13 +189,12 @@ namespace MIPSComp { for (int i = 0; i < n; i++) { if (js.VfpuWriteMask(i)) continue; - - int sat = (js.prefixD >> (i * 2)) & 3; + int sat = GetDSat(js.prefixD, i); if (sat == 1) { // clamped = x < 0 ? (x > 1 ? 1 : x) : x [0, 1] - ir.Write(IROp::FSat0_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]); + ir.Write(IROp::FSat0_1, vregs[i], vregs[i]); } else if (sat == 3) { - ir.Write(IROp::FSatMinus1_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]); + ir.Write(IROp::FSatMinus1_1, vregs[i], vregs[i]); } } } @@ -207,7 +224,6 @@ namespace MIPSComp { u8 vregs[4]; GetVectorRegs(vregs, V_Quad, vt); - ApplyVoffset(vregs, 4); // Translate to memory order switch (op >> 26) { case 54: //lv.q @@ -251,9 +267,11 @@ namespace MIPSComp { if (sz == 4 && IsVectorColumn(vd)) { u8 dregs[4]; GetVectorRegs(dregs, sz, vd); - ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); + ir.Write(IROp::InitVec4, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); } else if (sz == 1) { - ir.Write(IROp::SetConstF, vfpuBase + voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f)); + u8 dreg; + GetVectorRegs(&dreg, V_Single, vd); + ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f)); } else { DISABLE; } @@ -275,7 +293,7 @@ namespace MIPSComp { GetVectorRegs(dregs, sz, vd); int row = vd & 3; Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row); - ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)init); + ir.Write(IROp::InitVec4, dregs[0], (int)init); } void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) { @@ -311,7 +329,7 @@ namespace MIPSComp { default: return; } - ir.Write(IROp::InitVec4, vfpuBase + voffset[vec[0]], (int)init); + ir.Write(IROp::InitVec4, vec[0], (int)init); } return; } @@ -440,12 +458,14 @@ namespace MIPSComp { } void IRFrontend::Comp_Viim(MIPSOpcode op) { - if (!js.HasNoPrefix()) + if (!js.HasUnknownPrefix()) DISABLE; - u8 dreg = _VT; s32 imm = (s32)(s16)(u16)(op & 0xFFFF); - ir.Write(IROp::SetConstF, vfpuBase + voffset[dreg], ir.AddConstantFloat((float)imm)); + u8 dreg; + GetVectorRegsPrefixD(&dreg, V_Single, _VT); + ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat((float)imm)); + ApplyPrefixD(&dreg, V_Single); } void IRFrontend::Comp_Vfim(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index 9b8db0c76a04..7e813a307d81 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -115,19 +115,13 @@ class IRFrontend : public MIPSFrontendInterface { void CompShiftImm(MIPSOpcode op, IROp shiftType, int sa); void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst); - void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz); + void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg); void ApplyPrefixD(const u8 *vregs, VectorSize sz); - void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { - _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); - GetVectorRegs(regs, sz, vectorReg); - ApplyPrefixST(regs, js.prefixS, sz); - } - void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { - _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); - GetVectorRegs(regs, sz, vectorReg); - ApplyPrefixST(regs, js.prefixT, sz); - } + void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg); + void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg); void 
GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); + void GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg); + void GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg); // Utils void Comp_ITypeMemLR(MIPSOpcode op, bool load); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 469c97cf5818..9d41d74fbfdd 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -70,6 +70,12 @@ static const IRMeta irMeta[] = { { IROp::FDiv, "FDiv", "FFF" }, { IROp::FMov, "FMov", "FF" }, { IROp::FSqrt, "FSqrt", "FF" }, + { IROp::FSin, "FSin", "FF" }, + { IROp::FCos, "FCos", "FF" }, + { IROp::FSqrt, "FSqrt", "FF" }, + { IROp::FRSqrt, "FRSqrt", "FF" }, + { IROp::FRecip, "FRecip", "FF" }, + { IROp::FAsin, "FAsin", "FF" }, { IROp::FNeg, "FNeg", "FF" }, { IROp::FAbs, "FAbs", "FF" }, { IROp::FRound, "FRound", "FF" }, @@ -82,17 +88,12 @@ static const IRMeta irMeta[] = { { IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" }, { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, { IROp::FMovToGPR, "FMovToGPR", "GF" }, - { IROp::InitVec4, "InitVec4", "Fv"}, { IROp::FpCondToReg, "FpCondToReg", "G" }, { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" }, { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, - { IROp::FSin, "FSin", "FF" }, - { IROp::FCos, "FCos", "FF" }, - { IROp::FSqrt, "FSqrt", "FF" }, - { IROp::FRSqrt, "FRSqrt", "FF" }, - { IROp::FRecip, "FRecip", "FF" }, - { IROp::FAsin, "FAsin", "FF" }, + { IROp::InitVec4, "InitVec4", "Fv" }, + { IROp::ShuffleVec4, "ShuffleVec4", "FFs" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, @@ -192,6 +193,7 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co "[0 0 1 0]", "[0 0 0 1]", }; + static const char *xyzw = "xyzw"; switch (type) { case 'G': @@ -216,6 +218,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co case 'v': snprintf(buf, bufSize, "%s", initVec4Names[param]); break; + case 's': + snprintf(buf, bufSize, "%s%s%s%s", xyzw[param & 3], xyzw[(param >> 2) & 3], xyzw[(param >> 4) & 3], xyzw[(param >> 6) & 3]); + break; case '_': case '\0': buf[0] = 0; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 322ef2386d3d..e9bbc4acbf3b 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -142,7 +142,11 @@ enum class IROp : u8 { SetCtrlVFPU, + // 4-wide instructions to assist SIMD. + // Can of course add a pass to break them up if a target does not + // support SIMD. InitVec4, + ShuffleVec4, // Slow special functions. Used on singles. FSin, @@ -232,16 +236,21 @@ enum { IRTEMP_LHS, // Reserved for use in branches IRTEMP_RHS, // Reserved for use in branches + IRVTEMP_PFX_S = 224 - 32, // Relative to the FP regs + IRVTEMP_PFX_T = 228 - 32, + IRVTEMP_PFX_D = 232 - 32, + IRVTEMP_0 = 236 - 32, + // 16 float temps for vector S and T prefixes and things like that. 
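The IRVTEMP_* temporaries above, and the renumbered IRREG_* values that follow, lean on the MIPSState word offsets documented in the MIPS.h hunk later in this patch (f[] starting at word 32, vt[] at 224, lo at 242, and so on). The mock struct below mirrors those commented offsets only, to make the "224 - 32" arithmetic concrete; field names and sizes are taken from the comments, not from the real header.

#include <cstddef>
#include <cstdint>

// Mirrors the commented word offsets only; the real MIPSState has more going on
// (a union around pc/lo/hi, ARM64 alignment requirements for lo/hi, etc.).
struct MockMIPSState {
    uint32_t r[32];         // word 0
    float    f[32];         // word 32   F-typed IR operands index from here
    float    v[128];        // word 64
    uint32_t t[16];         // word 192  reachable through r[] as indices 192+ per the comment
    uint32_t vfpuCtrl[16];  // word 208
    float    vt[16];        // word 224  the new VFPU temp block
    uint32_t padLoHi;       // word 240
    uint32_t pc;            // word 241
    uint32_t lo;            // word 242
    uint32_t hi;            // word 243
    uint32_t fcr31;         // word 244
    uint32_t fpcond;        // word 245
};

static_assert(offsetof(MockMIPSState, vt) / 4 == 224, "vt[] starts at word 224");
static_assert((offsetof(MockMIPSState, vt) - offsetof(MockMIPSState, f)) / 4 == 224 - 32,
              "IRVTEMP_PFX_S = 224 - 32 is vt[0] expressed relative to f[]");
static_assert(offsetof(MockMIPSState, lo) / 4 == 242, "matches IRREG_LO = 242");

int main() { return 0; }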
// IRVTEMP_0 = 208 - 64, // -64 to be relative to v[0] // Hacky way to get to other state IRREG_VFPU_CTRL_BASE = 208, IRREG_VFPU_CC = 211, - IRREG_LO = 226, // offset of lo in MIPSState / 4 - IRREG_HI = 227, - IRREG_FCR31 = 228, - IRREG_FPCOND = 229, + IRREG_LO = 242, // offset of lo in MIPSState / 4 + IRREG_HI = 243, + IRREG_FCR31 = 244, + IRREG_FPCOND = 245, }; struct IRMeta { diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index f77216ef0c37..68740ad0a672 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -144,6 +144,15 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c #endif break; + case IROp::ShuffleVec4: + { + // Can't use the SSE shuffle here because it takes an immediate. + // Backends with SSE support could use that though. + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)]; + break; + } + case IROp::FSin: mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]); break; diff --git a/Core/MIPS/MIPS.h b/Core/MIPS/MIPS.h index d3a01f1bde31..f51644fd7685 100644 --- a/Core/MIPS/MIPS.h +++ b/Core/MIPS/MIPS.h @@ -172,23 +172,24 @@ class MIPSState // However, the IR interpreter needs some temps that can stick around between ops. // Can be indexed through r[] using indices 192+. u32 t[16]; //192 - // float vt[16]; //208 TODO: VFPU temp // If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code. u32 vfpuCtrl[16]; // 208 + float vt[16]; //224 TODO: VFPU temp + // ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct. - u32 padLoHi; // 224 + u32 padLoHi; // 240 union { struct { - u32 pc; //225 + u32 pc; //241 - u32 lo; //226 - u32 hi; //227 + u32 lo; //242 + u32 hi; //243 - u32 fcr31; //fpu control register - u32 fpcond; // cache the cond flag of fcr31 (& 1 << 23) + u32 fcr31; //244 fpu control register + u32 fpcond; //245 cache the cond flag of fcr31 (& 1 << 23) }; u32 other[6]; }; From 2cbfb192c4bc52e7167e927a9d1402d722134ef7 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 May 2016 12:17:25 +0200 Subject: [PATCH 49/77] IR: Lots more VFPU support, some with SIMD --- Core/MIPS/IR/IRCompVFPU.cpp | 602 ++++++++++++++++++++++++++++++-- Core/MIPS/IR/IRFrontend.h | 4 +- Core/MIPS/IR/IRInst.cpp | 16 +- Core/MIPS/IR/IRInst.h | 19 +- Core/MIPS/IR/IRInterpreter.cpp | 76 +++- Core/MIPS/IR/IRJit.h | 8 +- Core/MIPS/IR/IRPassSimplify.cpp | 8 +- Core/MIPS/IR/IRPassSimplify.h | 2 +- Core/MIPS/MIPSVFPUUtils.cpp | 4 + Core/MIPS/x86/CompVFPU.cpp | 3 +- 10 files changed, 699 insertions(+), 43 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index c9d0083d2eb3..b9620f822b9d 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -65,6 +65,26 @@ namespace MIPSComp { regs[3] == regs[2] + 1; } + // Vector regs can overlap in all sorts of swizzled ways. + // This does allow a single overlap in sregs[i]. + static bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL) { + for (int i = 0; i < sn; ++i) { + if (sregs[i] == dreg && i != di) + return false; + } + for (int i = 0; i < tn; ++i) { + if (tregs[i] == dreg) + return false; + } + + // Hurray, no overlap, we can write directly. 
+ return true; + } + + static bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL) { + return IsOverlapSafeAllowS(dreg, di, sn, sregs, tn, tregs) && sregs[di] != dreg; + } + void IRFrontend::Comp_VPFX(MIPSOpcode op) { CONDITIONAL_DISABLE; int data = op & 0xFFFFF; @@ -146,17 +166,19 @@ namespace MIPSComp { void IRFrontend::GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg) { ::GetMatrixRegs(regs, N, matrixReg); - // TODO + for (int i = 0; i < GetMatrixSide(N); i++) { + ApplyVoffset(regs + 4 * i, GetVectorSize(N)); + } } void IRFrontend::GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); - ::GetVectorRegs(regs, sz, vectorReg); + GetVectorRegs(regs, sz, vectorReg); ApplyPrefixST(regs, js.prefixS, sz, IRVTEMP_PFX_S); } void IRFrontend::GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); - ::GetVectorRegs(regs, sz, vectorReg); + GetVectorRegs(regs, sz, vectorReg); ApplyPrefixST(regs, js.prefixT, sz, IRVTEMP_PFX_T); } @@ -179,7 +201,8 @@ namespace MIPSComp { return (prefix >> (i * 2)) & 3; } - // "D" prefix is really a post process. No need to allocate a temporary register. + // "D" prefix is really a post process. No need to allocate a temporary register (except + // dummies to simulate writemask, which is done in GetVectorRegsPrefixD void IRFrontend::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); if (!js.prefixD) @@ -263,17 +286,15 @@ namespace MIPSComp { VectorSize sz = GetVecSize(op); int type = (op >> 16) & 0xF; int vd = _VD; - + int n = GetNumVectorElements(sz); + u8 dregs[4]; + GetVectorRegs(dregs, sz, vd); if (sz == 4 && IsVectorColumn(vd)) { - u8 dregs[4]; - GetVectorRegs(dregs, sz, vd); - ir.Write(IROp::InitVec4, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); - } else if (sz == 1) { - u8 dreg; - GetVectorRegs(&dreg, V_Single, vd); - ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f)); + ir.Write(IROp::Vec4Init, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); } else { - DISABLE; + for (int i = 0; i < n; i++) { + ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f)); + } } } @@ -293,7 +314,7 @@ namespace MIPSComp { GetVectorRegs(dregs, sz, vd); int row = vd & 3; Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row); - ir.Write(IROp::InitVec4, dregs[0], (int)init); + ir.Write(IROp::Vec4Init, dregs[0], (int)init); } void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) { @@ -329,7 +350,7 @@ namespace MIPSComp { default: return; } - ir.Write(IROp::InitVec4, vec[0], (int)init); + ir.Write(IROp::Vec4Init, vec[0], (int)init); } return; } @@ -345,24 +366,312 @@ namespace MIPSComp { } void IRFrontend::Comp_VDot(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + int vd = _VD; + int vs = _VS; + int vt = _VT; + VectorSize sz = GetVecSize(op); + + // TODO: Force read one of them into regs? probably not. + u8 sregs[4], tregs[4], dregs[1]; + GetVectorRegsPrefixS(sregs, sz, vs); + GetVectorRegsPrefixT(tregs, sz, vt); + GetVectorRegsPrefixD(dregs, V_Single, vd); + + int temp0 = IRVTEMP_0; + int temp1 = IRVTEMP_0 + 1; + ir.Write(IROp::FMul, temp0, sregs[0], tregs[0]); + int n = GetNumVectorElements(sz); + for (int i = 1; i < n; i++) { + ir.Write(IROp::FMul, temp1, sregs[i], tregs[i]); + ir.Write(IROp::FAdd, i == (n - 1) ? 
dregs[0] : temp0, temp0, temp1); + } + ApplyPrefixD(dregs, V_Single); } void IRFrontend::Comp_VecDo3(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix()) + DISABLE; + + // Check that we can support the ops, and prepare temporary values for ops that need it. + bool allowSIMD = true; + switch (op >> 26) { + case 24: //VFPU0 + switch ((op >> 23) & 7) { + case 0: // d[i] = s[i] + t[i]; break; //vadd + case 1: // d[i] = s[i] - t[i]; break; //vsub + case 7: // d[i] = s[i] / t[i]; break; //vdiv + break; + default: + DISABLE; + } + break; + case 25: //VFPU1 + switch ((op >> 23) & 7) { + case 0: // d[i] = s[i] * t[i]; break; //vmul + break; + default: + DISABLE; + } + break; + case 27: //VFPU3 + switch ((op >> 23) & 7) { + case 2: // vmin + case 3: // vmax + allowSIMD = false; + break; + case 6: // vsge + case 7: // vslt + allowSIMD = false; + break; + default: + DISABLE; + } + break; + default: + DISABLE; + break; + } + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], tregs[4], dregs[4]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixT(tregs, sz, _VT); + GetVectorRegsPrefixD(dregs, sz, _VD); + + if (allowSIMD && sz == V_Quad && IsConsecutive4(sregs) && IsConsecutive4(dregs) && IsConsecutive4(sregs)) { + IROp opFunc = IROp::Nop; + bool symmetric = false; + switch (op >> 26) { + case 24: //VFPU0 + switch ((op >> 23) & 7) { + case 0: // d[i] = s[i] + t[i]; break; //vadd + opFunc = IROp::Vec4Add; + symmetric = true; + break; + case 1: // d[i] = s[i] - t[i]; break; //vsub + opFunc = IROp::Vec4Sub; + break; + case 7: // d[i] = s[i] / t[i]; break; //vdiv + opFunc = IROp::Vec4Div; + break; + } + break; + case 25: //VFPU1 + switch ((op >> 23) & 7) + { + case 0: // d[i] = s[i] * t[i]; break; //vmul + opFunc = IROp::Vec4Mul; + symmetric = true; + break; + } + break; + case 27: //VFPU3 + switch ((op >> 23) & 7) + { + case 2: // vmin + case 3: // vmax + case 6: // vsge + case 7: // vslt + DISABLE; + break; + } + break; + } + + if (opFunc != IROp::Nop) { + ir.Write(opFunc, dregs[0], sregs[0], tregs[0]); + } + + ApplyPrefixD(dregs, sz); + return; + } + + for (int i = 0; i < n; ++i) { + switch (op >> 26) { + case 24: //VFPU0 + switch ((op >> 23) & 7) { + case 0: // d[i] = s[i] + t[i]; break; //vadd + ir.Write(IROp::FAdd, dregs[i], sregs[i], tregs[i]); + break; + case 1: // d[i] = s[i] - t[i]; break; //vsub + ir.Write(IROp::FSub, dregs[i], sregs[i], tregs[i]); + break; + case 7: // d[i] = s[i] / t[i]; break; //vdiv + ir.Write(IROp::FDiv, dregs[i], sregs[i], tregs[i]); + break; + } + break; + case 25: //VFPU1 + switch ((op >> 23) & 7) { + case 0: // d[i] = s[i] * t[i]; break; //vmul + ir.Write(IROp::FMul, dregs[i], sregs[i], tregs[i]); + break; + } + break; + case 27: //VFPU3 + switch ((op >> 23) & 7) { + case 2: // vmin + ir.Write(IROp::FMin, dregs[i], sregs[i], tregs[i]); + break; + case 3: // vmax + ir.Write(IROp::FMax, dregs[i], sregs[i], tregs[i]); + break; + case 6: // vsge + case 7: // vslt + DISABLE; + break; + } + break; + } + } + + ApplyPrefixD(dregs, sz); } void IRFrontend::Comp_VV2Op(MIPSOpcode op) { - CONDITIONAL_DISABLE; - // Eliminate silly no-op VMOVs, common in Wipeout Pure - if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) { + if (js.HasUnknownPrefix()) + DISABLE; + + int vs = _VS; + int vd = _VD; + + // Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure + if (((op >> 16) & 0x1f) == 0 && vs == vd && js.HasNoPrefix()) { return; } - DISABLE; + + VectorSize sz = GetVecSize(op); + int n = 
GetNumVectorElements(sz); + + u8 sregs[4], dregs[4]; + GetVectorRegsPrefixS(sregs, sz, vs); + GetVectorRegsPrefixD(dregs, sz, vd); + + bool canSIMD = false; + // Some can be SIMD'd. + switch ((op >> 16) & 0x1f) { + case 0: // vmov + canSIMD = true; + break; + } + + if (canSIMD && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { + switch ((op >> 16) & 0x1f) { + case 0: // vmov + ir.Write(IROp::Vec4Mov, dregs[0], sregs[0]); + break; + } + ApplyPrefixD(dregs, sz); + return; + } + + for (int i = 0; i < n; ++i) { + switch ((op >> 16) & 0x1f) { + case 0: // d[i] = s[i]; break; //vmov + // Probably for swizzle. + ir.Write(IROp::FMov, dregs[i], sregs[i]); + break; + case 1: // d[i] = fabsf(s[i]); break; //vabs + ir.Write(IROp::FAbs, dregs[i], sregs[i]); + break; + case 2: // d[i] = -s[i]; break; //vneg + ir.Write(IROp::FNeg, dregs[i], sregs[i]); + break; + case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0 + ir.Write(IROp::FSat0_1, dregs[i], sregs[i]); + break; + case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1 + ir.Write(IROp::FSatMinus1_1, dregs[i], sregs[i]); + break; + case 16: // d[i] = 1.0f / s[i]; break; //vrcp + ir.Write(IROp::FRecip, dregs[i], sregs[i]); + break; + case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq + ir.Write(IROp::FRSqrt, dregs[i], sregs[i]); + break; + case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin + ir.Write(IROp::FSin, dregs[i], sregs[i]); + break; + case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos + ir.Write(IROp::FCos, dregs[i], sregs[i]); + break; + case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2 + DISABLE; + break; + case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2 + DISABLE; + break; + case 22: // d[i] = sqrtf(s[i]); break; //vsqrt + ir.Write(IROp::FSqrt, dregs[i], sregs[i]); + break; + case 23: // d[i] = asinf(s[i]) / M_PI_2; break; //vasin + ir.Write(IROp::FAsin, dregs[i], sregs[i]); + break; + case 24: // d[i] = -1.0f / s[i]; break; // vnrcp + ir.Write(IROp::FRecip, dregs[i], sregs[i]); + ir.Write(IROp::FNeg, dregs[i], dregs[i]); + break; + case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin + ir.Write(IROp::FSin, dregs[i], sregs[i]); + ir.Write(IROp::FNeg, dregs[i], dregs[i]); + break; + case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2 + default: + DISABLE; + break; + } + } + ApplyPrefixD(dregs, sz); } void IRFrontend::Comp_Vi2f(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + int imm = (op >> 16) & 0x1f; + const float mult = 1.0f / (float)(1UL << imm); + + u8 sregs[4], dregs[4]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixD(dregs, sz, _VD); + + int tempregs[4]; + for (int i = 0; i < n; ++i) { + if (!IsOverlapSafe(dregs[i], i, n, sregs)) { + tempregs[i] = IRVTEMP_PFX_T + i; // Need IRVTEMP_0 for the scaling factor + } else { + tempregs[i] = dregs[i]; + } + } + if (mult != 1.0f) + ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(mult)); + // TODO: Use the SCVTF with builtin scaling where possible. 
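The loop that follows converts each lane with FCvtSW and then multiplies by the 1/2^imm constant placed in IRVTEMP_0 just above. A scalar reference for one lane is sketched below; rounding-mode details of the real instruction are ignored.

#include <cstdint>
#include <cstdio>

// vi2f reference: treat the lane as a signed 32-bit integer, convert to float, and scale
// down by 2^imm (imm in 0..31), matching the FCvtSW + FMul sequence emitted above.
static float Vi2fLaneModel(int32_t s, int imm) {
    const float mult = 1.0f / (float)(1UL << imm);  // same constant as AddConstantFloat(mult)
    return (float)s * mult;
}

int main() {
    printf("%f\n", Vi2fLaneModel(32768, 15));  // 1.0
    return 0;
}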
+ for (int i = 0; i < n; i++) { + ir.Write(IROp::FCvtSW, tempregs[i], sregs[i]); + if (mult != 1.0f) + ir.Write(IROp::FMul, tempregs[i], tempregs[i], IRVTEMP_0); + } + + for (int i = 0; i < n; ++i) { + if (dregs[i] != tempregs[i]) { + ir.Write(IROp::FMov, dregs[i], tempregs[i]); + } + } + ApplyPrefixD(dregs, sz); } void IRFrontend::Comp_Vh2f(MIPSOpcode op) { @@ -414,11 +723,115 @@ namespace MIPSComp { } void IRFrontend::Comp_VScl(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], dregs[4], treg; + GetVectorRegsPrefixS(sregs, sz, _VS); + // TODO: Prefixes seem strange... + GetVectorRegsPrefixT(&treg, V_Single, _VT); + GetVectorRegsPrefixD(dregs, sz, _VD); + + if (n == 4 && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { + // In this case, there's zero danger of overlap. + ir.Write(IROp::Vec4Scale, dregs[0], sregs[0], treg); + ApplyPrefixD(dregs, sz); + return; + } + + // For prefixes to work, we just have to ensure that none of the output registers spill + // and that there's no overlap. + int tempregs[4]; + for (int i = 0; i < n; ++i) { + // for vscl, it's fine if dregs[i] = sregs[i] + if (dregs[i] != sregs[i] && !IsOverlapSafe(dregs[i], i, n, sregs)) { + // Need to use temp regs + tempregs[i] = IRVTEMP_0 + i; + } else { + tempregs[i] = dregs[i]; + } + } + + for (int i = 0; i < n; i++) { + ir.Write(IROp::FMul, tempregs[i], sregs[i], treg); + } + + for (int i = 0; i < n; i++) { + // All must be mapped for prefixes to work. + if (dregs[i] != tempregs[i]) { + ir.Write(IROp::FMov, dregs[i], tempregs[i]); + } + } + + ApplyPrefixD(dregs, sz); } + // This may or may not be a win when using the IR interpreter... + // Many more instructions to interpret. void IRFrontend::Comp_Vmmul(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + MatrixSize sz = GetMtxSize(op); + int n = GetMatrixSide(sz); + + MatrixOverlapType soverlap = GetMatrixOverlap(_VS, _VD, sz); + MatrixOverlapType toverlap = GetMatrixOverlap(_VT, _VD, sz); + + u8 sregs[16], tregs[16], dregs[16]; + GetMatrixRegs(sregs, sz, _VS); + GetMatrixRegs(tregs, sz, _VT); + GetMatrixRegs(dregs, sz, _VD); + + if (soverlap || toverlap) { + DISABLE; + } + + if (sz == M_4x4 && IsConsecutive4(tregs) && IsConsecutive4(dregs)) { + logBlocks = 1; + int s0 = IRVTEMP_0; + int s1 = IRVTEMP_PFX_T; + if (!IsConsecutive4(sregs)) { + for (int j = 0; j < 4; j++) { + ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[j * 4]); + for (int i = 1; i < 4; i++) { + ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[j * 4 + i]); + ir.Write(IROp::Vec4Add, s0, s0, s1); + } + ir.Write(IROp::Vec4Mov, dregs[j * 4], s0); + } + return; + } else { + for (int j = 0; j < 4; j++) { + for (int i = 0; i < 4; i++) { + ir.Write(IROp::Vec4Dot, s0 + i, sregs[i], tregs[j * 4]); + } + ir.Write(IROp::Vec4Mov, dregs[j * 4], s0); + } + return; + } + } else { + // logBlocks = 1; + } + + int temp0 = IRVTEMP_0; + int temp1 = IRVTEMP_0 + 1; + for (int a = 0; a < n; a++) { + for (int b = 0; b < n; b++) { + ir.Write(IROp::FMul, temp0, sregs[b * 4], tregs[a * 4]); + for (int c = 1; c < n; c++) { + ir.Write(IROp::FMul, temp1, sregs[b * 4 + c], tregs[a * 4 + c]); + ir.Write(IROp::FAdd, (c == n - 1) ? 
dregs[a * 4 + b] : temp0, temp0, temp1); + } + } + } } void IRFrontend::Comp_Vmscl(MIPSOpcode op) { @@ -426,7 +839,78 @@ namespace MIPSComp { } void IRFrontend::Comp_Vtfm(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + VectorSize sz = GetVecSize(op); + MatrixSize msz = GetMtxSize(op); + int n = GetNumVectorElements(sz); + int ins = (op >> 23) & 7; + + bool homogenous = false; + if (n == ins) { + n++; + sz = (VectorSize)((int)(sz)+1); + msz = (MatrixSize)((int)(msz)+1); + homogenous = true; + } + // Otherwise, n should already be ins + 1. + else if (n != ins + 1) { + DISABLE; + } + + u8 sregs[16], dregs[4], tregs[4]; + GetMatrixRegs(sregs, msz, _VS); + GetVectorRegs(tregs, sz, _VT); + GetVectorRegs(dregs, sz, _VD); + + // SIMD-optimized implementations + if (msz == M_4x4 && !homogenous && IsConsecutive4(tregs) && IsConsecutive4(dregs)) { + int s0 = IRVTEMP_0; + int s1 = IRVTEMP_PFX_T; + if (!IsConsecutive4(sregs)) { + ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); + for (int i = 1; i < 4; i++) { + ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[i]); + ir.Write(IROp::Vec4Add, s0, s0, s1); + } + ir.Write(IROp::Vec4Mov, dregs[0], s0); + return; + } else { + for (int i = 0; i < 4; i++) { + ir.Write(IROp::Vec4Dot, s0 + i, sregs[i], tregs[0]); + } + ir.Write(IROp::Vec4Mov, dregs[0], s0); + return; + } + } else if (msz == M_4x4) { + logBlocks = 1; + } + + // TODO: test overlap, optimize. + int tempregs[4]; + int s0 = IRVTEMP_0; + int temp1 = IRVTEMP_0 + 1; + for (int i = 0; i < n; i++) { + ir.Write(IROp::FMul, s0, sregs[i * 4], tregs[0]); + for (int k = 1; k < n; k++) { + if (!homogenous || k != n - 1) { + ir.Write(IROp::FMul, temp1, sregs[i * 4 + k], tregs[k]); + ir.Write(IROp::FAdd, s0, s0, temp1); + } else { + ir.Write(IROp::FAdd, s0, s0, sregs[i * 4 + k]); + } + } + int temp = IRVTEMP_PFX_T + i; + ir.Write(IROp::FMov, temp, s0); + tempregs[i] = temp; + } + for (int i = 0; i < n; i++) { + u8 temp = tempregs[i]; + ir.Write(IROp::FMov, dregs[i], temp); + } } void IRFrontend::Comp_VCrs(MIPSOpcode op) { @@ -446,19 +930,53 @@ namespace MIPSComp { } void IRFrontend::Comp_VCrossQuat(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix()) + DISABLE; + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], tregs[4], dregs[4]; + GetVectorRegs(sregs, sz, _VS); + GetVectorRegs(tregs, sz, _VT); + GetVectorRegs(dregs, sz, _VD); + + if (sz == V_Triple) { + int temp0 = IRVTEMP_0; + int temp1 = IRVTEMP_0 + 1; + // Compute X + ir.Write(IROp::FMul, temp0, sregs[1], tregs[2]); + ir.Write(IROp::FMul, temp1, sregs[2], tregs[1]); + ir.Write(IROp::FSub, dregs[0], temp0, temp1); + + // Compute Y + ir.Write(IROp::FMul, temp0, sregs[2], tregs[0]); + ir.Write(IROp::FMul, temp1, sregs[0], tregs[2]); + ir.Write(IROp::FSub, dregs[1], temp0, temp1); + + // Compute Z + ir.Write(IROp::FMul, temp0, sregs[0], tregs[1]); + ir.Write(IROp::FMul, temp1, sregs[1], tregs[0]); + ir.Write(IROp::FSub, dregs[2], temp0, temp1); + } else if (sz == V_Quad) { + DISABLE; + } } void IRFrontend::Comp_Vcmp(MIPSOpcode op) { + // Fiendishly hard... DISABLE; } void IRFrontend::Comp_Vcmov(MIPSOpcode op) { + // Fiendishly hard... 
DISABLE; } void IRFrontend::Comp_Viim(MIPSOpcode op) { - if (!js.HasUnknownPrefix()) + if (js.HasUnknownPrefix()) DISABLE; s32 imm = (s32)(s16)(u16)(op & 0xFFFF); @@ -469,11 +987,37 @@ namespace MIPSComp { } void IRFrontend::Comp_Vfim(MIPSOpcode op) { - DISABLE; + if (js.HasUnknownPrefix()) + DISABLE; + + FP16 half; + half.u = op & 0xFFFF; + FP32 fval = half_to_float_fast5(half); + + u8 dreg; + GetVectorRegsPrefixD(&dreg, V_Single, _VT); + ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat(fval.f)); + ApplyPrefixD(&dreg, V_Single); } void IRFrontend::Comp_Vcst(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix()) + DISABLE; + + int conNum = (op >> 16) & 0x1f; + int vd = _VD; + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 dregs[4]; + GetVectorRegsPrefixD(dregs, sz, _VD); + for (int i = 0; i < n; i++) { + ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(cst_constants[conNum])); + } + ApplyPrefixD(dregs, sz); } // Very heavily used by FF:CC. Should be replaced by a fast approximation instead of diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index 7e813a307d81..7a9a9196120d 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -95,7 +95,9 @@ class IRFrontend : public MIPSFrontendInterface { void ApplyRoundingMode(bool force = false); void UpdateRoundingMode(); - void EatPrefix() { js.EatPrefix(); } + void EatPrefix() override { + js.EatPrefix(); + } void FlushAll(); void FlushPrefixV(); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 9d41d74fbfdd..92494612201d 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -4,6 +4,7 @@ #include "Core/MIPS/MIPSDebugInterface.h" static const IRMeta irMeta[] = { + { IROp::Nop, "Nop", "" }, { IROp::SetConst, "SetConst", "GC" }, { IROp::SetConstF, "SetConstF", "FC" }, { IROp::Mov, "Mov", "GG" }, @@ -68,6 +69,8 @@ static const IRMeta irMeta[] = { { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, { IROp::FDiv, "FDiv", "FFF" }, + { IROp::FMin, "FMin", "FFF" }, + { IROp::FMax, "FMax", "FFF" }, { IROp::FMov, "FMov", "FF" }, { IROp::FSqrt, "FSqrt", "FF" }, { IROp::FSin, "FSin", "FF" }, @@ -92,8 +95,15 @@ static const IRMeta irMeta[] = { { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" }, { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, - { IROp::InitVec4, "InitVec4", "Fv" }, - { IROp::ShuffleVec4, "ShuffleVec4", "FFs" }, + { IROp::Vec4Init, "Vec4Init", "Fv" }, + { IROp::Vec4Shuffle, "Vec4Shuffle", "FFs" }, + { IROp::Vec4Mov, "Vec4Mov", "FF" }, + { IROp::Vec4Add, "Vec4Add", "FFF" }, + { IROp::Vec4Sub, "Vec4Sub", "FFF" }, + { IROp::Vec4Div, "Vec4Div", "FFF" }, + { IROp::Vec4Mul, "Vec4Mul", "FFF" }, + { IROp::Vec4Scale, "Vec4Scale", "FFF" }, + { IROp::Vec4Dot, "Vec4Dot", "FFF" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, @@ -219,7 +229,7 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co snprintf(buf, bufSize, "%s", initVec4Names[param]); break; case 's': - snprintf(buf, bufSize, "%s%s%s%s", xyzw[param & 3], xyzw[(param >> 2) & 3], xyzw[(param >> 4) & 3], xyzw[(param >> 6) & 3]); + snprintf(buf, bufSize, "%c%c%c%c", xyzw[param & 3], xyzw[(param >> 2) & 3], xyzw[(param >> 4) & 3], xyzw[(param >> 6) & 3]); break; case '_': case '\0': diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index e9bbc4acbf3b..c296afb96755 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -16,6 +16,8 @@ // MIPS->target JITs. 
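The Vec4Shuffle op registered above packs one source-lane selector per two bits, low bits first, which is what the new "s" disassembly prints as xyzw and what the interpreter case added in the previous patch computes. A scalar model follows, including why 0xE4 (binary 11 10 01 00) is the identity selector that ApplyPrefixST early-outs on.

#include <cstdio>

// Scalar model of Vec4Shuffle: dest[i] = src[(sel >> (2 * i)) & 3], matching the
// interpreter case. src and dest are kept distinct here, as in the IR usage.
static void Vec4ShuffleModel(float dest[4], const float src[4], unsigned sel) {
    for (int i = 0; i < 4; i++)
        dest[i] = src[(sel >> (2 * i)) & 3];
}

int main() {
    const float v[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    float out[4];
    Vec4ShuffleModel(out, v, 0xE4);                           // identity: x, y, z, w
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 2 3 4
    Vec4ShuffleModel(out, v, 0x00);                           // splat x
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 1 1 1
    return 0;
}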
enum class IROp : u8 { + Nop, + SetConst, SetConstF, @@ -103,6 +105,8 @@ enum class IROp : u8 { FSub, FMul, FDiv, + FMin, + FMax, FMov, FSqrt, @@ -134,6 +138,10 @@ enum class IROp : u8 { FCmpLessUnordered, FCmpLessEqualOrdered, FCmpLessEqualUnordered, + FCmpEqualZero, + FCmpNotEqualZero, + + FCmovVfpuCC, // Rounding Mode RestoreRoundingMode, @@ -145,8 +153,15 @@ enum class IROp : u8 { // 4-wide instructions to assist SIMD. // Can of course add a pass to break them up if a target does not // support SIMD. - InitVec4, - ShuffleVec4, + Vec4Init, + Vec4Shuffle, + Vec4Mov, + Vec4Add, + Vec4Sub, + Vec4Mul, + Vec4Div, + Vec4Scale, + Vec4Dot, // Slow special functions. Used on singles. FSin, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 68740ad0a672..4e572745f0a5 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -2,6 +2,9 @@ #include #endif +#include +#include + #include "Core/MemMap.h" #include "Core/HLE/HLE.h" #include "Core/HLE/ReplaceTables.h" @@ -136,7 +139,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; } - case IROp::InitVec4: + case IROp::Vec4Init: #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); #else @@ -144,7 +147,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c #endif break; - case IROp::ShuffleVec4: + case IROp::Vec4Shuffle: { // Can't use the SSE shuffle here because it takes an immediate. // Backends with SSE support could use that though. @@ -153,6 +156,69 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; } + case IROp::Vec4Mov: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1])); +#else + memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float)); +#endif + break; + + case IROp::Vec4Add: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Sub: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Mul: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Div: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Scale: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; +#endif + break; + + // Not quickly implementable on all platforms, unfortunately. 
+ case IROp::Vec4Dot: + { + float dot = mips->f[inst->src1] * mips->f[inst->src2]; + for (int i = 1; i < 4; i++) + dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; + mips->f[inst->dest] = dot; + break; + } + case IROp::FSin: mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]); break; @@ -299,6 +365,12 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::FDiv: mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2]; break; + case IROp::FMin: + mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]); + break; + case IROp::FMax: + mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]); + break; case IROp::FMov: mips->f[inst->dest] = mips->f[inst->src1]; diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index aa026b0bd8d5..87a8231bff18 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -57,10 +57,14 @@ class IRBlock { void SetInstructions(const std::vector &inst, const std::vector &constants) { instr_ = new IRInst[inst.size()]; numInstructions_ = (u16)inst.size(); - memcpy(instr_, &inst[0], sizeof(IRInst) * inst.size()); + if (!inst.empty()) { + memcpy(instr_, &inst[0], sizeof(IRInst) * inst.size()); + } const_ = new u32[constants.size()]; numConstants_ = (u16)constants.size(); - memcpy(const_, &constants[0], sizeof(u32) * constants.size()); + if (!constants.empty()) { + memcpy(const_, &constants[0], sizeof(u32) * constants.size()); + } } const IRInst *GetInstructions() const { return instr_; } diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index e846c8420a1c..477774f4355a 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -368,7 +368,13 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } break; - case IROp::InitVec4: + case IROp::Vec4Init: + case IROp::Vec4Add: + case IROp::Vec4Sub: + case IROp::Vec4Mul: + case IROp::Vec4Div: + case IROp::Vec4Scale: + case IROp::Vec4Shuffle: out.Write(inst); break; diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index 5bf3f53fb9eb..72e87ace2150 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -5,4 +5,4 @@ typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out); bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out); -bool PropagateConstants(const IRWriter &in, IRWriter &out); \ No newline at end of file +bool PropagateConstants(const IRWriter &in, IRWriter &out); diff --git a/Core/MIPS/MIPSVFPUUtils.cpp b/Core/MIPS/MIPSVFPUUtils.cpp index 385926d5b421..cd9739be0278 100644 --- a/Core/MIPS/MIPSVFPUUtils.cpp +++ b/Core/MIPS/MIPSVFPUUtils.cpp @@ -395,6 +395,10 @@ MatrixOverlapType GetMatrixOverlap(int mtx1, int mtx2, MatrixSize msize) { if (mtx1 == mtx2) return OVERLAP_EQUAL; + if (msize == M_4x4) { + return (mtx1 == mtx2) ? OVERLAP_EQUAL : OVERLAP_NONE; + } + u8 m1[16]; u8 m2[16]; GetMatrixRegs(m1, msize, mtx1); diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index 749967f53a61..ec95760d57e4 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -1801,8 +1801,7 @@ void Jit::Comp_Vf2i(MIPSOpcode op) { const double *mult = &mulTableVf2i[imm]; int setMXCSR = -1; - switch ((op >> 21) & 0x1f) - { + switch ((op >> 21) & 0x1f) { case 17: break; //z - truncate. Easy to support. 
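Looking back at the Vec4 interpreter cases a few hunks above, here is a scalar reference for the two whose operand meaning is least obvious: Vec4Scale multiplies every lane of the source vector by one scalar register (the SSE path broadcasts it with _mm_set1_ps), and Vec4Dot reduces a four-wide multiply into a single destination lane. This is a sketch of the intended semantics, not the interpreter's code.

#include <cstdio>

// Scalar reference for Vec4Scale and Vec4Dot as used by the VFPU lowering.
static void Vec4ScaleModel(float out[4], const float a[4], float s) {
    for (int i = 0; i < 4; i++)
        out[i] = a[i] * s;          // single scalar applied to all four lanes
}

static float Vec4DotModel(const float a[4], const float b[4]) {
    float dot = 0.0f;
    for (int i = 0; i < 4; i++)
        dot += a[i] * b[i];         // reduction into one float
    return dot;
}

int main() {
    const float a[4] = { 1, 2, 3, 4 }, b[4] = { 4, 3, 2, 1 };
    float scaled[4];
    Vec4ScaleModel(scaled, a, 2.0f);
    printf("scale: %g %g %g %g, dot: %g\n", scaled[0], scaled[1], scaled[2], scaled[3],
           Vec4DotModel(a, b));     // scale: 2 4 6 8, dot: 20
    return 0;
}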
case 16: From cb251ea93fa4296833e4ea7b4eca442bd88bb003 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 May 2016 12:18:12 +0200 Subject: [PATCH 50/77] Crashfix in savestate (hmmmm...) --- Core/SaveState.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Core/SaveState.cpp b/Core/SaveState.cpp index bf6f30eeeda6..0b4465144c45 100644 --- a/Core/SaveState.cpp +++ b/Core/SaveState.cpp @@ -234,9 +234,14 @@ namespace SaveState if (MIPSComp::jit && p.mode == p.MODE_WRITE) { auto blockCache = MIPSComp::jit->GetBlockCache(); - auto savedBlocks = blockCache->SaveAndClearEmuHackOps(); + std::vector savedBlocks; + if (blockCache) { + savedBlocks = blockCache->SaveAndClearEmuHackOps(); + } Memory::DoState(p); - blockCache->RestoreSavedEmuHackOps(savedBlocks); + if (blockCache) { + blockCache->RestoreSavedEmuHackOps(savedBlocks); + } } else Memory::DoState(p); From 182674cddf7f5f1b03703a1bf14d5b8eee03d558 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 May 2016 13:10:26 +0200 Subject: [PATCH 51/77] IR: SIMD another matrix orientation. Fix various issues. --- Core/MIPS/IR/IRCompVFPU.cpp | 136 +++++++++++++++++++++++++----------- 1 file changed, 95 insertions(+), 41 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index b9620f822b9d..1670deb967b2 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -34,7 +34,7 @@ // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. // Currently known non working ones should have DISABLE. -// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; } +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; #define DISABLE { Comp_Generic(op); return; } @@ -280,8 +280,9 @@ namespace MIPSComp { } void IRFrontend::Comp_VVectorInit(MIPSOpcode op) { - if (!js.HasNoPrefix()) + if (js.HasUnknownPrefix()) { DISABLE; + } VectorSize sz = GetVecSize(op); int type = (op >> 16) & 0xF; @@ -299,8 +300,9 @@ namespace MIPSComp { } void IRFrontend::Comp_VIdt(MIPSOpcode op) { - if (!js.HasNoPrefix()) + if (js.HasUnknownPrefix()) { DISABLE; + } int vd = _VD; VectorSize sz = GetVecSize(op); @@ -447,7 +449,18 @@ namespace MIPSComp { GetVectorRegsPrefixT(tregs, sz, _VT); GetVectorRegsPrefixD(dregs, sz, _VD); - if (allowSIMD && sz == V_Quad && IsConsecutive4(sregs) && IsConsecutive4(dregs) && IsConsecutive4(sregs)) { + int tempregs[4]; + bool usingTemps = false; + for (int i = 0; i < n; i++) { + if (!IsOverlapSafe(dregs[i], i, n, sregs, n, tregs)) { + tempregs[i] = IRVTEMP_0 + i; + usingTemps = true; + } else { + tempregs[i] = dregs[i]; + } + } + + if (allowSIMD && sz == V_Quad && !usingTemps && IsConsecutive4(sregs) && IsConsecutive4(dregs) && IsConsecutive4(sregs)) { IROp opFunc = IROp::Nop; bool symmetric = false; switch (op >> 26) { @@ -490,7 +503,6 @@ namespace MIPSComp { if (opFunc != IROp::Nop) { ir.Write(opFunc, dregs[0], sregs[0], tregs[0]); } - ApplyPrefixD(dregs, sz); return; } @@ -500,30 +512,30 @@ namespace MIPSComp { case 24: //VFPU0 switch ((op >> 23) & 7) { case 0: // d[i] = s[i] + t[i]; break; //vadd - ir.Write(IROp::FAdd, dregs[i], sregs[i], tregs[i]); + ir.Write(IROp::FAdd, tempregs[i], sregs[i], tregs[i]); break; case 1: // d[i] = s[i] - t[i]; break; //vsub - ir.Write(IROp::FSub, dregs[i], sregs[i], tregs[i]); + ir.Write(IROp::FSub, tempregs[i], sregs[i], tregs[i]); break; case 7: // d[i] = s[i] / t[i]; break; //vdiv - ir.Write(IROp::FDiv, dregs[i], 
sregs[i], tregs[i]); + ir.Write(IROp::FDiv, tempregs[i], sregs[i], tregs[i]); break; } break; case 25: //VFPU1 switch ((op >> 23) & 7) { case 0: // d[i] = s[i] * t[i]; break; //vmul - ir.Write(IROp::FMul, dregs[i], sregs[i], tregs[i]); + ir.Write(IROp::FMul, tempregs[i], sregs[i], tregs[i]); break; } break; case 27: //VFPU3 switch ((op >> 23) & 7) { case 2: // vmin - ir.Write(IROp::FMin, dregs[i], sregs[i], tregs[i]); + ir.Write(IROp::FMin, tempregs[i], sregs[i], tregs[i]); break; case 3: // vmax - ir.Write(IROp::FMax, dregs[i], sregs[i], tregs[i]); + ir.Write(IROp::FMax, tempregs[i], sregs[i], tregs[i]); break; case 6: // vsge case 7: // vslt @@ -534,6 +546,12 @@ namespace MIPSComp { } } + for (int i = 0; i < n; i++) { + if (dregs[i] != tempregs[i]) { + ir.Write(IROp::FMov, dregs[i], tempregs[i]); + } + } + ApplyPrefixD(dregs, sz); } @@ -556,6 +574,17 @@ namespace MIPSComp { GetVectorRegsPrefixS(sregs, sz, vs); GetVectorRegsPrefixD(dregs, sz, vd); + bool usingTemps = false; + int tempregs[4]; + for (int i = 0; i < n; ++i) { + if (!IsOverlapSafe(dregs[i], i, n, sregs)) { + usingTemps = true; + tempregs[i] = IRVTEMP_0 + i; + } else { + tempregs[i] = dregs[i]; + } + } + bool canSIMD = false; // Some can be SIMD'd. switch ((op >> 16) & 0x1f) { @@ -564,7 +593,7 @@ namespace MIPSComp { break; } - if (canSIMD && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { + if (canSIMD && !usingTemps && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { switch ((op >> 16) & 0x1f) { case 0: // vmov ir.Write(IROp::Vec4Mov, dregs[0], sregs[0]); @@ -578,31 +607,31 @@ namespace MIPSComp { switch ((op >> 16) & 0x1f) { case 0: // d[i] = s[i]; break; //vmov // Probably for swizzle. - ir.Write(IROp::FMov, dregs[i], sregs[i]); + ir.Write(IROp::FMov, tempregs[i], sregs[i]); break; case 1: // d[i] = fabsf(s[i]); break; //vabs - ir.Write(IROp::FAbs, dregs[i], sregs[i]); + ir.Write(IROp::FAbs, tempregs[i], sregs[i]); break; case 2: // d[i] = -s[i]; break; //vneg - ir.Write(IROp::FNeg, dregs[i], sregs[i]); + ir.Write(IROp::FNeg, tempregs[i], sregs[i]); break; case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0 - ir.Write(IROp::FSat0_1, dregs[i], sregs[i]); + ir.Write(IROp::FSat0_1, tempregs[i], sregs[i]); break; case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1 - ir.Write(IROp::FSatMinus1_1, dregs[i], sregs[i]); + ir.Write(IROp::FSatMinus1_1, tempregs[i], sregs[i]); break; case 16: // d[i] = 1.0f / s[i]; break; //vrcp - ir.Write(IROp::FRecip, dregs[i], sregs[i]); + ir.Write(IROp::FRecip, tempregs[i], sregs[i]); break; case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq - ir.Write(IROp::FRSqrt, dregs[i], sregs[i]); + ir.Write(IROp::FRSqrt, tempregs[i], sregs[i]); break; case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin - ir.Write(IROp::FSin, dregs[i], sregs[i]); + ir.Write(IROp::FSin, tempregs[i], sregs[i]); break; case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos - ir.Write(IROp::FCos, dregs[i], sregs[i]); + ir.Write(IROp::FCos, tempregs[i], sregs[i]); break; case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2 DISABLE; @@ -611,18 +640,18 @@ namespace MIPSComp { DISABLE; break; case 22: // d[i] = sqrtf(s[i]); break; //vsqrt - ir.Write(IROp::FSqrt, dregs[i], sregs[i]); + ir.Write(IROp::FSqrt, tempregs[i], sregs[i]); break; case 23: // d[i] = asinf(s[i]) / M_PI_2; break; //vasin - ir.Write(IROp::FAsin, dregs[i], sregs[i]); + ir.Write(IROp::FAsin, tempregs[i], sregs[i]); break; case 24: // 
d[i] = -1.0f / s[i]; break; // vnrcp - ir.Write(IROp::FRecip, dregs[i], sregs[i]); - ir.Write(IROp::FNeg, dregs[i], dregs[i]); + ir.Write(IROp::FRecip, tempregs[i], sregs[i]); + ir.Write(IROp::FNeg, tempregs[i], tempregs[i]); break; case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin - ir.Write(IROp::FSin, dregs[i], sregs[i]); - ir.Write(IROp::FNeg, dregs[i], dregs[i]); + ir.Write(IROp::FSin, tempregs[i], sregs[i]); + ir.Write(IROp::FNeg, tempregs[i], tempregs[i]); break; case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2 default: @@ -630,6 +659,12 @@ namespace MIPSComp { break; } } + for (int i = 0; i < n; i++) { + if (dregs[i] != tempregs[i]) { + ir.Write(IROp::FMov, dregs[i], tempregs[i]); + } + } + ApplyPrefixD(dregs, sz); } @@ -782,20 +817,32 @@ namespace MIPSComp { MatrixSize sz = GetMtxSize(op); int n = GetMatrixSide(sz); - MatrixOverlapType soverlap = GetMatrixOverlap(_VS, _VD, sz); - MatrixOverlapType toverlap = GetMatrixOverlap(_VT, _VD, sz); + int vs = _VS; + int vd = _VD; + int vt = _VT; + MatrixOverlapType soverlap = GetMatrixOverlap(vs, vd, sz); + MatrixOverlapType toverlap = GetMatrixOverlap(vt, vd, sz); + + // A very common arrangment. Rearrange to something we can handle. + if (IsMatrixTransposed(vd) && !IsMatrixTransposed(vs) && IsMatrixTransposed(vt)) { + // Matrix identity says (At * Bt) = (B * A)t + // D = S * T + // Dt = (S * T)t = (Tt * St) + vd = TransposeMatrixReg(vd); + std::swap(vs, vt); + } u8 sregs[16], tregs[16], dregs[16]; - GetMatrixRegs(sregs, sz, _VS); - GetMatrixRegs(tregs, sz, _VT); - GetMatrixRegs(dregs, sz, _VD); + GetMatrixRegs(sregs, sz, vs); + GetMatrixRegs(tregs, sz, vt); + GetMatrixRegs(dregs, sz, vd); if (soverlap || toverlap) { DISABLE; } - if (sz == M_4x4 && IsConsecutive4(tregs) && IsConsecutive4(dregs)) { - logBlocks = 1; + // TODO: The interpreter would like proper matrix ops better. Can generate those, and + // expand them like this as needed on "real" architectures. int s0 = IRVTEMP_0; int s1 = IRVTEMP_PFX_T; if (!IsConsecutive4(sregs)) { @@ -817,10 +864,12 @@ namespace MIPSComp { } return; } - } else { - // logBlocks = 1; + } else if (sz == M_4x4) { + // Tekken 6 has a case here: MEE + logBlocks = 1; } + // Fallback. Expands a LOT int temp0 = IRVTEMP_0; int temp1 = IRVTEMP_0 + 1; for (int a = 0; a < n; a++) { @@ -867,18 +916,23 @@ namespace MIPSComp { GetVectorRegs(dregs, sz, _VD); // SIMD-optimized implementations - if (msz == M_4x4 && !homogenous && IsConsecutive4(tregs) && IsConsecutive4(dregs)) { + if (msz == M_4x4 && IsConsecutive4(tregs) && IsConsecutive4(dregs)) { int s0 = IRVTEMP_0; int s1 = IRVTEMP_PFX_T; if (!IsConsecutive4(sregs)) { ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); for (int i = 1; i < 4; i++) { - ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[i]); - ir.Write(IROp::Vec4Add, s0, s0, s1); + if (!homogenous || (i != n - 1)) { + ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[i]); + ir.Write(IROp::Vec4Add, s0, s0, s1); + } else { + logBlocks = 1; + ir.Write(IROp::Vec4Add, s0, s0, sregs[i]); + } } ir.Write(IROp::Vec4Mov, dregs[0], s0); return; - } else { + } else if (!homogenous) { for (int i = 0; i < 4; i++) { ir.Write(IROp::Vec4Dot, s0 + i, sregs[i], tregs[0]); } @@ -886,7 +940,7 @@ namespace MIPSComp { return; } } else if (msz == M_4x4) { - logBlocks = 1; + // logBlocks = 1; } // TODO: test overlap, optimize. From 850d0abc91fb29aece41586a3c9e35434c7ac494 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 May 2016 20:16:15 +0200 Subject: [PATCH 52/77] IR: More VFPU. 
Support normal fp compares. --- Core/MIPS/IR/IRCompFPU.cpp | 23 ++-- Core/MIPS/IR/IRCompVFPU.cpp | 225 ++++++++++++++++++++++++++++---- Core/MIPS/IR/IRFrontend.cpp | 3 +- Core/MIPS/IR/IRFrontend.h | 8 +- Core/MIPS/IR/IRInst.cpp | 6 + Core/MIPS/IR/IRInst.h | 23 ++-- Core/MIPS/IR/IRInterpreter.cpp | 27 ++++ Core/MIPS/IR/IRPassSimplify.cpp | 8 +- 8 files changed, 261 insertions(+), 62 deletions(-) diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 068a58013a87..f046704ac7c4 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -94,7 +94,7 @@ void IRFrontend::Comp_FPULS(MIPSOpcode op) { } void IRFrontend::Comp_FPUComp(MIPSOpcode op) { - DISABLE; // IROps not yet implemented + CONDITIONAL_DISABLE; int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias @@ -105,35 +105,34 @@ void IRFrontend::Comp_FPUComp(MIPSOpcode op) { int fs = _FS; int ft = _FT; - - IROp irOp; + IRFpCompareMode mode; switch (opc) { case 1: // un, ngle (unordered) - irOp = IROp::FCmpUnordered; + mode = IRFpCompareMode::NotEqualUnordered; break; case 2: // eq, seq (equal, ordered) - irOp = IROp::FCmpEqual; + mode = IRFpCompareMode::EqualOrdered; break; case 3: // ueq, ngl (equal, unordered) - irOp = IROp::FCmpEqualUnordered; + mode = IRFpCompareMode::EqualUnordered; return; case 4: // olt, lt (less than, ordered) - irOp = IROp::FCmpLessOrdered; + mode = IRFpCompareMode::LessOrdered; break; case 5: // ult, nge (less than, unordered) - irOp = IROp::FCmpLessUnordered; + mode = IRFpCompareMode::LessUnordered; break; case 6: // ole, le (less equal, ordered) - irOp = IROp::FCmpLessEqualOrdered; + mode = IRFpCompareMode::LessEqualOrdered; break; case 7: // ule, ngt (less equal, unordered) - irOp = IROp::FCmpLessEqualUnordered; + mode = IRFpCompareMode::LessEqualUnordered; break; default: - Comp_Generic(op); + DISABLE; return; } - ir.Write(irOp, fs, ft); + ir.Write(IROp::FCmp, (int)mode, fs, ft); } void IRFrontend::Comp_FPU2op(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 1670deb967b2..3478114f8ffa 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -81,8 +81,8 @@ namespace MIPSComp { return true; } - static bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL) { - return IsOverlapSafeAllowS(dreg, di, sn, sregs, tn, tregs) && sregs[di] != dreg; + static bool IsOverlapSafe(int dreg, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL) { + return IsOverlapSafeAllowS(dreg, -1, sn, sregs, tn, tregs); } void IRFrontend::Comp_VPFX(MIPSOpcode op) { @@ -364,7 +364,35 @@ namespace MIPSComp { static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f }; void IRFrontend::Comp_Vhoriz(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix()) + DISABLE; + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], dregs[1]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixD(dregs, V_Single, _VD); + + // We have to start at +0.000 in case any values are -0.000. 
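// Illustrative aside (sketch, not from the patch): IEEE-754 signed zero is the
// reason for the +0.000 seed. A sum made only of -0.0f terms stays -0.0f, while
// seeding the accumulator with +0.0f makes the final sum come out as +0.0f.
#include <cmath>
#include <cstdio>
int main() {
	float allNeg = -0.0f + -0.0f;          // -0.0f: adding negative zeros keeps the sign
	float seeded = 0.0f + -0.0f + -0.0f;   // +0.0f: the positive seed wins
	printf("%d %d\n", (int)std::signbit(allNeg), (int)std::signbit(seeded));  // prints "1 0"
	return 0;
}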
+ ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(0.0f)); + for (int i = 0; i < n; ++i) { + ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, sregs[i]); + } + + switch ((op >> 16) & 31) { + case 6: // vfad + ir.Write(IROp::FMov, dregs[0], IRVTEMP_0); + break; + case 7: // vavg + ir.Write(IROp::SetConstF, IRVTEMP_0 + 1, ir.AddConstantFloat(vavg_table[n - 1])); + ir.Write(IROp::FMul, dregs[0], IRVTEMP_0, IRVTEMP_0 + 1); + break; + } + + ApplyPrefixD(dregs, V_Single); } void IRFrontend::Comp_VDot(MIPSOpcode op) { @@ -397,7 +425,6 @@ namespace MIPSComp { void IRFrontend::Comp_VecDo3(MIPSOpcode op) { CONDITIONAL_DISABLE; - if (js.HasUnknownPrefix()) DISABLE; @@ -449,10 +476,10 @@ namespace MIPSComp { GetVectorRegsPrefixT(tregs, sz, _VT); GetVectorRegsPrefixD(dregs, sz, _VD); - int tempregs[4]; + u8 tempregs[4]; bool usingTemps = false; for (int i = 0; i < n; i++) { - if (!IsOverlapSafe(dregs[i], i, n, sregs, n, tregs)) { + if (!IsOverlapSafe(dregs[i], n, sregs, n, tregs)) { tempregs[i] = IRVTEMP_0 + i; usingTemps = true; } else { @@ -460,7 +487,7 @@ namespace MIPSComp { } } - if (allowSIMD && sz == V_Quad && !usingTemps && IsConsecutive4(sregs) && IsConsecutive4(dregs) && IsConsecutive4(sregs)) { + if (allowSIMD && sz == V_Quad && !usingTemps && IsConsecutive4(dregs) && IsConsecutive4(sregs) && IsConsecutive4(tregs)) { IROp opFunc = IROp::Nop; bool symmetric = false; switch (op >> 26) { @@ -502,6 +529,8 @@ namespace MIPSComp { if (opFunc != IROp::Nop) { ir.Write(opFunc, dregs[0], sregs[0], tregs[0]); + } else { + DISABLE; } ApplyPrefixD(dregs, sz); return; @@ -558,6 +587,10 @@ namespace MIPSComp { void IRFrontend::Comp_VV2Op(MIPSOpcode op) { if (js.HasUnknownPrefix()) DISABLE; + if (!js.HasNoPrefix()) { + logBlocks = 1; + //DISABLE; // Something subtle is wrong. + } int vs = _VS; int vd = _VD; @@ -577,7 +610,7 @@ namespace MIPSComp { bool usingTemps = false; int tempregs[4]; for (int i = 0; i < n; ++i) { - if (!IsOverlapSafe(dregs[i], i, n, sregs)) { + if (!IsOverlapSafe(dregs[i], n, sregs)) { usingTemps = true; tempregs[i] = IRVTEMP_0 + i; } else { @@ -686,7 +719,7 @@ namespace MIPSComp { int tempregs[4]; for (int i = 0; i < n; ++i) { - if (!IsOverlapSafe(dregs[i], i, n, sregs)) { + if (!IsOverlapSafe(dregs[i], n, sregs)) { tempregs[i] = IRVTEMP_PFX_T + i; // Need IRVTEMP_0 for the scaling factor } else { tempregs[i] = dregs[i]; @@ -697,8 +730,11 @@ namespace MIPSComp { // TODO: Use the SCVTF with builtin scaling where possible. 
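// Illustrative sketch (not from the patch): the scalar math the FCvtSW + FMul
// pair below implements, with the 2^-imm scale precomputed into a float
// constant the way `mult` is. The imm parameter stands in for the VFPU
// instruction's 5-bit scale field; names here are assumptions for illustration.
static inline float Vi2fReference(int s, int imm) {
	return (float)s * (1.0f / (float)(1u << imm));   // d = s / 2^imm
}
// The SCVTF mentioned in the TODO is AArch64's fixed-point convert form
// (scvtf s0, w0, #imm), which folds the int-to-float convert and the 2^-imm
// scale into a single instruction.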
for (int i = 0; i < n; i++) { ir.Write(IROp::FCvtSW, tempregs[i], sregs[i]); - if (mult != 1.0f) + } + if (mult != 1.0f) { + for (int i = 0; i < n; i++) { ir.Write(IROp::FMul, tempregs[i], tempregs[i], IRVTEMP_0); + } } for (int i = 0; i < n; ++i) { @@ -718,6 +754,8 @@ namespace MIPSComp { } void IRFrontend::Comp_Mftv(MIPSOpcode op) { + CONDITIONAL_DISABLE; + int imm = op & 0xFF; MIPSGPReg rt = _RT; switch ((op >> 21) & 0x1f) { @@ -727,7 +765,18 @@ namespace MIPSComp { if (imm < 128) { //R(rt) = VI(imm); ir.Write(IROp::FMovToGPR, rt, vfpuBase + voffset[imm]); } else { - DISABLE; + switch (imm - 128) { + case VFPU_CTRL_DPREFIX: + case VFPU_CTRL_SPREFIX: + case VFPU_CTRL_TPREFIX: + FlushPrefixV(); + break; + } + if (imm - 128 < 16) { + ir.Write(IROp::VfpuCtrlToReg, rt, imm - 128); + } else { + DISABLE; + } } } break; @@ -735,6 +784,8 @@ namespace MIPSComp { case 7: // mtv if (imm < 128) { ir.Write(IROp::FMovFromGPR, vfpuBase + voffset[imm], rt); + } else if ((imm - 128) < 16) { + ir.Write(IROp::SetCtrlVFPU, imm - 128, rt); } else { DISABLE; } @@ -743,18 +794,105 @@ namespace MIPSComp { default: DISABLE; } + // This op is marked not to auto-eat prefix so we must do it manually. + EatPrefix(); } + // Good above + void IRFrontend::Comp_Vmfvc(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + + int vs = _VS; + int imm = op & 0xFF; + if (imm >= 128 && imm < 128 + VFPU_CTRL_MAX) { + //if (imm - 128 == VFPU_CTRL_CC) { + // gpr.MapReg(MIPS_REG_VFPUCC, 0); + // fp.FMOV(fpr.V(vs), gpr.R(MIPS_REG_VFPUCC)); + // } else { + ir.Write(IROp::VfpuCtrlToReg, IRTEMP_0, imm - 128); + ir.Write(IROp::FMovFromGPR, vfpuBase + voffset[vs], IRTEMP_0); + } } void IRFrontend::Comp_Vmtvc(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + int vs = _VS; + int imm = op & 0xFF; + if (imm >= 128 && imm < 128 + VFPU_CTRL_MAX) { + ir.Write(IROp::SetCtrlVFPUFReg, imm - 128, vfpuBase + voffset[vs]); + if (imm - 128 == VFPU_CTRL_SPREFIX) { + js.prefixSFlag = JitState::PREFIX_UNKNOWN; + } else if (imm - 128 == VFPU_CTRL_TPREFIX) { + js.prefixTFlag = JitState::PREFIX_UNKNOWN; + } else if (imm - 128 == VFPU_CTRL_DPREFIX) { + js.prefixDFlag = JitState::PREFIX_UNKNOWN; + } + } } void IRFrontend::Comp_Vmmov(MIPSOpcode op) { + CONDITIONAL_DISABLE; + + int vs = _VS; + int vd = _VD; + // This probably ignores prefixes for all sane intents and purposes. + if (vs == vd) { + // A lot of these no-op matrix moves in Wipeout... Just drop the instruction entirely. + return; + } + + MatrixSize sz = GetMtxSize(op); + if (sz != M_4x4) { + // logBlocks = true; + DISABLE; + } + int n = GetMatrixSide(sz); + + u8 sregs[16], dregs[16]; + GetMatrixRegs(sregs, sz, vs); + GetMatrixRegs(dregs, sz, vd); + + // Rough overlap check. 
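// Illustrative note on the overlap cases checked below (sketch): a row-by-row
// FMov/Vec4Mov expansion reads the source registers one row at a time, so a
// destination matrix that shares registers with the source can clobber rows it
// has not read yet (the register-file analogue of an overlapping memcpy).
// Disjoint ranges are fine, the pure no-op case is handled by the vs == vd
// early return above, and everything in between (OVERLAP_PARTIAL, or an
// in-place transpose) would need temporaries, hence the DISABLEs.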
+ switch (GetMatrixOverlap(vs, vd, sz)) { + case OVERLAP_EQUAL: + // In-place transpose + DISABLE; + case OVERLAP_PARTIAL: + DISABLE; + case OVERLAP_NONE: + default: + break; + } + if (IsMatrixTransposed(vd) == IsMatrixTransposed(vs) && sz == M_4x4) { + // Untranspose both matrices + if (IsMatrixTransposed(vd)) { + vd = TransposeMatrixReg(vd); + vs = TransposeMatrixReg(vs); + } + // Get the columns + u8 scols[4], dcols[4]; + GetMatrixColumns(vs, sz, scols); + GetMatrixColumns(vd, sz, dcols); + for (int i = 0; i < 4; i++) { + u8 svec[4], dvec[4]; + GetVectorRegs(svec, GetVectorSize(sz), scols[i]); + GetVectorRegs(dvec, GetVectorSize(sz), dcols[i]); + ir.Write(IROp::Vec4Mov, dvec[0], svec[0]); + } + return; + } + for (int a = 0; a < n; a++) { + for (int b = 0; b < n; b++) { + ir.Write(IROp::FMov, dregs[a * 4 + b], sregs[a * 4 + b]); + } + } + } + + void IRFrontend::Comp_Vmscl(MIPSOpcode op) { DISABLE; + + // TODO: Tricky, can transpose } void IRFrontend::Comp_VScl(MIPSOpcode op) { @@ -772,26 +910,27 @@ namespace MIPSComp { GetVectorRegsPrefixT(&treg, V_Single, _VT); GetVectorRegsPrefixD(dregs, sz, _VD); - if (n == 4 && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { - // In this case, there's zero danger of overlap. - ir.Write(IROp::Vec4Scale, dregs[0], sregs[0], treg); - ApplyPrefixD(dregs, sz); - return; - } - + bool overlap = false; // For prefixes to work, we just have to ensure that none of the output registers spill // and that there's no overlap. int tempregs[4]; for (int i = 0; i < n; ++i) { - // for vscl, it's fine if dregs[i] = sregs[i] - if (dregs[i] != sregs[i] && !IsOverlapSafe(dregs[i], i, n, sregs)) { + // Conservative, can be improved + if (treg == dregs[i] || !IsOverlapSafe(dregs[i], n, sregs)) { // Need to use temp regs tempregs[i] = IRVTEMP_0 + i; + overlap = true; } else { tempregs[i] = dregs[i]; } } + if (n == 4 && IsConsecutive4(sregs) && IsConsecutive4(dregs) && !overlap) { + ir.Write(IROp::Vec4Scale, dregs[0], sregs[0], treg); + ApplyPrefixD(dregs, sz); + return; + } + for (int i = 0; i < n; i++) { ir.Write(IROp::FMul, tempregs[i], sregs[i], treg); } @@ -866,7 +1005,7 @@ namespace MIPSComp { } } else if (sz == M_4x4) { // Tekken 6 has a case here: MEE - logBlocks = 1; + // logBlocks = 1; } // Fallback. Expands a LOT @@ -883,10 +1022,6 @@ namespace MIPSComp { } } - void IRFrontend::Comp_Vmscl(MIPSOpcode op) { - DISABLE; - } - void IRFrontend::Comp_Vtfm(MIPSOpcode op) { CONDITIONAL_DISABLE; if (js.HasUnknownPrefix()) { @@ -926,7 +1061,6 @@ namespace MIPSComp { ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[i]); ir.Write(IROp::Vec4Add, s0, s0, s1); } else { - logBlocks = 1; ir.Write(IROp::Vec4Add, s0, s0, sregs[i]); } } @@ -1085,7 +1219,40 @@ namespace MIPSComp { } void IRFrontend::Comp_Vocp(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], dregs[4]; + // Actually, not sure that this instruction accepts an S prefix. We don't apply it in the + // interpreter. But whatever. 
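// Scalar reference for the IR emitted below (sketch): vocp computes the
// "one's complement" of each lane, d[i] = 1.0f - s[i], before the D prefix is
// applied. Names are illustrative only.
static inline void VocpReference(const float *s, float *d, int n) {
	for (int i = 0; i < n; i++)
		d[i] = 1.0f - s[i];
}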
+ GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixD(dregs, sz, _VD); + + int tempregs[4]; + for (int i = 0; i < n; ++i) { + if (!IsOverlapSafe(dregs[i], n, sregs)) { + tempregs[i] = IRVTEMP_PFX_T; // using IRTEMP0 for other things + } else { + tempregs[i] = dregs[i]; + } + } + + ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(1.0f)); + for (int i = 0; i < n; ++i) { + ir.Write(IROp::FSub, tempregs[i], IRVTEMP_0, sregs[i]); + } + for (int i = 0; i < n; ++i) { + if (dregs[i] != tempregs[i]) { + ir.Write(IROp::FMov, dregs[i], tempregs[i]); + } + } + + ApplyPrefixD(dregs, sz); } void IRFrontend::Comp_ColorConv(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 3b13978b43bf..0a3fc0432909 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -34,7 +34,8 @@ namespace MIPSComp { IRFrontend::IRFrontend(bool startDefaultPrefix) { logBlocks = 0; dontLogBlocks = 0; - js.startDefaultPrefix = startDefaultPrefix; + js.startDefaultPrefix = true; + js.hasSetRounding = false; // js.currentRoundingFunc = convertS0ToSCRATCH1[0]; } diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index 7a9a9196120d..8ffdf5c9047a 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -90,15 +90,15 @@ class IRFrontend : public MIPSFrontendInterface { void DoJit(u32 em_address, std::vector &instructions, std::vector &constants); + void EatPrefix() override { + js.EatPrefix(); + } + private: void RestoreRoundingMode(bool force = false); void ApplyRoundingMode(bool force = false); void UpdateRoundingMode(); - void EatPrefix() override { - js.EatPrefix(); - } - void FlushAll(); void FlushPrefixV(); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 92494612201d..74bf24dc4a99 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -87,6 +87,7 @@ static const IRMeta irMeta[] = { { IROp::FFloor, "FFloor", "FF" }, { IROp::FCvtWS, "FCvtWS", "FF" }, { IROp::FCvtSW, "FCvtSW", "FF" }, + { IROp::FCmp, "FCmp", "mFF" }, { IROp::FSat0_1, "FSat(0 - 1)", "FF" }, { IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" }, { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, @@ -94,6 +95,8 @@ static const IRMeta irMeta[] = { { IROp::FpCondToReg, "FpCondToReg", "G" }, { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" }, { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, + { IROp::SetCtrlVFPUReg, "SetCtrlVFPUReg", "TC" }, + { IROp::SetCtrlVFPUFReg, "SetCtrlVFPUFReg", "TF" }, { IROp::Vec4Init, "Vec4Init", "Fv" }, { IROp::Vec4Shuffle, "Vec4Shuffle", "FFs" }, @@ -222,6 +225,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co case 'I': snprintf(buf, bufSize, "%02x", param); break; + case 'm': + snprintf(buf, bufSize, "%d", param); + break; case 'T': snprintf(buf, bufSize, "%s", vfpuCtrlNames[param]); break; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index c296afb96755..0393eb9d22d7 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -131,15 +131,7 @@ enum class IROp : u8 { VfpuCtrlToReg, ZeroFpCond, - FCmpUnordered, - FCmpEqual, - FCmpEqualUnordered, - FCmpLessOrdered, - FCmpLessUnordered, - FCmpLessEqualOrdered, - FCmpLessEqualUnordered, - FCmpEqualZero, - FCmpNotEqualZero, + FCmp, FCmovVfpuCC, @@ -149,6 +141,8 @@ enum class IROp : u8 { UpdateRoundingMode, SetCtrlVFPU, + SetCtrlVFPUReg, + SetCtrlVFPUFReg, // 4-wide instructions to assist SIMD. 
// Can of course add a pass to break them up if a target does not @@ -245,6 +239,17 @@ inline IROp ComparisonToExit(IRComparison comp) { } } +enum IRFpCompareMode { + False = 0, + NotEqualUnordered, + EqualOrdered, // eq, seq (equal, ordered) + EqualUnordered, // ueq, ngl (equal, unordered) + LessOrdered, // olt, lt (less than, ordered) + LessUnordered, // ult, nge (less than, unordered) + LessEqualOrdered, // ole, le (less equal, ordered) + LessEqualUnordered, // ule, ngt (less equal, unordered) +}; + enum { IRTEMP_0 = 192, IRTEMP_1, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 4e572745f0a5..63ac1afb99e1 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -421,6 +421,25 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::FFloor: mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1]); break; + case IROp::FCmp: + switch (inst->dest) { + case IRFpCompareMode::False: + mips->fpcond = 0; + break; + case IRFpCompareMode::EqualOrdered: + case IRFpCompareMode::EqualUnordered: + mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2]; + break; + case IRFpCompareMode::LessEqualOrdered: + case IRFpCompareMode::LessEqualUnordered: + mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2]; + break; + case IRFpCompareMode::LessOrdered: + case IRFpCompareMode::LessUnordered: + mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2]; + break; + } + break; case IROp::FCvtSW: mips->f[inst->dest] = (float)mips->fs[inst->src1]; @@ -529,6 +548,14 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c mips->vfpuCtrl[inst->dest] = constPool[inst->src1]; break; + case IROp::SetCtrlVFPUReg: + mips->vfpuCtrl[inst->dest] = mips->r[inst->src1]; + break; + + case IROp::SetCtrlVFPUFReg: + memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4); + break; + default: Crash(); } diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 477774f4355a..d18c92761a00 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -379,13 +379,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::ZeroFpCond: - case IROp::FCmpUnordered: - case IROp::FCmpEqual: - case IROp::FCmpEqualUnordered: - case IROp::FCmpLessOrdered: - case IROp::FCmpLessUnordered: - case IROp::FCmpLessEqualOrdered: - case IROp::FCmpLessEqualUnordered: + case IROp::FCmp: gpr.MapDirty(IRREG_FPCOND); goto doDefault; From c69a8c07dc9b577c82c29e19dd4f62f791551954 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 May 2016 20:20:59 +0200 Subject: [PATCH 53/77] Forgot this --- Core/MIPS/IR/IRCompVFPU.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 3478114f8ffa..8ca4336c2b1a 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -587,10 +587,6 @@ namespace MIPSComp { void IRFrontend::Comp_VV2Op(MIPSOpcode op) { if (js.HasUnknownPrefix()) DISABLE; - if (!js.HasNoPrefix()) { - logBlocks = 1; - //DISABLE; // Something subtle is wrong. 
- } int vs = _VS; int vd = _VD; From 1851458628fd5de7f76ebcaa823af499d997fe5e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 May 2016 20:28:59 +0200 Subject: [PATCH 54/77] Bugfixes --- Core/MIPS/ARM64/Arm64CompVFPU.cpp | 2 ++ Core/MIPS/IR/IRCompVFPU.cpp | 23 +++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp index 91500507ae49..3cd5f03d7b90 100644 --- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -1297,6 +1297,8 @@ namespace MIPSComp { } void Arm64Jit::Comp_Vi2x(MIPSOpcode op) { + CONDITIONAL_DISABLE; + if (!cpu_info.bNEON) { DISABLE; } diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 8ca4336c2b1a..ed2ab8d89b54 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1127,26 +1127,41 @@ namespace MIPSComp { GetVectorRegs(tregs, sz, _VT); GetVectorRegs(dregs, sz, _VD); + int tempregs[4]; + for (int i = 0; i < n; ++i) { + if (!IsOverlapSafe(dregs[i], n, sregs, n, tregs)) { + tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things + } else { + tempregs[i] = dregs[i]; + } + } + if (sz == V_Triple) { int temp0 = IRVTEMP_0; int temp1 = IRVTEMP_0 + 1; // Compute X ir.Write(IROp::FMul, temp0, sregs[1], tregs[2]); ir.Write(IROp::FMul, temp1, sregs[2], tregs[1]); - ir.Write(IROp::FSub, dregs[0], temp0, temp1); + ir.Write(IROp::FSub, tempregs[0], temp0, temp1); // Compute Y ir.Write(IROp::FMul, temp0, sregs[2], tregs[0]); ir.Write(IROp::FMul, temp1, sregs[0], tregs[2]); - ir.Write(IROp::FSub, dregs[1], temp0, temp1); + ir.Write(IROp::FSub, tempregs[1], temp0, temp1); // Compute Z ir.Write(IROp::FMul, temp0, sregs[0], tregs[1]); ir.Write(IROp::FMul, temp1, sregs[1], tregs[0]); - ir.Write(IROp::FSub, dregs[2], temp0, temp1); + ir.Write(IROp::FSub, tempregs[2], temp0, temp1); } else if (sz == V_Quad) { DISABLE; } + + for (int i = 0; i < n; i++) { + if (tempregs[i] != dregs[i]) + ir.Write(IROp::FMov, dregs[i], tempregs[i]); + } + // No D prefix supported } void IRFrontend::Comp_Vcmp(MIPSOpcode op) { @@ -1232,7 +1247,7 @@ namespace MIPSComp { int tempregs[4]; for (int i = 0; i < n; ++i) { if (!IsOverlapSafe(dregs[i], n, sregs)) { - tempregs[i] = IRVTEMP_PFX_T; // using IRTEMP0 for other things + tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things } else { tempregs[i] = dregs[i]; } From 7268abec611ba76a4ad218ecbcb537068fc0aa93 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 May 2016 22:35:31 +0200 Subject: [PATCH 55/77] IR: vcmp, vcmov, vhdp --- Core/MIPS/IR/IRCompVFPU.cpp | 93 +++++++++++++++++++++++++++++++--- Core/MIPS/IR/IRInst.cpp | 4 +- Core/MIPS/IR/IRInst.h | 8 +++ Core/MIPS/IR/IRInterpreter.cpp | 46 +++++++++++++++++ 4 files changed, 144 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index ed2ab8d89b54..f4736be521ee 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -358,7 +358,38 @@ namespace MIPSComp { } void IRFrontend::Comp_VHdp(MIPSOpcode op) { - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + int vd = _VD; + int vs = _VS; + int vt = _VT; + VectorSize sz = GetVecSize(op); + + // TODO: Force read one of them into regs? probably not. + u8 sregs[4], tregs[4], dregs[1]; + GetVectorRegsPrefixS(sregs, sz, vs); + GetVectorRegsPrefixT(tregs, sz, vt); + GetVectorRegsPrefixD(dregs, V_Single, vd); + + // TODO: applyprefixST here somehow (shuffle, etc...) 
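// Scalar reference for the homogeneous dot product emitted below (sketch,
// assuming n >= 2): the last lane of S acts as an implicit 1.0, so T's last
// lane is added without a multiply.
static inline float VhdpReference(const float *s, const float *t, int n) {
	float sum = 0.0f;
	for (int i = 0; i < n - 1; i++)
		sum += s[i] * t[i];
	return sum + t[n - 1];
}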
+ ir.Write(IROp::FMul, IRVTEMP_0, sregs[0], tregs[0]); + + int n = GetNumVectorElements(sz); + for (int i = 1; i < n; i++) { + // sum += s[i]*t[i]; + if (i == n - 1) { + ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, tregs[i]); + } else { + ir.Write(IROp::FMul, IRVTEMP_0 + 1, sregs[i], tregs[i]); + ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, IRVTEMP_0 + 1); + } + } + + ir.Write(IROp::FMov, dregs[0], IRVTEMP_0); + ApplyPrefixD(dregs, V_Single); } static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f }; @@ -840,7 +871,6 @@ namespace MIPSComp { MatrixSize sz = GetMtxSize(op); if (sz != M_4x4) { - // logBlocks = true; DISABLE; } int n = GetMatrixSide(sz); @@ -1165,16 +1195,66 @@ namespace MIPSComp { } void IRFrontend::Comp_Vcmp(MIPSOpcode op) { - // Fiendishly hard... - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) + DISABLE; + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + VCondition cond = (VCondition)(op & 0xF); + + u8 sregs[4], tregs[4]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixT(tregs, sz, _VT); + + int mask = 0; + for (int i = 0; i < n; i++) { + ir.Write(IROp::FCmpVfpuBit, cond | (i << 4), sregs[i], tregs[i]); + mask |= (1 << i); + } + ir.Write(IROp::FCmpVfpuAggregate, mask); } void IRFrontend::Comp_Vcmov(MIPSOpcode op) { - // Fiendishly hard... - DISABLE; + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix()) { + DISABLE; + } + + logBlocks = 1; + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], dregs[4]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixD(dregs, sz, _VD); + int tf = (op >> 19) & 1; + int imm3 = (op >> 16) & 7; + + for (int i = 0; i < n; ++i) { + // Simplification: Disable if overlap unsafe + if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) { + DISABLE; + } + } + if (imm3 < 6) { + // Test one bit of CC. This bit decides whether none or all subregisters are copied. + for (int i = 0; i < n; i++) { + ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (imm3) | ((!tf) << 7)); + } + } else { + // Look at the bottom four bits of CC to individually decide if the subregisters should be copied. 
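// Packing of the third FCmovVfpuCC operand used here, as decoded by the
// interpreter later in this series (descriptive note): bits 0-6 select which
// VFPU_CTRL_CC bit to test (imm3 above, or the lane index i below), and bit 7,
// set from (!tf), is the value that CC bit must have for the copy to happen:
//   ccBit   = operand & 0x7f;
//   wantSet = operand >> 7;
//   if (((vfpuCtrl[VFPU_CTRL_CC] >> ccBit) & 1) == wantSet) dst = src;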
+ for (int i = 0; i < n; i++) { + ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (i) | ((!tf) << 7)); + } + } + ApplyPrefixD(dregs, sz); } void IRFrontend::Comp_Viim(MIPSOpcode op) { + CONDITIONAL_DISABLE; if (js.HasUnknownPrefix()) DISABLE; @@ -1186,6 +1266,7 @@ namespace MIPSComp { } void IRFrontend::Comp_Vfim(MIPSOpcode op) { + CONDITIONAL_DISABLE; if (js.HasUnknownPrefix()) DISABLE; diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 74bf24dc4a99..38b8f54e2d2a 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -97,7 +97,9 @@ static const IRMeta irMeta[] = { { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" }, { IROp::SetCtrlVFPUReg, "SetCtrlVFPUReg", "TC" }, { IROp::SetCtrlVFPUFReg, "SetCtrlVFPUFReg", "TF" }, - + { IROp::FCmovVfpuCC, "FCmovVfpuCC", "FFI" }, + { IROp::FCmpVfpuBit, "FCmpVfpuBit", "IFF" }, + { IROp::FCmpVfpuAggregate, "FCmpVfpuAggregate", ""}, { IROp::Vec4Init, "Vec4Init", "Fv" }, { IROp::Vec4Shuffle, "Vec4Shuffle", "FFs" }, { IROp::Vec4Mov, "Vec4Mov", "FF" }, diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 0393eb9d22d7..df434eb113ea 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -134,6 +134,8 @@ enum class IROp : u8 { FCmp, FCmovVfpuCC, + FCmpVfpuBit, + FCmpVfpuAggregate, // Rounding Mode RestoreRoundingMode, @@ -157,6 +159,12 @@ enum class IROp : u8 { Vec4Scale, Vec4Dot, + // vx2i + Vec4ExpandU16ToU32Hi, + Vec4ExpandU8ToU32Hi, + Vec4ExpandS16ToS32Hi, + Vec4ExpandS8ToS32Hi, + // Slow special functions. Used on singles. FSin, FCos, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 63ac1afb99e1..1a719046cdff 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -32,6 +32,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c const IRInst *end = inst + count; while (inst != end) { switch (inst->op) { + case IROp::Nop: + _assert_(false); + break; case IROp::SetConst: mips->r[inst->dest] = constPool[inst->src1]; break; @@ -209,6 +212,49 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c #endif break; + case IROp::FCmpVfpuBit: + { + int op = inst->dest & 0xF; + int bit = inst->dest >> 4; + int result = 0; + switch (op) { + case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break; + case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break; + case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break; + case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break; + case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break; + case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break; + case VC_EZ: result = mips->f[inst->src1] == 0.0f; break; + case VC_NZ: result = mips->f[inst->src1] != 0.0f; break; + case VC_TR: result = 1; break; + case VC_FL: result = 0; break; + default: + result = 0; + } + if (result != 0) { + mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit); + } else { + mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit); + } + } + break; + + case IROp::FCmpVfpuAggregate: + { + int mask = inst->dest; + u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC]; + int a = (cc & mask) ? 0x10 : 0x00; + int b = (cc & mask) == mask ? 0x20 : 0x00; + mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | a | b;; + } + break; + + case IROp::FCmovVfpuCC: + if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0x7f)) & 1) == (inst->src2 >> 7)) { + mips->f[inst->dest] = mips->f[inst->src1]; + } + break; + // Not quickly implementable on all platforms, unfortunately. 
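// The Vec4Dot case continues in the surrounding context; for reference, the
// scalar result it has to produce is a plain 4-lane dot product (sketch,
// ignoring the SSE/NEON fast paths):
static inline float Vec4DotReference(const float *a, const float *b) {
	float sum = 0.0f;
	for (int i = 0; i < 4; i++)
		sum += a[i] * b[i];
	return sum;
}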
case IROp::Vec4Dot: { From 99468c6fc157c173a537b53d9d514f8617460d94 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 9 May 2016 21:40:46 -0700 Subject: [PATCH 56/77] jit-ir: Optimize out unused temp regs. This way, if constants have made the temp obsolete (common with ins, for example), it won't even get set anymore. --- Core/MIPS/IR/IRFrontend.cpp | 1 + Core/MIPS/IR/IRInst.cpp | 10 ++--- Core/MIPS/IR/IRInst.h | 4 ++ Core/MIPS/IR/IRPassSimplify.cpp | 76 +++++++++++++++++++++++++++++++++ Core/MIPS/IR/IRPassSimplify.h | 1 + 5 files changed, 87 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 0a3fc0432909..3e6c0e755d6f 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -233,6 +233,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v if (true) { static const IRPassFunc passes[] = { &PropagateConstants, + &PurgeTemps, }; if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) logBlocks = 1; diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 38b8f54e2d2a..b95d5fafa3fd 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -60,11 +60,11 @@ static const IRMeta irMeta[] = { { IROp::Load32, "Load32", "GGC" }, { IROp::LoadFloat, "LoadFloat", "FGC" }, { IROp::LoadVec4, "LoadVec4", "FGC" }, - { IROp::Store8, "Store8", "GGC" }, - { IROp::Store16, "Store16", "GGC" }, - { IROp::Store32, "Store32", "GGC" }, - { IROp::StoreFloat, "StoreFloat", "FGC" }, - { IROp::StoreVec4, "StoreVec4", "FGC" }, + { IROp::Store8, "Store8", "GGC", IRFLAG_SRC3 }, + { IROp::Store16, "Store16", "GGC", IRFLAG_SRC3 }, + { IROp::Store32, "Store32", "GGC", IRFLAG_SRC3 }, + { IROp::StoreFloat, "StoreFloat", "FGC", IRFLAG_SRC3 }, + { IROp::StoreVec4, "StoreVec4", "FGC", IRFLAG_SRC3 }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index df434eb113ea..b4e330aadc6a 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -281,6 +281,10 @@ enum { IRREG_FPCOND = 245, }; +enum IRFlags { + IRFLAG_SRC3 = 1, +}; + struct IRMeta { IROp op; const char *name; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index d18c92761a00..abb06fe89c2f 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -433,3 +433,79 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { } return logBlocks; } + +bool IRReadsFromGPR(const IRInst &inst, int reg) { + const IRMeta *m = GetIRMeta(inst.op); + + if (m->types[1] == 'G' && inst.src1 == reg) { + return true; + } + if (m->types[2] == 'G' && inst.src2 == reg) { + return true; + } + if ((m->flags & IRFLAG_SRC3) != 0 && m->types[0] == 'G' && inst.src3 == reg) { + return true; + } + if (inst.op == IROp::Interpret) { + return true; + } + return false; +} + +int IRDestGPR(const IRInst &inst) { + const IRMeta *m = GetIRMeta(inst.op); + + if ((m->flags & IRFLAG_SRC3) == 0 && m->types[0] == 'G') { + return inst.dest; + } + return -1; +} + +bool PurgeTemps(const IRWriter &in, IRWriter &out) { + IRRegCache gpr(&out); + + for (u32 value : in.GetConstants()) { + out.AddConstant(value); + } + + bool logBlocks = false; + for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) { + const IRInst &inst = in.GetInstructions()[i]; + + int dest = IRDestGPR(inst); + bool read = true; + switch (dest) { + case IRTEMP_0: + case IRTEMP_1: + case IRTEMP_LHS: + case IRTEMP_RHS: + // Unlike 
other ops, these don't need to persist between blocks. + // So we consider them not read unless proven read. + read = false; + for (int j = i + 1; j < n; j++) { + const IRInst &laterInst = in.GetInstructions()[j]; + if (IRReadsFromGPR(laterInst, dest)) { + // Read from, so we can't optimize out. + read = true; + break; + } + if (IRDestGPR(laterInst) == dest) { + // Clobbered, we can optimize out. + break; + } + } + break; + + default: + break; + } + + // TODO: VFPU temps? + + if (read) { + out.Write(inst); + } + } + + return logBlocks; +} diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index 72e87ace2150..496f4e6aad86 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -6,3 +6,4 @@ typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out); bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out); bool PropagateConstants(const IRWriter &in, IRWriter &out); +bool PurgeTemps(const IRWriter &in, IRWriter &out); From d06c6c080cf9b7d53aba958efc30681722eba42d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 9 May 2016 22:25:35 -0700 Subject: [PATCH 57/77] jit-ir: Expand unused regs to regular GPRs. --- Core/MIPS/IR/IRInst.cpp | 26 +++++++++++++------------- Core/MIPS/IR/IRInst.h | 7 ++++++- Core/MIPS/IR/IRPassSimplify.cpp | 24 +++++++++++++++++++++--- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index b95d5fafa3fd..11db5bf1bf53 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -33,8 +33,8 @@ static const IRMeta irMeta[] = { { IROp::SltU, "SltU", "GGG" }, { IROp::SltUConst, "SltUConst", "GGC" }, { IROp::Clz, "Clz", "GG" }, - { IROp::MovZ, "MovZ", "GGG" }, - { IROp::MovNZ, "MovNZ", "GGG" }, + { IROp::MovZ, "MovZ", "GGG", IRFLAG_SRC3DST }, + { IROp::MovNZ, "MovNZ", "GGG", IRFLAG_SRC3DST }, { IROp::Max, "Max", "GGG" }, { IROp::Min, "Min", "GGG" }, { IROp::BSwap16, "BSwap16", "GG" }, @@ -112,19 +112,19 @@ static const IRMeta irMeta[] = { { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, - { IROp::ExitToConst, "Exit", "C" }, - { IROp::ExitToConstIfEq, "ExitIfEq", "CGG" }, - { IROp::ExitToConstIfNeq, "ExitIfNeq", "CGG" }, - { IROp::ExitToConstIfGtZ, "ExitIfGtZ", "CG" }, - { IROp::ExitToConstIfGeZ, "ExitIfGeZ", "CG" }, - { IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG" }, - { IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG" }, - { IROp::ExitToReg, "ExitToReg", "G" }, - { IROp::Syscall, "Syscall", "_C" }, - { IROp::Break, "Break", ""}, + { IROp::ExitToConst, "Exit", "C", IRFLAG_EXIT }, + { IROp::ExitToConstIfEq, "ExitIfEq", "CGG", IRFLAG_EXIT }, + { IROp::ExitToConstIfNeq, "ExitIfNeq", "CGG", IRFLAG_EXIT }, + { IROp::ExitToConstIfGtZ, "ExitIfGtZ", "CG", IRFLAG_EXIT }, + { IROp::ExitToConstIfGeZ, "ExitIfGeZ", "CG", IRFLAG_EXIT }, + { IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG", IRFLAG_EXIT }, + { IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG", IRFLAG_EXIT }, + { IROp::ExitToReg, "ExitToReg", "G", IRFLAG_EXIT | IRFLAG_SRC3 }, + { IROp::Syscall, "Syscall", "_C", IRFLAG_EXIT }, + { IROp::Break, "Break", "", IRFLAG_EXIT}, { IROp::SetPC, "SetPC", "_G" }, { IROp::SetPCConst, "SetPC", "_C" }, - { IROp::CallReplacement, "CallRepl", "_C"}, + { IROp::CallReplacement, "CallRepl", "_C" }, }; const IRMeta *metaIndex[256]; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index b4e330aadc6a..d2195892ba82 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -282,7 +282,12 @@ enum { }; enum 
IRFlags { - IRFLAG_SRC3 = 1, + // Uses src3, not dest. + IRFLAG_SRC3 = 0x0001, + // Uses src3 AND dest (i.e. mutates dest.) + IRFLAG_SRC3DST = 0x0002, + // Exit instruction (maybe conditional.) + IRFLAG_EXIT = 0x0004, }; struct IRMeta { diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index abb06fe89c2f..f33afef6acc7 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -443,10 +443,10 @@ bool IRReadsFromGPR(const IRInst &inst, int reg) { if (m->types[2] == 'G' && inst.src2 == reg) { return true; } - if ((m->flags & IRFLAG_SRC3) != 0 && m->types[0] == 'G' && inst.src3 == reg) { + if ((m->flags & (IRFLAG_SRC3 | IRFLAG_SRC3DST)) != 0 && m->types[0] == 'G' && inst.src3 == reg) { return true; } - if (inst.op == IROp::Interpret) { + if (inst.op == IROp::Interpret || inst.op == IROp::CallReplacement) { return true; } return false; @@ -474,6 +474,7 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { int dest = IRDestGPR(inst); bool read = true; + bool readByExit = true; switch (dest) { case IRTEMP_0: case IRTEMP_1: @@ -482,21 +483,38 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { // Unlike other ops, these don't need to persist between blocks. // So we consider them not read unless proven read. read = false; + readByExit = false; + // Intentional fall-through. + + default: + if (dest > IRTEMP_RHS) { + // These might sometimes be implicitly read/written by other instructions. + break; + } for (int j = i + 1; j < n; j++) { const IRInst &laterInst = in.GetInstructions()[j]; + const IRMeta *m = GetIRMeta(laterInst.op); if (IRReadsFromGPR(laterInst, dest)) { // Read from, so we can't optimize out. read = true; break; } + if (readByExit && (m->flags & IRFLAG_EXIT) != 0) { + read = true; + break; + } + if (IRDestGPR(laterInst) == dest) { // Clobbered, we can optimize out. + // This happens sometimes with temporaries used for constant addresses. + read = false; break; } } break; - default: + // Not a GPR output. + case -1: break; } From 9f183c97ba53c392ea1fd4ac7bb43fad8dd2584b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 18:27:52 -0700 Subject: [PATCH 58/77] jit-ir: Prevent reading ahead for each reg write. --- Core/MIPS/IR/IRPassSimplify.cpp | 131 +++++++++++++++++++++++++------- Core/MIPS/IR/IRPassSimplify.h | 1 + 2 files changed, 106 insertions(+), 26 deletions(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index f33afef6acc7..cdb766eeceaa 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -462,19 +462,45 @@ int IRDestGPR(const IRInst &inst) { } bool PurgeTemps(const IRWriter &in, IRWriter &out) { - IRRegCache gpr(&out); + std::vector insts; + insts.reserve(in.GetInstructions().size()); - for (u32 value : in.GetConstants()) { - out.AddConstant(value); - } + struct Check { + Check(int r, int i, bool rbx) : reg(r), index(i), readByExit(rbx) { + } + + int reg; + int index; + bool readByExit; + }; + std::vector checks; bool logBlocks = false; for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) { const IRInst &inst = in.GetInstructions()[i]; + const IRMeta *m = GetIRMeta(inst.op); + + for (Check &check : checks) { + if (check.reg == 0) { + continue; + } + + if (IRReadsFromGPR(inst, check.reg)) { + // Read from, so we can't optimize out. + check.reg = 0; + } else if (check.readByExit && (m->flags & IRFLAG_EXIT) != 0) { + check.reg = 0; + } else if (IRDestGPR(inst) == check.reg) { + // Clobbered, we can optimize out. 
+ // This happens sometimes with temporaries used for constant addresses. + insts[check.index].op = IROp::Mov; + insts[check.index].dest = 0; + insts[check.index].src1 = 0; + check.reg = 0; + } + } int dest = IRDestGPR(inst); - bool read = true; - bool readByExit = true; switch (dest) { case IRTEMP_0: case IRTEMP_1: @@ -482,45 +508,98 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { case IRTEMP_RHS: // Unlike other ops, these don't need to persist between blocks. // So we consider them not read unless proven read. - read = false; - readByExit = false; - // Intentional fall-through. + checks.push_back(Check(dest, i, false)); + break; default: if (dest > IRTEMP_RHS) { // These might sometimes be implicitly read/written by other instructions. break; } + checks.push_back(Check(dest, i, true)); + break; + + // Not a GPR output. + case 0: + case -1: + break; + } + + // TODO: VFPU temps? + + insts.push_back(inst); + } + + for (Check &check : checks) { + if (!check.readByExit && check.reg > 0) { + insts[check.index].op = IROp::Mov; + insts[check.index].dest = 0; + insts[check.index].src1 = 0; + } + } + + for (u32 value : in.GetConstants()) { + out.AddConstant(value); + } + for (const IRInst &inst : insts) { + if (inst.op != IROp::Mov || inst.dest != 0 || inst.src1 != 0) { + out.Write(inst); + } + } + + return logBlocks; +} + +bool ReduceLoads(const IRWriter &in, IRWriter &out) { + for (u32 value : in.GetConstants()) { + out.AddConstant(value); + } + + // This tells us to skip an AND op that has been optimized out. + // Maybe we could skip multiple, but that'd slow things down and is pretty uncommon. + int nextSkip = -1; + + bool logBlocks = false; + for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) { + IRInst inst = in.GetInstructions()[i]; + + if (inst.op == IROp::Load32 || inst.op == IROp::Load16 || inst.op == IROp::Load16Ext) { + int dest = IRDestGPR(inst); for (int j = i + 1; j < n; j++) { const IRInst &laterInst = in.GetInstructions()[j]; const IRMeta *m = GetIRMeta(laterInst.op); - if (IRReadsFromGPR(laterInst, dest)) { - // Read from, so we can't optimize out. - read = true; + + if ((m->flags & IRFLAG_EXIT) != 0) { + // Exit, so we can't do the optimization. break; } - if (readByExit && (m->flags & IRFLAG_EXIT) != 0) { - read = true; + if (IRReadsFromGPR(laterInst, dest)) { + if (IRDestGPR(laterInst) == dest && laterInst.op == IROp::AndConst) { + const u32 mask = in.GetConstants()[laterInst.src2]; + // Here we are, maybe we can reduce the load size based on the mask. + if ((mask & 0xffffff00) == 0) { + inst.op = IROp::Load8; + if (mask == 0xff) { + nextSkip = j; + } + } else if ((mask & 0xffff0000) == 0 && inst.op == IROp::Load32) { + inst.op = IROp::Load16; + if (mask == 0xffff) { + nextSkip = j; + } + } + } + // If it was read, we can't do the optimization. break; } - if (IRDestGPR(laterInst) == dest) { - // Clobbered, we can optimize out. - // This happens sometimes with temporaries used for constant addresses. - read = false; + // Someone else wrote, so we can't do the optimization. break; } } - break; - - // Not a GPR output. - case -1: - break; } - // TODO: VFPU temps? 
- - if (read) { + if (i != nextSkip) { out.Write(inst); } } diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index 496f4e6aad86..d6dd041259c2 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -7,3 +7,4 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri bool PropagateConstants(const IRWriter &in, IRWriter &out); bool PurgeTemps(const IRWriter &in, IRWriter &out); +bool ReduceLoads(const IRWriter &in, IRWriter &out); From 29ed8d22012342f0dc1c57190bf4c17447e18537 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 18:34:27 -0700 Subject: [PATCH 59/77] jit-ir: ExitToReg doesn't write to registers. --- Core/MIPS/IR/IRCompBranch.cpp | 2 +- Core/MIPS/IR/IRFrontend.cpp | 2 +- Core/MIPS/IR/IRInst.cpp | 2 +- Core/MIPS/IR/IRInterpreter.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 76833bf32906..3dd14cef5152 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -363,7 +363,7 @@ void IRFrontend::Comp_JumpReg(MIPSOpcode op) { break; } - ir.Write(IROp::ExitToReg, destReg, 0, 0); + ir.Write(IROp::ExitToReg, 0, destReg, 0); js.compiling = false; } diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 3e6c0e755d6f..5c154c7df980 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -143,7 +143,7 @@ void IRFrontend::Comp_ReplacementFunc(MIPSOpcode op) { } else { ApplyRoundingMode(); ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); - ir.Write(IROp::ExitToReg, MIPS_REG_RA, 0, 0); + ir.Write(IROp::ExitToReg, 0, MIPS_REG_RA, 0); js.compiling = false; } } else { diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 11db5bf1bf53..983a4e6a3a12 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -119,7 +119,7 @@ static const IRMeta irMeta[] = { { IROp::ExitToConstIfGeZ, "ExitIfGeZ", "CG", IRFLAG_EXIT }, { IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG", IRFLAG_EXIT }, { IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG", IRFLAG_EXIT }, - { IROp::ExitToReg, "ExitToReg", "G", IRFLAG_EXIT | IRFLAG_SRC3 }, + { IROp::ExitToReg, "ExitToReg", "_G", IRFLAG_EXIT }, { IROp::Syscall, "Syscall", "_C", IRFLAG_EXIT }, { IROp::Break, "Break", "", IRFLAG_EXIT}, { IROp::SetPC, "SetPC", "_G" }, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 1a719046cdff..cd5ea342831c 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -523,7 +523,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c return constPool[inst->dest]; case IROp::ExitToReg: - return mips->r[inst->dest]; + return mips->r[inst->src1]; case IROp::ExitToConstIfEq: if (mips->r[inst->src1] == mips->r[inst->src2]) From a8126ca1321e82ebf0aa2db358161e8cf0febd25 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 20:56:47 -0700 Subject: [PATCH 60/77] jit-ir: Add some missing CONDITIONAL_DISABLEs. 
--- Core/MIPS/IR/IRCompVFPU.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index f4736be521ee..57433a4d11dc 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -223,6 +223,7 @@ namespace MIPSComp { } void IRFrontend::Comp_SV(MIPSOpcode op) { + CONDITIONAL_DISABLE; s32 offset = (signed short)(op & 0xFFFC); int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5); MIPSGPReg rs = _RS; @@ -241,6 +242,7 @@ namespace MIPSComp { } void IRFrontend::Comp_SVQ(MIPSOpcode op) { + CONDITIONAL_DISABLE; int imm = (signed short)(op & 0xFFFC); int vt = (((op >> 16) & 0x1f)) | ((op & 1) << 5); MIPSGPReg rs = _RS; @@ -280,6 +282,7 @@ namespace MIPSComp { } void IRFrontend::Comp_VVectorInit(MIPSOpcode op) { + CONDITIONAL_DISABLE; if (js.HasUnknownPrefix()) { DISABLE; } @@ -300,6 +303,7 @@ namespace MIPSComp { } void IRFrontend::Comp_VIdt(MIPSOpcode op) { + CONDITIONAL_DISABLE; if (js.HasUnknownPrefix()) { DISABLE; } @@ -320,6 +324,7 @@ namespace MIPSComp { } void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) { + CONDITIONAL_DISABLE; MatrixSize sz = GetMtxSize(op); if (sz != M_4x4) { DISABLE; @@ -616,6 +621,7 @@ namespace MIPSComp { } void IRFrontend::Comp_VV2Op(MIPSOpcode op) { + CONDITIONAL_DISABLE; if (js.HasUnknownPrefix()) DISABLE; From 57b3dbff7e800550517f24fbf44fe511a42263e9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 20:57:10 -0700 Subject: [PATCH 61/77] jit-ir: Avoid flushing on a few Vec4 ops. --- Core/MIPS/IR/IRCompVFPU.cpp | 2 +- Core/MIPS/IR/IRPassSimplify.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 57433a4d11dc..6caefc6e290b 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -85,7 +85,7 @@ namespace MIPSComp { return IsOverlapSafeAllowS(dreg, -1, sn, sregs, tn, tregs); } - void IRFrontend::Comp_VPFX(MIPSOpcode op) { + void IRFrontend::Comp_VPFX(MIPSOpcode op) { CONDITIONAL_DISABLE; int data = op & 0xFFFFF; int regnum = (op >> 24) & 3; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index cdb766eeceaa..d604f6531475 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -369,10 +369,12 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::Vec4Init: + case IROp::Vec4Mov: case IROp::Vec4Add: case IROp::Vec4Sub: case IROp::Vec4Mul: case IROp::Vec4Div: + case IROp::Vec4Dot: case IROp::Vec4Scale: case IROp::Vec4Shuffle: out.Write(inst); @@ -392,6 +394,8 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { gpr.MapDirtyIn(inst.dest, IRREG_VFPU_CTRL_BASE + inst.src1); goto doDefault; + case IROp::CallReplacement: + case IROp::Break: case IROp::Syscall: case IROp::Interpret: case IROp::ExitToConst: From 1ddb2fbfb99f73cf8363a15ca018ff9993961235 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 21:02:56 -0700 Subject: [PATCH 62/77] jit-ir: Fix non-SSE Vec4Scale. 
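(Illustrative note, not part of the original commit message: Vec4Scale multiplies a
four-lane vector by one scalar, so the portable fallback must not index the scalar
operand per lane. The intended scalar form is roughly

    for (int i = 0; i < 4; i++)
        mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2];

whereas the pre-fix loop read f[src2 + i] and silently pulled in the three registers
that happen to follow the scale factor.)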
--- Core/MIPS/IR/IRInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index cd5ea342831c..4141bc66da6e 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -208,7 +208,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2]))); #else for (int i = 0; i < 4; i++) - mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; + mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2]; #endif break; From c11c0465decc8f5e4bb26ce9ce478dc7a27f2de7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 21:25:57 -0700 Subject: [PATCH 63/77] jir-ir: Correct vftm SIMD regs. --- Core/MIPS/IR/IRCompVFPU.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 6caefc6e290b..3d22a608d3e5 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1082,25 +1082,25 @@ namespace MIPSComp { GetVectorRegs(tregs, sz, _VT); GetVectorRegs(dregs, sz, _VD); - // SIMD-optimized implementations - if (msz == M_4x4 && IsConsecutive4(tregs) && IsConsecutive4(dregs)) { + // SIMD-optimized implementations - if sregs[0..3] is consecutive, the rest are too. + if (msz == M_4x4 && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { int s0 = IRVTEMP_0; int s1 = IRVTEMP_PFX_T; - if (!IsConsecutive4(sregs)) { + if (!IsConsecutive4(tregs)) { ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); for (int i = 1; i < 4; i++) { if (!homogenous || (i != n - 1)) { - ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[i]); + ir.Write(IROp::Vec4Scale, s1, sregs[i * 4], tregs[i]); ir.Write(IROp::Vec4Add, s0, s0, s1); } else { - ir.Write(IROp::Vec4Add, s0, s0, sregs[i]); + ir.Write(IROp::Vec4Add, s0, s0, sregs[i * 4]); } } ir.Write(IROp::Vec4Mov, dregs[0], s0); return; } else if (!homogenous) { for (int i = 0; i < 4; i++) { - ir.Write(IROp::Vec4Dot, s0 + i, sregs[i], tregs[0]); + ir.Write(IROp::Vec4Dot, s0 + i, sregs[i * 4], tregs[0]); } ir.Write(IROp::Vec4Mov, dregs[0], s0); return; From f52120353b6ad4fc9b03bf79b6958f2749032f15 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 22:29:31 -0700 Subject: [PATCH 64/77] jit-ir: Apply prefixes for vector init ops. Without this, Gods Eater Burst is horribly broken. --- Core/MIPS/IR/IRCompVFPU.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 3d22a608d3e5..9d2dc7c1fb5c 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -292,14 +292,16 @@ namespace MIPSComp { int vd = _VD; int n = GetNumVectorElements(sz); u8 dregs[4]; - GetVectorRegs(dregs, sz, vd); - if (sz == 4 && IsVectorColumn(vd)) { + GetVectorRegsPrefixD(dregs, sz, vd); + + if (sz == V_Quad && IsConsecutive4(dregs)) { ir.Write(IROp::Vec4Init, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE)); } else { for (int i = 0; i < n; i++) { ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(type == 6 ? 
0.0f : 1.0f)); } } + ApplyPrefixD(dregs, sz); } void IRFrontend::Comp_VIdt(MIPSOpcode op) { @@ -313,14 +315,16 @@ namespace MIPSComp { if (sz != V_Quad) DISABLE; - if (!IsVectorColumn(vd)) - DISABLE; - u8 dregs[4]; - GetVectorRegs(dregs, sz, vd); + GetVectorRegsPrefixD(dregs, sz, vd); + if (!IsConsecutive4(dregs)) { + DISABLE; + } int row = vd & 3; + // Might not be consecutive if masked. Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row); ir.Write(IROp::Vec4Init, dregs[0], (int)init); + ApplyPrefixD(dregs, sz); } void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) { @@ -794,7 +798,7 @@ namespace MIPSComp { switch ((op >> 21) & 0x1f) { case 3: //mfv / mfvc // rt = 0, imm = 255 appears to be used as a CPU interlock by some games. - if (rt != 0) { + if (rt != MIPS_REG_ZERO) { if (imm < 128) { //R(rt) = VI(imm); ir.Write(IROp::FMovToGPR, rt, vfpuBase + voffset[imm]); } else { From 9e3572dc63b76c7eb06ad02efeb7e86786667e2a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 22:38:56 -0700 Subject: [PATCH 65/77] jit-ir: Improve vidt to handle more cases. --- Core/MIPS/IR/IRCompVFPU.cpp | 30 +++++++++++++++++++++--------- Core/MIPS/IR/IRPassSimplify.cpp | 2 +- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 9d2dc7c1fb5c..ce41c19712ec 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -312,18 +312,30 @@ namespace MIPSComp { int vd = _VD; VectorSize sz = GetVecSize(op); - if (sz != V_Quad) - DISABLE; - u8 dregs[4]; GetVectorRegsPrefixD(dregs, sz, vd); - if (!IsConsecutive4(dregs)) { - DISABLE; + + if (sz == 4 && IsConsecutive4(dregs)) { + int row = vd & 3; + Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row); + ir.Write(IROp::Vec4Init, dregs[0], (int)init); + } else { + switch (sz) { + case V_Pair: + ir.Write(IROp::SetConstF, dregs[0], ir.AddConstantFloat((vd & 1) == 0 ? 1.0f : 0.0f)); + ir.Write(IROp::SetConstF, dregs[1], ir.AddConstantFloat((vd & 1) == 1 ? 1.0f : 0.0f)); + break; + case V_Quad: + ir.Write(IROp::SetConstF, dregs[0], ir.AddConstantFloat((vd & 3) == 0 ? 1.0f : 0.0f)); + ir.Write(IROp::SetConstF, dregs[1], ir.AddConstantFloat((vd & 3) == 1 ? 1.0f : 0.0f)); + ir.Write(IROp::SetConstF, dregs[2], ir.AddConstantFloat((vd & 3) == 2 ? 1.0f : 0.0f)); + ir.Write(IROp::SetConstF, dregs[3], ir.AddConstantFloat((vd & 3) == 3 ? 1.0f : 0.0f)); + break; + default: + DISABLE; + } } - int row = vd & 3; - // Might not be consecutive if masked. - Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row); - ir.Write(IROp::Vec4Init, dregs[0], (int)init); + ApplyPrefixD(dregs, sz); } diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index d604f6531475..de97635ae8ae 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -529,7 +529,7 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { break; } - // TODO: VFPU temps? + // TODO: VFPU temps? Especially for masked dregs. insts.push_back(inst); } From 7b43e0e59d561c8b410e9224ceffc4b784ce0904 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 12 May 2016 22:53:21 -0700 Subject: [PATCH 66/77] jit-ir: Add nan/inf compares. Without this, Gods Eater Burst crashes before going in game. 
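(Sketch for context, not part of the original message: the new VC_EN/NN/EI/NI/ES/NS
cases rely on PPSSPP's float-classification helpers. In portable C++ they amount to
roughly

    #include <cmath>  // HUGE_VALF
    static inline bool my_isnan(float f)      { return f != f; }
    static inline bool my_isinf(float f)      { return f == HUGE_VALF || f == -HUGE_VALF; }
    static inline bool my_isnanorinf(float f) { return my_isnan(f) || my_isinf(f); }

though the real definitions live in the common math headers and may differ in detail.)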
--- Core/MIPS/IR/IRInterpreter.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 4141bc66da6e..2b2368ec8c8a 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -226,6 +226,12 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break; case VC_EZ: result = mips->f[inst->src1] == 0.0f; break; case VC_NZ: result = mips->f[inst->src1] != 0.0f; break; + case VC_EN: result = my_isnan(mips->f[inst->src1]); break; + case VC_NN: result = !my_isnan(mips->f[inst->src1]); break; + case VC_EI: result = my_isinf(mips->f[inst->src1]); break; + case VC_NI: result = !my_isinf(mips->f[inst->src1]); break; + case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break; + case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break; case VC_TR: result = 1; break; case VC_FL: result = 0; break; default: From 066b0b7fdfc18e70896935ef199eda13f60d04a1 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 13 May 2016 07:59:39 -0700 Subject: [PATCH 67/77] jit-ir: Optimize out beql; break; sequences. These are often used following divs, and are harmless. Things get a bit easier if we just never compile them. --- Core/MIPS/IR/IRCompBranch.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 3dd14cef5152..a6f8b93cd086 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -47,11 +47,12 @@ #define LOOPOPTIMIZATION 0 +#define MIPS_IS_BREAK(op) (((op) & 0xFC00003F) == 13) + using namespace MIPSAnalyst; namespace MIPSComp { - using namespace Arm64Gen; void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { if (js.inDelaySlot) { @@ -66,6 +67,16 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) { MIPSOpcode delaySlotOp = GetOffsetInstruction(1); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); + // Often, div/divu are followed by a likely "break" if the divisor was zero. + // Stalling is not really useful for us, so we optimize this out. + if (likely && offset == 4 && MIPS_IS_BREAK(delaySlotOp)) { + // Okay, let's not actually branch at all. We're done here. + EatInstruction(delaySlotOp); + // Let's not double-count the downcount, though. + js.downcountAmount--; + return; + } + int dcAmount = js.downcountAmount; ir.Write(IROp::Downcount, 0, dcAmount & 0xFF, dcAmount >> 8); js.downcountAmount = 0; @@ -136,7 +147,7 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, } void IRFrontend::Comp_RelBranch(MIPSOpcode op) { - // The CC flags here should be opposite of the actual branch becuase they skip the branching action. + // The CC flags here should be opposite of the actual branch because they skip the branching action. 
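// The guest-side pattern the beql/break elision targets looks roughly like this
// (a branch-likely executes its delay slot only when taken, and with offset == 4
// the branch target is simply the instruction after the delay slot):
//   div   $s0, $s1
//   beql  $s1, $zero, 1f
//   break                  ; reached only when the divisor $s1 == 0
// 1:
// The MIPS_IS_BREAK test above matches any break encoding: SPECIAL opcode (top
// six bits zero) with funct 0x0D, ignoring the 20-bit code field in between.
// A standalone sketch of that check, for illustration only:

#include <cstdint>

static bool IsMipsBreak(uint32_t op) {
	return (op & 0xFC00003Fu) == 13u;  // opcode == 0, funct == 13 (break)
}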
switch (op >> 26) { case 4: BranchRSRTComp(op, IRComparison::NotEqual, false); break;//beq case 5: BranchRSRTComp(op, IRComparison::Equal, false); break;//bne From f636b2a315cff83fcfc150af61536c4f02d5d831 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 13 May 2016 19:31:27 +0200 Subject: [PATCH 68/77] Minor build and other fixes --- Core/MIPS/IR/IRCompBranch.cpp | 1 - Core/MIPS/IR/IRCompLoadStore.cpp | 2 -- Core/MIPS/IR/IRCompVFPU.cpp | 7 ++----- Core/MIPS/IR/IRFrontend.cpp | 4 ++-- Core/MIPS/IR/IRInst.h | 1 + Core/MIPS/IR/IRInterpreter.cpp | 8 ++++---- Core/MIPS/IR/IRPassSimplify.cpp | 2 ++ 7 files changed, 11 insertions(+), 14 deletions(-) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index a6f8b93cd086..f7b875100df4 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -192,7 +192,6 @@ void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { int offset = _IMM16 << 2; u32 targetAddr = GetCompilerPC() + offset + 4; - MIPSOpcode delaySlotOp = GetOffsetInstruction(1); ir.Write(IROp::FpCondToReg, IRTEMP_LHS); if (!likely) CompileDelaySlot(); diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index b890f4ff6808..c57f1ec3235f 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -73,7 +73,6 @@ namespace MIPSComp { CONDITIONAL_DISABLE; int offset = (signed short)(op & 0xFFFF); - bool load = false; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; int o = op >> 26; @@ -113,7 +112,6 @@ namespace MIPSComp { case 34: //lwl case 38: //lwr - load = true; case 42: //swl case 46: //swr DISABLE; diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index ce41c19712ec..74590a213a28 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -541,13 +541,11 @@ namespace MIPSComp { if (allowSIMD && sz == V_Quad && !usingTemps && IsConsecutive4(dregs) && IsConsecutive4(sregs) && IsConsecutive4(tregs)) { IROp opFunc = IROp::Nop; - bool symmetric = false; switch (op >> 26) { case 24: //VFPU0 switch ((op >> 23) & 7) { case 0: // d[i] = s[i] + t[i]; break; //vadd opFunc = IROp::Vec4Add; - symmetric = true; break; case 1: // d[i] = s[i] - t[i]; break; //vsub opFunc = IROp::Vec4Sub; @@ -562,7 +560,6 @@ namespace MIPSComp { { case 0: // d[i] = s[i] * t[i]; break; //vmul opFunc = IROp::Vec4Mul; - symmetric = true; break; } break; @@ -1053,7 +1050,7 @@ namespace MIPSComp { } } else if (sz == M_4x4) { // Tekken 6 has a case here: MEE - // logBlocks = 1; + logBlocks = 1; } // Fallback. 
Expands a LOT @@ -1244,7 +1241,7 @@ namespace MIPSComp { DISABLE; } - logBlocks = 1; + // logBlocks = 1; VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 5c154c7df980..d615f936f986 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -257,7 +257,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v if (logBlocks > 0 && dontLogBlocks == 0) { ILOG("=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); - for (int i = 0; i < ir.GetInstructions().size(); i++) { + for (size_t i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); ILOG("%s", buf); @@ -267,7 +267,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v if (logBlocks > 0 && dontLogBlocks == 0) { ILOG("=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); - for (int i = 0; i < code->GetInstructions().size(); i++) { + for (size_t i = 0; i < code->GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], code->GetConstants().data()); ILOG("%s", buf); diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index d2195892ba82..b33bec732eb5 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "Common/CommonTypes.h" #include "Core/MIPS/MIPS.h" diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 2b2368ec8c8a..c9d2b4ed1857 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -146,7 +146,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); #else - memcpy(&mips->f[inst->dest + i], vec4InitValues[inst->src1], 4 * sizeof(float)); + memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float)); #endif break; @@ -247,16 +247,16 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::FCmpVfpuAggregate: { - int mask = inst->dest; + u32 mask = inst->dest; u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC]; int a = (cc & mask) ? 0x10 : 0x00; int b = (cc & mask) == mask ? 
0x20 : 0x00; - mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | a | b;; + mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | a | b; } break; case IROp::FCmovVfpuCC: - if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0x7f)) & 1) == (inst->src2 >> 7)) { + if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0xf)) & 1) == (inst->src2 >> 7)) { mips->f[inst->dest] = mips->f[inst->src1]; } break; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index de97635ae8ae..1fd98a3158f9 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -1,3 +1,5 @@ +#include + #include "Common/Log.h" #include "Core/MIPS/IR/IRPassSimplify.h" #include "Core/MIPS/IR/IRRegCache.h" From 91a6cf5e44e74e40ec740747a00444b543b30e32 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 13 May 2016 20:14:03 +0200 Subject: [PATCH 69/77] Add a couple more passes (2-op, optimize f<->v fp moves) --- Core/MIPS/IR/IRCompVFPU.cpp | 3 +- Core/MIPS/IR/IRFrontend.cpp | 3 +- Core/MIPS/IR/IRPassSimplify.cpp | 108 ++++++++++++++++++++++++++++++++ Core/MIPS/IR/IRPassSimplify.h | 3 + 4 files changed, 114 insertions(+), 3 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 74590a213a28..e0b5ecccd391 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1221,12 +1221,11 @@ namespace MIPSComp { VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); - VCondition cond = (VCondition)(op & 0xF); - u8 sregs[4], tregs[4]; GetVectorRegsPrefixS(sregs, sz, _VS); GetVectorRegsPrefixT(tregs, sz, _VT); + VCondition cond = (VCondition)(op & 0xF); int mask = 0; for (int i = 0; i < n; i++) { ir.Write(IROp::FCmpVfpuBit, cond | (i << 4), sregs[i], tregs[i]); diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index d615f936f986..c41c3cac06a8 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -232,8 +232,10 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v IRWriter *code = &ir; if (true) { static const IRPassFunc passes[] = { + &OptimizeFPMoves, &PropagateConstants, &PurgeTemps, + // &ThreeOpToTwoOp, }; if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) logBlocks = 1; @@ -286,5 +288,4 @@ void IRFrontend::Comp_RunBlock(MIPSOpcode op) { ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); } - } // namespace \ No newline at end of file diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 1fd98a3158f9..4f9da4e49561 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -95,6 +95,114 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri return logBlocks; } +bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { + //FMovToGPR a0, f12 + //FMovFromGPR f14, a0 + // to + //FMovToGPR a0, f12 + //FMov f14, f12 + + const u32 *constants = in.GetConstants().data(); + bool logBlocks = false; + IRInst prev; + prev.op = IROp::Nop; + for (int i = 0; i < (int)in.GetInstructions().size(); i++) { + IRInst inst = in.GetInstructions()[i]; + switch (inst.op) { + case IROp::FMovFromGPR: + if (prev.op == IROp::FMovToGPR && prev.dest == inst.src1) { + inst.op = IROp::FMov; + inst.src1 = prev.src1; + out.Write(inst); + logBlocks = true; + } else { + out.Write(inst); + } + break; + default: + // Remap constants to the new reality + const IRMeta *m = GetIRMeta(inst.op); + switch (m->types[0]) { + case 'C': + inst.dest = out.AddConstant(constants[inst.dest]); + break; + } + switch 
(m->types[1]) { + case 'C': + inst.src1 = out.AddConstant(constants[inst.src1]); + break; + } + switch (m->types[2]) { + case 'C': + inst.src2 = out.AddConstant(constants[inst.src2]); + break; + } + out.Write(inst); + break; + } + prev = inst; + } + return logBlocks; +} + +// Might be useful later on x86. +bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { + const u32 *constants = in.GetConstants().data(); + bool logBlocks = false; + for (int i = 0; i < (int)in.GetInstructions().size(); i++) { + IRInst inst = in.GetInstructions()[i]; + const IRMeta *meta = GetIRMeta(inst.op); + switch (inst.op) { + case IROp::Sub: + case IROp::Slt: + case IROp::SltU: + case IROp::Add: + case IROp::And: + case IROp::Or: + case IROp::Xor: + if (inst.src1 != inst.dest && inst.src2 != inst.dest) { + out.Write(IROp::Mov, inst.dest, inst.src1); + out.Write(inst.op, inst.dest, inst.dest, inst.src2); + } else { + out.Write(inst); + } + break; + case IROp::FMul: + case IROp::FAdd: + if (inst.src1 != inst.dest && inst.src2 != inst.dest) { + out.Write(IROp::FMov, inst.dest, inst.src1); + out.Write(inst.op, inst.dest, inst.dest, inst.src2); + } else { + out.Write(inst); + } + break; + default: + { + // Remap constants to the new reality + const IRMeta *m = GetIRMeta(inst.op); + switch (m->types[0]) { + case 'C': + inst.dest = out.AddConstant(constants[inst.dest]); + break; + } + switch (m->types[1]) { + case 'C': + inst.src1 = out.AddConstant(constants[inst.src1]); + break; + } + switch (m->types[2]) { + case 'C': + inst.src2 = out.AddConstant(constants[inst.src2]); + break; + } + out.Write(inst); + break; + } + } + } + return logBlocks; +} + bool PropagateConstants(const IRWriter &in, IRWriter &out) { IRRegCache gpr(&out); diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index d6dd041259c2..80b979fbf88a 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -5,6 +5,9 @@ typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out); bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out); +// Block optimizer passes of varying usefulness. 
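// ThreeOpToTwoOp, declared above, is aimed at two-operand architectures: x86 ALU
// ops overwrite their first source, so "d = a OP b" has to become
// "d = a; d = d OP b". That split is only safe when the destination aliases
// neither source: if d == a the instruction is already in two-operand shape, and
// if d == b the leading move would clobber an input before it is read.
// A minimal sketch of the rule using stand-in types (not the real IRInst API):

struct ThreeOpSketch {
	int dest, src1, src2;
};

static bool CanSplitToTwoOp(const ThreeOpSketch &op) {
	return op.dest != op.src1 && op.dest != op.src2;
}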
bool PropagateConstants(const IRWriter &in, IRWriter &out); bool PurgeTemps(const IRWriter &in, IRWriter &out); bool ReduceLoads(const IRWriter &in, IRWriter &out); +bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out); +bool OptimizeFPMoves(const IRWriter &in, IRWriter &out); From 5b2504120dabb47d1719b7a3902beb8a6825a9a4 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 13 May 2016 20:15:20 +0200 Subject: [PATCH 70/77] Optimize some common prefixes --- Core/MIPS/IR/IRCompVFPU.cpp | 35 +++++++++++++++++++++++++++++---- Core/MIPS/IR/IRInst.cpp | 2 ++ Core/MIPS/IR/IRInst.h | 2 ++ Core/MIPS/IR/IRInterpreter.cpp | 10 ++++++++++ Core/MIPS/IR/IRPassSimplify.cpp | 4 +++- 5 files changed, 48 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index e0b5ecccd391..6c6a52949a6a 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -108,6 +108,13 @@ namespace MIPSComp { } } + static void InitRegs(u8 *vregs, int reg) { + vregs[0] = reg; + vregs[1] = reg + 1; + vregs[2] = reg + 2; + vregs[3] = reg + 3; + } + void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg) { if (prefix == 0xE4) return; @@ -119,6 +126,27 @@ namespace MIPSComp { for (int i = 0; i < n; i++) origV[i] = vregs[i]; + // Some common vector prefixes + if (sz == V_Quad && IsConsecutive4(vregs)) { + if (prefix == 0xF00E4 && IsConsecutive4(vregs)) { + InitRegs(vregs, tempReg); + ir.Write(IROp::Vec4Neg, vregs[0], origV[0]); + return; + } + if (prefix == 0x00FE4 && IsConsecutive4(vregs)) { + InitRegs(vregs, tempReg); + ir.Write(IROp::Vec4Abs, vregs[0], origV[0]); + return; + } + // Pure shuffle + if (prefix == (prefix & 0xFF)) { + InitRegs(vregs, tempReg); + ir.Write(IROp::Vec4Shuffle, vregs[0], origV[0], prefix); + return; + } + } + + // Alright, fall back to the generic approach. for (int i = 0; i < n; i++) { int regnum = (prefix >> (i * 2)) & 3; int abs = (prefix >> (8 + i)) & 1; @@ -395,7 +423,6 @@ namespace MIPSComp { GetVectorRegsPrefixT(tregs, sz, vt); GetVectorRegsPrefixD(dregs, V_Single, vd); - // TODO: applyprefixST here somehow (shuffle, etc...) ir.Write(IROp::FMul, IRVTEMP_0, sregs[0], tregs[0]); int n = GetNumVectorElements(sz); @@ -1050,7 +1077,7 @@ namespace MIPSComp { } } else if (sz == M_4x4) { // Tekken 6 has a case here: MEE - logBlocks = 1; + // logBlocks = 1; } // Fallback. 
Expands a LOT @@ -1141,8 +1168,8 @@ namespace MIPSComp { tempregs[i] = temp; } for (int i = 0; i < n; i++) { - u8 temp = tempregs[i]; - ir.Write(IROp::FMov, dregs[i], temp); + if (tempregs[i] != dregs[i]) + ir.Write(IROp::FMov, dregs[i], tempregs[i]); } } diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 983a4e6a3a12..dac93617f235 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -109,6 +109,8 @@ static const IRMeta irMeta[] = { { IROp::Vec4Mul, "Vec4Mul", "FFF" }, { IROp::Vec4Scale, "Vec4Scale", "FFF" }, { IROp::Vec4Dot, "Vec4Dot", "FFF" }, + { IROp::Vec4Neg, "Vec4Neg", "FF" }, + { IROp::Vec4Abs, "Vec4Abs", "FF" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index b33bec732eb5..86e1e31ae8b1 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -159,6 +159,8 @@ enum class IROp : u8 { Vec4Div, Vec4Scale, Vec4Dot, + Vec4Neg, + Vec4Abs, // vx2i Vec4ExpandU16ToU32Hi, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index c9d2b4ed1857..07ce9e1d79e2 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -212,6 +212,16 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c #endif break; + case IROp::Vec4Neg: + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = -mips->f[inst->src1 + i]; + break; + + case IROp::Vec4Abs: + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]); + break; + case IROp::FCmpVfpuBit: { int op = inst->dest & 0xF; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 4f9da4e49561..9572db730978 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -114,11 +114,11 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { inst.op = IROp::FMov; inst.src1 = prev.src1; out.Write(inst); - logBlocks = true; } else { out.Write(inst); } break; + default: // Remap constants to the new reality const IRMeta *m = GetIRMeta(inst.op); @@ -487,6 +487,8 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { case IROp::Vec4Dot: case IROp::Vec4Scale: case IROp::Vec4Shuffle: + case IROp::Vec4Neg: + case IROp::Vec4Abs: out.Write(inst); break; From 5923013d659f5b45eeef7660607c853673559c96 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 13 May 2016 20:21:19 +0200 Subject: [PATCH 71/77] Simple workaround for timing issue with coreState after syscall. Also fixes off by one in ForceCheck. --- Core/CoreTiming.cpp | 4 ++-- Core/MIPS/IR/IRInterpreter.cpp | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index 8e816fcc454d..feece5e5b33d 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -567,10 +567,10 @@ void MoveEvents() void ForceCheck() { - int cyclesExecuted = slicelength - currentMIPS->downcount; + int cyclesExecuted = slicelength - currentMIPS->downcount + 1; globalTimer += cyclesExecuted; // This will cause us to check for new events immediately. - currentMIPS->downcount = 0; + currentMIPS->downcount = -1; // But let's not eat a bunch more time in Advance() because of this. 
slicelength = 0; } diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 07ce9e1d79e2..c0bcdb91ca3c 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -10,6 +10,8 @@ #include "Core/HLE/ReplaceTables.h" #include "Core/MIPS/MIPSTables.h" #include "Core/MIPS/MIPSVFPUUtils.h" +#include "Core/System.h" +#include "Core/CoreTiming.h" #include "math/math_util.h" #include "Common/CommonTypes.h" @@ -583,6 +585,8 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c { MIPSOpcode op(constPool[inst->src1]); CallSyscall(op); + if (coreState != CORE_RUNNING) + CoreTiming::ForceCheck(); return mips->pc; } From b7091a8f5da11bb8c02ebdbbf886f9b214134a10 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 13 May 2016 21:02:23 +0200 Subject: [PATCH 72/77] Simplifications and fixes --- Core/CoreTiming.cpp | 4 +-- Core/MIPS/IR/IRCompVFPU.cpp | 23 +++++++++++++---- Core/MIPS/IR/IRPassSimplify.cpp | 44 ++++++--------------------------- 3 files changed, 28 insertions(+), 43 deletions(-) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index feece5e5b33d..61956cf1e3a1 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -567,12 +567,12 @@ void MoveEvents() void ForceCheck() { - int cyclesExecuted = slicelength - currentMIPS->downcount + 1; + int cyclesExecuted = slicelength - currentMIPS->downcount; globalTimer += cyclesExecuted; // This will cause us to check for new events immediately. currentMIPS->downcount = -1; // But let's not eat a bunch more time in Advance() because of this. - slicelength = 0; + slicelength = 1; } void Advance() diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 6c6a52949a6a..2160175cd286 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -128,12 +128,12 @@ namespace MIPSComp { // Some common vector prefixes if (sz == V_Quad && IsConsecutive4(vregs)) { - if (prefix == 0xF00E4 && IsConsecutive4(vregs)) { + if (prefix == 0xF00E4) { InitRegs(vregs, tempReg); ir.Write(IROp::Vec4Neg, vregs[0], origV[0]); return; } - if (prefix == 0x00FE4 && IsConsecutive4(vregs)) { + if (prefix == 0x00FE4) { InitRegs(vregs, tempReg); ir.Write(IROp::Vec4Abs, vregs[0], origV[0]); return; @@ -1123,7 +1123,7 @@ namespace MIPSComp { GetVectorRegs(dregs, sz, _VD); // SIMD-optimized implementations - if sregs[0..3] is consecutive, the rest are too. 
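// The prefix fast paths above decode the VFPU source-prefix word directly. Per
// the generic per-lane loop in ApplyPrefixST, the low byte holds two swizzle
// selector bits per lane and bits 8..11 are per-lane abs flags; the negate flags
// are taken here to sit in the high nibble that 0xF00E4 sets. The special-cased
// values then read as:
//   0x000E4 - swizzle 3,2,1,0 with no flags: the identity prefix, nothing to do
//   0x00FE4 - identity swizzle + abs on all four lanes    -> one Vec4Abs
//   0xF00E4 - identity swizzle + negate on all four lanes -> one Vec4Neg
//   only bits 0..7 set                                    -> pure Vec4Shuffle
// Standalone helpers expressing the two field accesses visible in the generic
// loop (sketch only):

#include <cstdint>

static int PrefixSwizzle(uint32_t prefix, int lane) {
	return (prefix >> (lane * 2)) & 3;
}

static bool PrefixAbs(uint32_t prefix, int lane) {
	return ((prefix >> (8 + lane)) & 1) != 0;
}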
- if (msz == M_4x4 && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { + if (msz == M_4x4 && IsConsecutive4(sregs)) { int s0 = IRVTEMP_0; int s1 = IRVTEMP_PFX_T; if (!IsConsecutive4(tregs)) { @@ -1136,13 +1136,26 @@ namespace MIPSComp { ir.Write(IROp::Vec4Add, s0, s0, sregs[i * 4]); } } - ir.Write(IROp::Vec4Mov, dregs[0], s0); + + if (IsConsecutive4(dregs)) { + ir.Write(IROp::Vec4Mov, dregs[0], s0); + } else { + for (int i = 0; i < 4; i++) { + ir.Write(IROp::FMov, dregs[i], s0 + i); + } + } return; } else if (!homogenous) { for (int i = 0; i < 4; i++) { ir.Write(IROp::Vec4Dot, s0 + i, sregs[i * 4], tregs[0]); } - ir.Write(IROp::Vec4Mov, dregs[0], s0); + if (IsConsecutive4(dregs)) { + ir.Write(IROp::Vec4Mov, dregs[0], s0); + } else { + for (int i = 0; i < 4; i++) { + ir.Write(IROp::FMov, dregs[i], s0 + i); + } + } return; } } else if (msz == M_4x4) { diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 9572db730978..637e6b22fae4 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -120,28 +120,15 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { break; default: - // Remap constants to the new reality - const IRMeta *m = GetIRMeta(inst.op); - switch (m->types[0]) { - case 'C': - inst.dest = out.AddConstant(constants[inst.dest]); - break; - } - switch (m->types[1]) { - case 'C': - inst.src1 = out.AddConstant(constants[inst.src1]); - break; - } - switch (m->types[2]) { - case 'C': - inst.src2 = out.AddConstant(constants[inst.src2]); - break; - } out.Write(inst); break; } prev = inst; } + // Can reuse the old constants array - not touching constants in this pass. + for (u32 value : in.GetConstants()) { + out.AddConstant(value); + } return logBlocks; } @@ -177,28 +164,13 @@ bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { } break; default: - { - // Remap constants to the new reality - const IRMeta *m = GetIRMeta(inst.op); - switch (m->types[0]) { - case 'C': - inst.dest = out.AddConstant(constants[inst.dest]); - break; - } - switch (m->types[1]) { - case 'C': - inst.src1 = out.AddConstant(constants[inst.src1]); - break; - } - switch (m->types[2]) { - case 'C': - inst.src2 = out.AddConstant(constants[inst.src2]); - break; - } out.Write(inst); break; } - } + } + // Can reuse the old constants array - not touching constants in this pass. + for (u32 value : in.GetConstants()) { + out.AddConstant(value); } return logBlocks; } From b7224e269c822725ff2dc69220f18e6f31a2acf5 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 13 May 2016 19:50:25 -0700 Subject: [PATCH 73/77] Qt: Remove CPU core option from menu. We've removed on Windows too, and this fixes a build error. 
--- Qt/Core.pro | 2 ++ Qt/mainwindow.cpp | 2 -- Qt/mainwindow.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Qt/Core.pro b/Qt/Core.pro index 13827a029d01..8a23d36e0e41 100644 --- a/Qt/Core.pro +++ b/Qt/Core.pro @@ -38,6 +38,7 @@ SOURCES += $$P/Core/*.cpp \ # Core $$P/Core/HLE/*.cpp \ $$P/Core/HW/*.cpp \ $$P/Core/MIPS/*.cpp \ + $$P/Core/MIPS/IR/*.cpp \ $$P/Core/MIPS/JitCommon/*.cpp \ $$P/Core/Util/AudioFormat.cpp \ $$P/Core/Util/BlockAllocator.cpp \ @@ -56,6 +57,7 @@ HEADERS += $$P/Core/*.h \ $$P/Core/HLE/*.h \ $$P/Core/HW/*.h \ $$P/Core/MIPS/*.h \ + $$P/Core/MIPS/IR/*.h \ $$P/Core/MIPS/JitCommon/*.h \ $$P/Core/Util/AudioFormat.h \ $$P/Core/Util/BlockAllocator.h \ diff --git a/Qt/mainwindow.cpp b/Qt/mainwindow.cpp index 12862659c021..9ceca618ab45 100644 --- a/Qt/mainwindow.cpp +++ b/Qt/mainwindow.cpp @@ -515,8 +515,6 @@ void MainWindow::createMenus() MenuTree* optionsMenu = new MenuTree(this, menuBar(), QT_TR_NOOP("&Options")); // - Core MenuTree* coreMenu = new MenuTree(this, optionsMenu, QT_TR_NOOP("&Core")); - coreMenu->add(new MenuAction(this, SLOT(dynarecAct()), QT_TR_NOOP("&CPU Dynarec"))) - ->addEventChecked(&g_Config.bJit); coreMenu->add(new MenuAction(this, SLOT(vertexDynarecAct()), QT_TR_NOOP("&Vertex Decoder Dynarec"))) ->addEventChecked(&g_Config.bVertexDecoderJit); coreMenu->add(new MenuAction(this, SLOT(fastmemAct()), QT_TR_NOOP("Fast &Memory (unstable)"))) diff --git a/Qt/mainwindow.h b/Qt/mainwindow.h index ae201054aa1f..1428f4dea8bc 100644 --- a/Qt/mainwindow.h +++ b/Qt/mainwindow.h @@ -87,7 +87,6 @@ private slots: // Options // Core - void dynarecAct() { g_Config.iCpuCore = g_Config.iCpuCore == CPU_CORE_INTERPRETER ? CPU_CORE_JIT : CPU_CORE_INTERPRETER; } void vertexDynarecAct() { g_Config.bVertexDecoderJit = !g_Config.bVertexDecoderJit; } void fastmemAct() { g_Config.bFastMemory = !g_Config.bFastMemory; } void ignoreIllegalAct() { g_Config.bIgnoreBadMemAccess = !g_Config.bIgnoreBadMemAccess; } From efc8a8e3531f27dd2063e6ab7d726da60d487551 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 13 May 2016 20:17:20 -0700 Subject: [PATCH 74/77] Hack to make Symbian build. 
--- Core/MIPS/IR/IRCompBranch.cpp | 2 -- Core/MIPS/IR/IRFrontend.cpp | 4 ++-- Core/MIPS/IR/IRInst.h | 10 ++++++++++ Core/MIPS/IR/IRJit.h | 26 ++++++++++++++++++++++++++ Core/MIPS/IR/IRPassSimplify.cpp | 4 +--- 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index f7b875100df4..609b8f478fd1 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -31,8 +31,6 @@ #include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" -#include "Common/Arm64Emitter.h" - #define _RS MIPS_GET_RS(op) #define _RT MIPS_GET_RT(op) #define _RD MIPS_GET_RD(op) diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index c41c3cac06a8..6353816407a9 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -261,7 +261,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v ILOG("=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); for (size_t i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; - DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); + DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], &ir.GetConstants()[0]); ILOG("%s", buf); } ILOG("=============== end ================="); @@ -271,7 +271,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v ILOG("=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); for (size_t i = 0; i < code->GetInstructions().size(); i++) { char buf[256]; - DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], code->GetConstants().data()); + DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], &code->GetConstants()[0]); ILOG("%s", buf); } ILOG("=============== end ================="); diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 86e1e31ae8b1..b1e17a6c45c7 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -6,6 +6,16 @@ #include "Common/CommonTypes.h" #include "Core/MIPS/MIPS.h" +#ifdef __SYMBIAN32__ +// Seems std::move() doesn't exist, so assuming it can't do moves at all. +namespace std { + template + const T &move(const T &x) { + return x; + } +}; +#endif + // Basic IR // // This IR refers implicitly to the MIPS register set and is simple to interpret. diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 87a8231bff18..38f0df6a3e3e 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -19,6 +19,7 @@ #include +#include "Common/Common.h" #include "Common/CPUDetect.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" #include "Core/MIPS/JitCommon/JitCommon.h" @@ -49,6 +50,31 @@ class IRBlock { b.const_ = nullptr; } + IRBlock(const IRBlock &b) { + *this = b; + } + + IRBlock &operator=(const IRBlock &b) { + // No std::move on Symbian... But let's try not to use elsewhere. 
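// Consequence of the shim: std::move now yields a const reference, so anything
// that would normally move an IRBlock (e.g. a growing container of blocks)
// silently falls back to copying on Symbian. That is why IRBlock's copy
// constructor and copy assignment deep-copy the instr_ and const_ arrays rather
// than sharing the pointers, which would double-delete in ~IRBlock. The
// _assert_(false) guarded by #ifndef __SYMBIAN32__ marks the copy path as
// intentionally dead on platforms where real moves are available.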
+#ifndef __SYMBIAN32__ + _assert_(false); +#endif + numInstructions_ = b.numInstructions_; + numConstants_ = b.numConstants_; + instr_ = new IRInst[numInstructions_]; + if (numInstructions_) { + memcpy(instr_, b.instr_, sizeof(IRInst) * numInstructions_); + } + const_ = new u32[numConstants_]; + if (numConstants_) { + memcpy(const_, b.const_, sizeof(u32) * numConstants_); + } + origAddr_ = b.origAddr_; + origFirstOpcode_ = b.origFirstOpcode_; + + return *this; + } + ~IRBlock() { delete[] instr_; delete[] const_; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 637e6b22fae4..eb0b892b06f4 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -102,7 +102,6 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { //FMovToGPR a0, f12 //FMov f14, f12 - const u32 *constants = in.GetConstants().data(); bool logBlocks = false; IRInst prev; prev.op = IROp::Nop; @@ -134,7 +133,6 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { // Might be useful later on x86. bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { - const u32 *constants = in.GetConstants().data(); bool logBlocks = false; for (int i = 0; i < (int)in.GetInstructions().size(); i++) { IRInst inst = in.GetInstructions()[i]; @@ -178,7 +176,7 @@ bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { bool PropagateConstants(const IRWriter &in, IRWriter &out) { IRRegCache gpr(&out); - const u32 *constants = in.GetConstants().data(); + const u32 *constants = &in.GetConstants()[0]; bool logBlocks = false; for (int i = 0; i < (int)in.GetInstructions().size(); i++) { IRInst inst = in.GetInstructions()[i]; From e2aca38f8b3990adb57951f17f5873e8015f57d2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 13 May 2016 21:48:23 -0700 Subject: [PATCH 75/77] Try enabling Travis caching. 
--- .travis.sh | 5 ++++- .travis.yml | 43 +++++++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/.travis.sh b/.travis.sh index 4123e457294a..60adacf9c2e1 100644 --- a/.travis.sh +++ b/.travis.sh @@ -1,5 +1,6 @@ #/bin/bash +export NDK_CCACHE=ccache NDK_VER=android-ndk-r10d download_extract() { @@ -85,7 +86,7 @@ travis_script() { # Compile PPSSPP if [ "$PPSSPP_BUILD_TYPE" = "Linux" ]; then if [ "$CXX" = "g++" ]; then - export CXX="g++-4.8" CC="gcc-4.8" + export CXX="ccache g++-4.8" CC="ccache gcc-4.8" fi if [ "$QT" = "TRUE" ]; then @@ -123,6 +124,8 @@ travis_script() { } travis_after_success() { + ccache -s + if [ "$PPSSPP_BUILD_TYPE" = "Linux" ]; then ./test.py fi diff --git a/.travis.yml b/.travis.yml index c4192ea5e36f..c6dd38b16d6a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,20 +4,19 @@ sudo: required language: cpp -os: linux - dist: trusty -compiler: - - gcc +addons: + apt: + packages: + - build-essential + - libgl1-mesa-dev + - libglu1-mesa-dev + - cmake -env: - - PPSSPP_BUILD_TYPE=Linux - CMAKE=TRUE - - PPSSPP_BUILD_TYPE=Android - - PPSSPP_BUILD_TYPE=Blackberry - CMAKE=TRUE - - PPSSPP_BUILD_TYPE=Symbian +cache: + - apt + - ccache notifications: irc: @@ -30,15 +29,31 @@ notifications: matrix: include: - - compiler: clang + - os: linux + compiler: "gcc linux" + env: PPSSPP_BUILD_TYPE=Linux + CMAKE=TRUE + - os: linux + compiler: "gcc android" + env: PPSSPP_BUILD_TYPE=Android + - os: linux + compiler: "gcc blackberry" + env: PPSSPP_BUILD_TYPE=Blackberry + CMAKE=TRUE + - os: linux + compiler: "gcc symbian" + env: PPSSPP_BUILD_TYPE=Symbian + - os: linux + compiler: "clang linux" env: PPSSPP_BUILD_TYPE=Linux CMAKE=TRUE - - compiler: gcc + - os: linux + compiler: "gcc qt" env: PPSSPP_BUILD_TYPE=Linux QT=TRUE # Can't get iOS to work. 
# - os: osx -# compiler: clang +# compiler: "clang ios" # env: PPSSPP_BUILD_TYPE=iOS # CMAKE=TRUE From 7a7c3b9b9fa42a7254e33a3b9738cee831ab4048 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 13 May 2016 22:58:10 +0200 Subject: [PATCH 76/77] More VFPU, vmmul thoughts --- Core/MIPS/IR/IRBackendX86.cpp | 622 ++++++++++++++++++++++++++++++++ Core/MIPS/IR/IRCompVFPU.cpp | 138 ++++--- Core/MIPS/IR/IRPassSimplify.cpp | 78 ++-- 3 files changed, 760 insertions(+), 78 deletions(-) create mode 100644 Core/MIPS/IR/IRBackendX86.cpp diff --git a/Core/MIPS/IR/IRBackendX86.cpp b/Core/MIPS/IR/IRBackendX86.cpp new file mode 100644 index 000000000000..388962d938b4 --- /dev/null +++ b/Core/MIPS/IR/IRBackendX86.cpp @@ -0,0 +1,622 @@ + + +#include "Common/x64Emitter.h" +#include "Core/MIPS/IR/IRInst.h" +#include "Core/MemMap.h" + +// Still need a register cache +struct Mapping { + Gen::OpArg dst; + Gen::OpArg src1; + Gen::OpArg src2; +}; + +class RegisterMap { +public: + Mapping Map(IRInst inst); +private: +}; + +Mapping RegisterMap::Map(IRInst inst) { + Mapping map; + return map; +} + + +class IRBackendX86 : public Gen::XCodeBlock { +public: + void Compile(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count); +}; + +void IRBackendX86::Compile(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) { + RegisterMap regMap; + using namespace Gen; + + const IRInst *end = inst + count; + while (inst != end) { + Mapping map = regMap.Map(*inst); + switch (inst->op) { + /* + case IROp::Nop: + break; + case IROp::SetConst: + MOV(32, map.dst, map.src1); + break; + case IROp::SetConstF: + MOV(32, R(EAX), map.src1); + MOVD_xmm(map.dst, EAX); + break; + case IROp::Add: + if (map.src1.IsSimpleReg() && map.src2.IsSimpleReg()) { + LEA(32, map.dst.GetSimpleReg(), MRegSum(map.src1.GetSimpleReg(), map.src2.GetSimpleReg())); + break; + } + mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; + break; + case IROp::Sub: + mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2]; + break; + case IROp::And: + mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2]; + break; + case IROp::Or: + mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2]; + break; + case IROp::Xor: + mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2]; + break; + case IROp::Mov: + mips->r[inst->dest] = mips->r[inst->src1]; + break; + case IROp::AddConst: + mips->r[inst->dest] = mips->r[inst->src1] + constPool[inst->src2]; + break; + case IROp::SubConst: + mips->r[inst->dest] = mips->r[inst->src1] - constPool[inst->src2]; + break; + case IROp::AndConst: + mips->r[inst->dest] = mips->r[inst->src1] & constPool[inst->src2]; + break; + case IROp::OrConst: + mips->r[inst->dest] = mips->r[inst->src1] | constPool[inst->src2]; + break; + case IROp::XorConst: + mips->r[inst->dest] = mips->r[inst->src1] ^ constPool[inst->src2]; + break; + case IROp::Neg: + mips->r[inst->dest] = -(s32)mips->r[inst->src1]; + break; + case IROp::Not: + mips->r[inst->dest] = ~mips->r[inst->src1]; + break; + case IROp::Ext8to32: + mips->r[inst->dest] = (s32)(s8)mips->r[inst->src1]; + break; + case IROp::Ext16to32: + mips->r[inst->dest] = (s32)(s16)mips->r[inst->src1]; + break; + + case IROp::Load8: + mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load8Ext: + mips->r[inst->dest] = (s32)(s8)Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load16: + mips->r[inst->dest] = 
Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load16Ext: + mips->r[inst->dest] = (s32)(s16)Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Load32: + mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::LoadFloat: + mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]); + break; + + case IROp::Store8: + Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Store16: + Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::Store32: + Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + case IROp::StoreFloat: + Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); + break; + + case IROp::LoadVec4: + { + u32 base = mips->r[inst->src1] + constPool[inst->src2]; +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i); +#endif + break; + } + case IROp::StoreVec4: + { + u32 base = mips->r[inst->src1] + constPool[inst->src2]; +#if defined(_M_SSE) + _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->f[inst->dest])); +#else + for (int i = 0; i < 4; i++) + Memory::WriteUnchecked_Float(mips->f[inst->dest + i], base + 4 * i); +#endif + break; + } + + case IROp::Vec4Init: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); +#else + memcpy(&mips->f[inst->dest + i], vec4InitValues[inst->src1], 4 * sizeof(float)); +#endif + break; + + case IROp::Vec4Shuffle: + { + // Can't use the SSE shuffle here because it takes an immediate. + // Backends with SSE support could use that though. 
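// Sketch of what a native backend could do instead: unlike this interpreter
// fallback, the JIT sees inst->src2 as a compile-time constant, so the whole
// shuffle can be baked into one SSE instruction's immediate, along the lines of
//   PSHUFD(dstXmm, R(srcXmm), (u8)inst->src2);
// assuming the emitter exposes the usual PSHUFD/SHUFPS helpers and the register
// cache has mapped dest and src1 to XMM registers.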
+ for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)]; + break; + } + + case IROp::Vec4Mov: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1])); +#else + memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float)); +#endif + break; + + case IROp::Vec4Add: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Sub: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Mul: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Div: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i]; +#endif + break; + + case IROp::Vec4Scale: +#if defined(_M_SSE) + _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2]))); +#else + for (int i = 0; i < 4; i++) + mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; +#endif + break; + + case IROp::FCmpVfpuBit: + { + int op = inst->dest & 0xF; + int bit = inst->dest >> 4; + int result = 0; + switch (op) { + case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break; + case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break; + case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break; + case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break; + case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break; + case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break; + case VC_EZ: result = mips->f[inst->src1] == 0.0f; break; + case VC_NZ: result = mips->f[inst->src1] != 0.0f; break; + case VC_TR: result = 1; break; + case VC_FL: result = 0; break; + default: + result = 0; + } + if (result != 0) { + mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit); + } else { + mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit); + } + } + break; + + case IROp::FCmpVfpuAggregate: + { + int mask = inst->dest; + u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC]; + int a = (cc & mask) ? 0x10 : 0x00; + int b = (cc & mask) == mask ? 0x20 : 0x00; + mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | a | b;; + } + break; + + case IROp::FCmovVfpuCC: + if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0x7f)) & 1) == (inst->src2 >> 7)) { + mips->f[inst->dest] = mips->f[inst->src1]; + } + break; + + // Not quickly implementable on all platforms, unfortunately. 
+ case IROp::Vec4Dot: + { + float dot = mips->f[inst->src1] * mips->f[inst->src2]; + for (int i = 1; i < 4; i++) + dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; + mips->f[inst->dest] = dot; + break; + } + + case IROp::FSin: + mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]); + break; + case IROp::FCos: + mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]); + break; + case IROp::FRSqrt: + mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]); + break; + case IROp::FRecip: + mips->f[inst->dest] = 1.0f / mips->f[inst->src1]; + break; + case IROp::FAsin: + mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]); + break; + + case IROp::ShlImm: + mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; + break; + case IROp::ShrImm: + mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2; + break; + case IROp::SarImm: + mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2; + break; + case IROp::RorImm: + { + u32 x = mips->r[inst->src1]; + int sa = inst->src2; + mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); + } + break; + + case IROp::Shl: + mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31); + break; + case IROp::Shr: + mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31); + break; + case IROp::Sar: + mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31); + break; + case IROp::Ror: + { + u32 x = mips->r[inst->src1]; + int sa = mips->r[inst->src2] & 31; + mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); + } + break; + + case IROp::Clz: + { + int x = 31; + int count = 0; + int value = mips->r[inst->src1]; + while (x >= 0 && !(value & (1 << x))) { + count++; + x--; + } + mips->r[inst->dest] = count; + break; + } + + case IROp::Slt: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2]; + break; + + case IROp::SltU: + mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2]; + break; + + case IROp::SltConst: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)constPool[inst->src2]; + break; + + case IROp::SltUConst: + mips->r[inst->dest] = mips->r[inst->src1] < constPool[inst->src2]; + break; + + case IROp::MovZ: + if (mips->r[inst->src1] == 0) + mips->r[inst->dest] = mips->r[inst->src2]; + break; + case IROp::MovNZ: + if (mips->r[inst->src1] != 0) + mips->r[inst->dest] = mips->r[inst->src2]; + break; + + case IROp::Max: + mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2]; + break; + case IROp::Min: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? 
mips->r[inst->src1] : mips->r[inst->src2]; + break; + + case IROp::MtLo: + mips->lo = mips->r[inst->src1]; + break; + case IROp::MtHi: + mips->hi = mips->r[inst->src1]; + break; + case IROp::MfLo: + mips->r[inst->dest] = mips->lo; + break; + case IROp::MfHi: + mips->r[inst->dest] = mips->hi; + break; + + case IROp::Mult: + { + s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2]; + memcpy(&mips->lo, &result, 8); + break; + } + case IROp::MultU: + { + u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2]; + memcpy(&mips->lo, &result, 8); + break; + } + + case IROp::BSwap16: + { + u32 x = mips->r[inst->src1]; + mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8); + break; + } + case IROp::BSwap32: + { + u32 x = mips->r[inst->src1]; + mips->r[inst->dest] = ((x & 0xFF000000) >> 24) | ((x & 0x00FF0000) >> 8) | ((x & 0x0000FF00) << 8) | ((x & 0x000000FF) << 24); + break; + } + + case IROp::FAdd: + mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2]; + break; + case IROp::FSub: + mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2]; + break; + case IROp::FMul: + mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2]; + break; + case IROp::FDiv: + mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2]; + break; + case IROp::FMin: + mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]); + break; + case IROp::FMax: + mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]); + break; + + case IROp::FMov: + mips->f[inst->dest] = mips->f[inst->src1]; + break; + case IROp::FAbs: + mips->f[inst->dest] = fabsf(mips->f[inst->src1]); + break; + case IROp::FSqrt: + mips->f[inst->dest] = sqrtf(mips->f[inst->src1]); + break; + case IROp::FNeg: + mips->f[inst->dest] = -mips->f[inst->src1]; + break; + case IROp::FSat0_1: + mips->f[inst->dest] = clamp_value(mips->f[inst->src1], 0.0f, 1.0f); + break; + case IROp::FSatMinus1_1: + mips->f[inst->dest] = clamp_value(mips->f[inst->src1], -1.0f, 1.0f); + break; + + case IROp::FpCondToReg: + mips->r[inst->dest] = mips->fpcond; + break; + case IROp::VfpuCtrlToReg: + mips->r[inst->dest] = mips->vfpuCtrl[inst->src1]; + break; + case IROp::FRound: + mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1] + 0.5f); + break; + case IROp::FTrunc: + { + float src = mips->f[inst->src1]; + if (src >= 0.0f) { + mips->fs[inst->dest] = (int)floorf(src); + // Overflow, but it was positive. + if (mips->fs[inst->dest] == -2147483648LL) { + mips->fs[inst->dest] = 2147483647LL; + } + } else { + // Overflow happens to be the right value anyway. 
+ mips->fs[inst->dest] = (int)ceilf(src); + } + break; + } + case IROp::FCeil: + mips->fs[inst->dest] = (int)ceilf(mips->f[inst->src1]); + break; + case IROp::FFloor: + mips->fs[inst->dest] = (int)floorf(mips->f[inst->src1]); + break; + case IROp::FCmp: + switch (inst->dest) { + case IRFpCompareMode::False: + mips->fpcond = 0; + break; + case IRFpCompareMode::EqualOrdered: + case IRFpCompareMode::EqualUnordered: + mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2]; + break; + case IRFpCompareMode::LessEqualOrdered: + case IRFpCompareMode::LessEqualUnordered: + mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2]; + break; + case IRFpCompareMode::LessOrdered: + case IRFpCompareMode::LessUnordered: + mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2]; + break; + } + break; + + case IROp::FCvtSW: + mips->f[inst->dest] = (float)mips->fs[inst->src1]; + break; + case IROp::FCvtWS: + { + float src = mips->f[inst->src1]; + if (my_isnanorinf(src)) + { + mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL; + break; + } + switch (mips->fcr31 & 3) + { + case 0: mips->fs[inst->dest] = (int)round_ieee_754(src); break; // RINT_0 + case 1: mips->fs[inst->dest] = (int)src; break; // CAST_1 + case 2: mips->fs[inst->dest] = (int)ceilf(src); break; // CEIL_2 + case 3: mips->fs[inst->dest] = (int)floorf(src); break; // FLOOR_3 + } + break; //cvt.w.s + } + + case IROp::ZeroFpCond: + mips->fpcond = 0; + break; + + case IROp::FMovFromGPR: + memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4); + break; + case IROp::FMovToGPR: + memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); + break; + + case IROp::ExitToConst: + return constPool[inst->dest]; + + case IROp::ExitToReg: + return mips->r[inst->dest]; + + case IROp::ExitToConstIfEq: + if (mips->r[inst->src1] == mips->r[inst->src2]) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfNeq: + if (mips->r[inst->src1] != mips->r[inst->src2]) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfGtZ: + if ((s32)mips->r[inst->src1] > 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfGeZ: + if ((s32)mips->r[inst->src1] >= 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfLtZ: + if ((s32)mips->r[inst->src1] < 0) + return constPool[inst->dest]; + break; + case IROp::ExitToConstIfLeZ: + if ((s32)mips->r[inst->src1] <= 0) + return constPool[inst->dest]; + break; + + case IROp::Downcount: + mips->downcount -= (inst->src1) | ((inst->src2) << 8); + break; + + case IROp::SetPC: + mips->pc = mips->r[inst->src1]; + break; + + case IROp::SetPCConst: + mips->pc = constPool[inst->src1]; + break; + + case IROp::Syscall: + // SetPC was executed before. + { + MIPSOpcode op(constPool[inst->src1]); + CallSyscall(op); + return mips->pc; + } + + case IROp::Interpret: // SLOW fallback. Can be made faster. 
+ { + MIPSOpcode op(constPool[inst->src1]); + MIPSInterpret(op); + break; + } + + case IROp::CallReplacement: + { + int funcIndex = constPool[inst->src1]; + const ReplacementTableEntry *f = GetReplacementFunc(funcIndex); + int cycles = f->replaceFunc(); + mips->downcount -= cycles; + break; + } + + case IROp::Break: + Crash(); + break; + + case IROp::SetCtrlVFPU: + mips->vfpuCtrl[inst->dest] = constPool[inst->src1]; + break; + + case IROp::SetCtrlVFPUReg: + mips->vfpuCtrl[inst->dest] = mips->r[inst->src1]; + break; + + case IROp::SetCtrlVFPUFReg: + memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4); + break; + */ + default: + Crash(); + } + inst++; + } + + // If we got here, the block was badly constructed. + Crash(); +} diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 2160175cd286..1b484fbb602d 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -483,7 +483,9 @@ namespace MIPSComp { int vd = _VD; int vs = _VS; int vt = _VT; + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); // TODO: Force read one of them into regs? probably not. u8 sregs[4], tregs[4], dregs[1]; @@ -491,10 +493,15 @@ namespace MIPSComp { GetVectorRegsPrefixT(tregs, sz, vt); GetVectorRegsPrefixD(dregs, V_Single, vd); + if (sz == V_Quad && IsConsecutive4(sregs) && IsConsecutive4(tregs) && IsOverlapSafe(dregs[0], n, sregs, n, tregs)) { + ir.Write(IROp::Vec4Dot, dregs[0], sregs[0], tregs[0]); + ApplyPrefixD(dregs, V_Single); + return; + } + int temp0 = IRVTEMP_0; int temp1 = IRVTEMP_0 + 1; ir.Write(IROp::FMul, temp0, sregs[0], tregs[0]); - int n = GetNumVectorElements(sz); for (int i = 1; i < n; i++) { ir.Write(IROp::FMul, temp1, sregs[i], tregs[i]); ir.Write(IROp::FAdd, i == (n - 1) ? dregs[0] : temp0, temp0, temp1); @@ -681,7 +688,7 @@ namespace MIPSComp { GetVectorRegsPrefixD(dregs, sz, vd); bool usingTemps = false; - int tempregs[4]; + u8 tempregs[4]; for (int i = 0; i < n; ++i) { if (!IsOverlapSafe(dregs[i], n, sregs)) { usingTemps = true; @@ -790,7 +797,7 @@ namespace MIPSComp { GetVectorRegsPrefixS(sregs, sz, _VS); GetVectorRegsPrefixD(dregs, sz, _VD); - int tempregs[4]; + u8 tempregs[4]; for (int i = 0; i < n; ++i) { if (!IsOverlapSafe(dregs[i], n, sregs)) { tempregs[i] = IRVTEMP_PFX_T + i; // Need IRVTEMP_0 for the scaling factor @@ -976,31 +983,35 @@ namespace MIPSComp { VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); + int vs = _VS; + int vd = _VD; + int vt = _VT; u8 sregs[4], dregs[4], treg; - GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixS(sregs, sz, vs); // TODO: Prefixes seem strange... - GetVectorRegsPrefixT(&treg, V_Single, _VT); - GetVectorRegsPrefixD(dregs, sz, _VD); + GetVectorRegsPrefixT(&treg, V_Single, vt); + GetVectorRegsPrefixD(dregs, sz, vd); bool overlap = false; // For prefixes to work, we just have to ensure that none of the output registers spill // and that there's no overlap. 
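// The tempregs handling below exists because a VFPU output register may alias
// an input that is still needed: when that happens the result is staged in
// IRVTEMP registers and moved into place once every source has been read.
// A stand-in for the overlap test (the real helper is IsOverlapSafe; this only
// shows the intent, not its exact signature):

#include <cstdint>

static bool OverlapSafeSketch(int dreg, const uint8_t *sregs, int n) {
	for (int i = 0; i < n; i++) {
		if (sregs[i] == dreg)
			return false;  // writing dreg early would clobber a pending source
	}
	return true;
}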
- int tempregs[4]; + u8 tempregs[4]; + memcpy(tempregs, dregs, sizeof(tempregs)); for (int i = 0; i < n; ++i) { // Conservative, can be improved if (treg == dregs[i] || !IsOverlapSafe(dregs[i], n, sregs)) { // Need to use temp regs tempregs[i] = IRVTEMP_0 + i; overlap = true; - } else { - tempregs[i] = dregs[i]; } } - if (n == 4 && IsConsecutive4(sregs) && IsConsecutive4(dregs) && !overlap) { - ir.Write(IROp::Vec4Scale, dregs[0], sregs[0], treg); - ApplyPrefixD(dregs, sz); - return; + if (n == 4 && IsConsecutive4(sregs) && IsConsecutive4(dregs)) { + if (!overlap || (vs == vd && IsOverlapSafe(treg, n, dregs))) { + ir.Write(IROp::Vec4Scale, dregs[0], sregs[0], treg); + ApplyPrefixD(dregs, sz); + return; + } } for (int i = 0; i < n; i++) { @@ -1017,6 +1028,21 @@ namespace MIPSComp { ApplyPrefixD(dregs, sz); } + /* + // Capital = straight, lower case = transposed + // 8 possibilities: + ABC 2 + ABc missing + AbC 1 + Abc 1 + + aBC = ACB 2 + swap + aBc = AcB 1 + swap + abC = ACb missing + abc = Acb 1 + swap + + */ + // This may or may not be a win when using the IR interpreter... // Many more instructions to interpret. void IRFrontend::Comp_Vmmul(MIPSOpcode op) { @@ -1035,7 +1061,7 @@ namespace MIPSComp { MatrixOverlapType toverlap = GetMatrixOverlap(vt, vd, sz); // A very common arrangment. Rearrange to something we can handle. - if (IsMatrixTransposed(vd) && !IsMatrixTransposed(vs) && IsMatrixTransposed(vt)) { + if (IsMatrixTransposed(vd)) { // Matrix identity says (At * Bt) = (B * A)t // D = S * T // Dt = (S * T)t = (Tt * St) @@ -1051,12 +1077,16 @@ namespace MIPSComp { if (soverlap || toverlap) { DISABLE; } - if (sz == M_4x4 && IsConsecutive4(tregs) && IsConsecutive4(dregs)) { + + // dregs are always consecutive, thanks to our transpose trick. + // However, not sure this is always worth it. + if (sz == M_4x4 && IsConsecutive4(dregs)) { // TODO: The interpreter would like proper matrix ops better. Can generate those, and // expand them like this as needed on "real" architectures. int s0 = IRVTEMP_0; int s1 = IRVTEMP_PFX_T; if (!IsConsecutive4(sregs)) { + // METHOD 1: Handles AbC and Abc for (int j = 0; j < 4; j++) { ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[j * 4]); for (int i = 1; i < 4; i++) { @@ -1066,7 +1096,10 @@ namespace MIPSComp { ir.Write(IROp::Vec4Mov, dregs[j * 4], s0); } return; - } else { + } else if (IsConsecutive4(tregs)) { + // METHOD 2: Handles ABC only. Not efficient on CPUs that don't do fast dots. + // Dots only work if tregs are consecutive. + // TODO: Skip this and resort to method one and transpose the output? for (int j = 0; j < 4; j++) { for (int i = 0; i < 4; i++) { ir.Write(IROp::Vec4Dot, s0 + i, sregs[i], tregs[j * 4]); @@ -1074,10 +1107,11 @@ namespace MIPSComp { ir.Write(IROp::Vec4Mov, dregs[j * 4], s0); } return; + } else { + // ABc - s consecutive, t not. + // Tekken uses this. + // logBlocks = 1; } - } else if (sz == M_4x4) { - // Tekken 6 has a case here: MEE - // logBlocks = 1; } // Fallback. 
Expands a LOT @@ -1126,44 +1160,50 @@ namespace MIPSComp { if (msz == M_4x4 && IsConsecutive4(sregs)) { int s0 = IRVTEMP_0; int s1 = IRVTEMP_PFX_T; - if (!IsConsecutive4(tregs)) { - ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); - for (int i = 1; i < 4; i++) { - if (!homogenous || (i != n - 1)) { - ir.Write(IROp::Vec4Scale, s1, sregs[i * 4], tregs[i]); - ir.Write(IROp::Vec4Add, s0, s0, s1); - } else { - ir.Write(IROp::Vec4Add, s0, s0, sregs[i * 4]); - } - } - - if (IsConsecutive4(dregs)) { - ir.Write(IROp::Vec4Mov, dregs[0], s0); + // For this algorithm, we don't care if tregs are consecutive or not, + // they are accessed one at a time. This handles homogenous transforms correctly, as well. + ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); + for (int i = 1; i < 4; i++) { + if (!homogenous || (i != n - 1)) { + ir.Write(IROp::Vec4Scale, s1, sregs[i * 4], tregs[i]); + ir.Write(IROp::Vec4Add, s0, s0, s1); } else { - for (int i = 0; i < 4; i++) { - ir.Write(IROp::FMov, dregs[i], s0 + i); - } + ir.Write(IROp::Vec4Add, s0, s0, sregs[i * 4]); } - return; - } else if (!homogenous) { + } + if (IsConsecutive4(dregs)) { + ir.Write(IROp::Vec4Mov, dregs[0], s0); + } else { for (int i = 0; i < 4; i++) { - ir.Write(IROp::Vec4Dot, s0 + i, sregs[i * 4], tregs[0]); + ir.Write(IROp::FMov, dregs[i], s0 + i); } - if (IsConsecutive4(dregs)) { - ir.Write(IROp::Vec4Mov, dregs[0], s0); + } + return; + } else if (msz == M_4x4 && !IsConsecutive4(sregs)) { + int s0 = IRVTEMP_0; + int s1 = IRVTEMP_PFX_S; + // Doesn't make complete sense to me why this works.... + ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); + for (int i = 1; i < 4; i++) { + if (!homogenous || (i != n - 1)) { + ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[i]); + ir.Write(IROp::Vec4Add, s0, s0, s1); } else { - for (int i = 0; i < 4; i++) { - ir.Write(IROp::FMov, dregs[i], s0 + i); - } + ir.Write(IROp::Vec4Add, s0, s0, sregs[i]); } - return; } - } else if (msz == M_4x4) { - // logBlocks = 1; + if (IsConsecutive4(dregs)) { + ir.Write(IROp::Vec4Mov, dregs[0], s0); + } else { + for (int i = 0; i < 4; i++) { + ir.Write(IROp::FMov, dregs[i], s0 + i); + } + } + return; } // TODO: test overlap, optimize. 
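// Why the column trick works: for a 4x4 matrix M with columns c0..c3,
// M*t = t.x*c0 + t.y*c1 + t.z*c2 + t.w*c3, a linear combination of the columns.
// The Vec4Scale/Vec4Add sequence above is exactly that combination, with each
// consecutive register block playing the role of one column; the homogeneous
// form (vhtfm) just adds the last block unscaled because w is implicitly 1.
// The same transpose identity quoted in Comp_Vmmul, (S*T)^T = T^T * S^T, is what
// lets a transposed destination be rewritten as the product of the swapped,
// transposed operands. A scalar reference of the column combination, independent
// of how the VFPU register layout is fetched (m[c] holds column c):

static void MatVec4Reference(const float m[4][4], const float t[4], float out[4]) {
	for (int i = 0; i < 4; i++) {
		out[i] = m[0][i] * t[0] + m[1][i] * t[1] + m[2][i] * t[2] + m[3][i] * t[3];
	}
}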
- int tempregs[4]; + u8 tempregs[4]; int s0 = IRVTEMP_0; int temp1 = IRVTEMP_0 + 1; for (int i = 0; i < n; i++) { @@ -1216,7 +1256,7 @@ namespace MIPSComp { GetVectorRegs(tregs, sz, _VT); GetVectorRegs(dregs, sz, _VD); - int tempregs[4]; + u8 tempregs[4]; for (int i = 0; i < n; ++i) { if (!IsOverlapSafe(dregs[i], n, sregs, n, tregs)) { tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things @@ -1383,7 +1423,7 @@ namespace MIPSComp { GetVectorRegsPrefixS(sregs, sz, _VS); GetVectorRegsPrefixD(dregs, sz, _VD); - int tempregs[4]; + u8 tempregs[4]; for (int i = 0; i < n; ++i) { if (!IsOverlapSafe(dregs[i], n, sregs)) { tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index eb0b892b06f4..04c3ea15241d 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -4,6 +4,27 @@ #include "Core/MIPS/IR/IRPassSimplify.h" #include "Core/MIPS/IR/IRRegCache.h" +void WriteInstWithConstants(const IRWriter &in, IRWriter &out, const u32 *constants, IRInst inst) { + // Remap constants to the new reality + const IRMeta *m = GetIRMeta(inst.op); + switch (m->types[0]) { + case 'C': + inst.dest = out.AddConstant(constants[inst.dest]); + break; + } + switch (m->types[1]) { + case 'C': + inst.src1 = out.AddConstant(constants[inst.src1]); + break; + } + switch (m->types[2]) { + case 'C': + inst.src2 = out.AddConstant(constants[inst.src2]); + break; + } + out.Write(inst); +} + u32 Evaluate(u32 a, u32 b, IROp op) { switch (op) { case IROp::Add: case IROp::AddConst: return a + b; @@ -96,12 +117,7 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri } bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { - //FMovToGPR a0, f12 - //FMovFromGPR f14, a0 - // to - //FMovToGPR a0, f12 - //FMov f14, f12 - + const u32 *constants = in.GetConstants().data(); bool logBlocks = false; IRInst prev; prev.op = IROp::Nop; @@ -109,6 +125,11 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { IRInst inst = in.GetInstructions()[i]; switch (inst.op) { case IROp::FMovFromGPR: + //FMovToGPR a0, f12 + //FMovFromGPR f14, a0 + // to + //FMovToGPR a0, f12 + //FMov f14, f12 if (prev.op == IROp::FMovToGPR && prev.dest == inst.src1) { inst.op = IROp::FMov; inst.src1 = prev.src1; @@ -118,16 +139,32 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { } break; - default: + // This will need to scan forward or keep track of more information to be useful. + // Just doing one isn't. + /* + case IROp::LoadVec4: + // AddConst a0, sp, 0x30 + // LoadVec4 v16, a0, 0x0 + // to + // AddConst a0, sp, 0x30 + // LoadVec4 v16, sp, 0x30 + if (prev.op == IROp::AddConst && prev.dest == inst.src1 && prev.dest != prev.src1 && prev.src1 == MIPS_REG_SP) { + inst.src2 = out.AddConstant(constants[prev.src2] + constants[inst.src2]); + inst.src1 = prev.src1; + logBlocks = 1; + } else { + goto doDefault; + } out.Write(inst); break; + */ + default: + doDefault: + WriteInstWithConstants(in, out, constants, inst); + break; } prev = inst; } - // Can reuse the old constants array - not touching constants in this pass. 
- for (u32 value : in.GetConstants()) { - out.AddConstant(value); - } return logBlocks; } @@ -495,24 +532,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { doDefaultAndFlush: gpr.FlushAll(); doDefault: - // Remap constants to the new reality - const IRMeta *m = GetIRMeta(inst.op); - switch (m->types[0]) { - case 'C': - inst.dest = out.AddConstant(constants[inst.dest]); - break; - } - switch (m->types[1]) { - case 'C': - inst.src1 = out.AddConstant(constants[inst.src1]); - break; - } - switch (m->types[2]) { - case 'C': - inst.src2 = out.AddConstant(constants[inst.src2]); - break; - } - out.Write(inst); + WriteInstWithConstants(in, out, constants, inst); break; } } From 91bc3c31a58a12557a532b09cb81f78ddca75c54 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 14 May 2016 14:01:27 +0200 Subject: [PATCH 77/77] Warning fixes --- Core/MIPS/IR/IRPassSimplify.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 04c3ea15241d..6b29efc24247 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -121,6 +121,9 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { bool logBlocks = false; IRInst prev; prev.op = IROp::Nop; + prev.dest = 0; + prev.src1 = 0; + prev.src2 = 0; for (int i = 0; i < (int)in.GetInstructions().size(); i++) { IRInst inst = in.GetInstructions()[i]; switch (inst.op) { @@ -159,7 +162,6 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { break; */ default: - doDefault: WriteInstWithConstants(in, out, constants, inst); break; }
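
Side note (an editorial sketch, not part of the patch series above): the WriteInstWithConstants helper factored out in these patches captures the one invariant every IR pass must respect when copying instructions into a fresh IRWriter - any operand whose metadata type is 'C' is an index into the *source* writer's constant pool, so the value has to be re-added to the destination pool and the index rewritten. The stand-in types below (SketchInst, SketchWriter) are hypothetical simplifications; in the real code the per-operand types come from GetIRMeta(inst.op) rather than being stored on the instruction, and IRWriter's actual AddConstant semantics may differ from this deduplicating version.

// Minimal, self-contained sketch of the constant-remapping idea.
#include <cstdint>
#include <vector>

struct SketchInst {
	char types[3];       // 'C' marks an operand that indexes the constant pool.
	uint32_t ops[3];     // dest, src1, src2
};

struct SketchWriter {
	std::vector<SketchInst> insts;
	std::vector<uint32_t> constants;

	// Interns a constant, reusing an existing slot when the value is already present.
	uint32_t AddConstant(uint32_t value) {
		for (size_t i = 0; i < constants.size(); i++) {
			if (constants[i] == value)
				return (uint32_t)i;
		}
		constants.push_back(value);
		return (uint32_t)(constants.size() - 1);
	}

	// The WriteInstWithConstants pattern: copy an instruction from another writer,
	// re-adding constant operands so their indices are valid in this writer's pool.
	void CopyInst(const SketchWriter &src, SketchInst inst) {
		for (int i = 0; i < 3; i++) {
			if (inst.types[i] == 'C')
				inst.ops[i] = AddConstant(src.constants[inst.ops[i]]);
		}
		insts.push_back(inst);
	}
};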