Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct misbehavior on uninitialized values in IR #16396

Merged
merged 3 commits into from
Nov 19, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 39 additions & 21 deletions Core/MIPS/IR/IRCompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ namespace MIPSComp {
regs[3] == regs[2] + 1;
}

static bool IsVec4(VectorSize sz, const u8 regs[4]) {
return sz == V_Quad && IsConsecutive4(regs) && (regs[0] & 3) == 0;
}

static bool IsMatrixVec4(MatrixSize sz, const u8 regs[16]) {
if (sz != M_4x4)
return false;
if (!IsConsecutive4(&regs[0]) || (regs[0] & 3) != 0)
return false;
if (!IsConsecutive4(&regs[4]) || (regs[4] & 3) != 0)
return false;
if (!IsConsecutive4(&regs[8]) || (regs[8] & 3) != 0)
return false;
if (!IsConsecutive4(&regs[12]) || (regs[12] & 3) != 0)
return false;
return true;
}

// Vector regs can overlap in all sorts of swizzled ways.
// This does allow a single overlap in sregs[i].
static bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL) {
Expand Down Expand Up @@ -173,7 +191,7 @@ namespace MIPSComp {
origV[i] = vregs[i];

// Some common vector prefixes
if (sz == V_Quad && IsConsecutive4(vregs)) {
if (IsVec4(sz, vregs)) {
if (prefix == 0xF00E4) {
InitRegs(vregs, tempReg);
ir.Write(IROp::Vec4Neg, vregs[0], origV[0]);
Expand Down Expand Up @@ -327,7 +345,7 @@ namespace MIPSComp {

switch (op >> 26) {
case 54: //lv.q
if (IsConsecutive4(vregs)) {
if (IsVec4(V_Quad, vregs)) {
ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm));
} else {
// Let's not even bother with "vertical" loads for now.
Expand All @@ -341,7 +359,7 @@ namespace MIPSComp {
break;

case 62: //sv.q
if (IsConsecutive4(vregs)) {
if (IsVec4(V_Quad, vregs)) {
ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm));
} else {
// Let's not even bother with "vertical" stores for now.
Expand Down Expand Up @@ -381,7 +399,7 @@ namespace MIPSComp {
u8 dregs[4];
GetVectorRegsPrefixD(dregs, sz, vd);

if (sz == V_Quad && IsConsecutive4(dregs)) {
if (IsVec4(sz, dregs)) {
ir.Write(IROp::Vec4Init, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
} else {
for (int i = 0; i < n; i++) {
Expand All @@ -406,7 +424,7 @@ namespace MIPSComp {
u8 dregs[4];
GetVectorRegsPrefixD(dregs, sz, vd);

if (sz == 4 && IsConsecutive4(dregs)) {
if (IsVec4(sz, dregs)) {
int row = vd & 3;
Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
ir.Write(IROp::Vec4Init, dregs[0], (int)init);
Expand Down Expand Up @@ -574,7 +592,7 @@ namespace MIPSComp {
GetVectorRegsPrefixT(tregs, sz, vt);
GetVectorRegsPrefixD(dregs, V_Single, vd);

if (sz == V_Quad && IsConsecutive4(sregs) && IsConsecutive4(tregs) && IsOverlapSafe(dregs[0], n, sregs, n, tregs)) {
if (IsVec4(sz, sregs) && IsVec4(sz, tregs) && IsOverlapSafe(dregs[0], n, sregs, n, tregs)) {
ir.Write(IROp::Vec4Dot, dregs[0], sregs[0], tregs[0]);
ApplyPrefixD(dregs, V_Single);
return;
Expand Down Expand Up @@ -660,7 +678,7 @@ namespace MIPSComp {
}

// If all three are consecutive 4, we're safe regardless of if we use temps so we should not check that here.
if (allowSIMD && sz == V_Quad && IsConsecutive4(dregs) && IsConsecutive4(sregs) && IsConsecutive4(tregs)) {
if (allowSIMD && IsVec4(sz, dregs) && IsVec4(sz, sregs) && IsVec4(sz, tregs)) {
IROp opFunc = IROp::Nop;
switch (op >> 26) {
case 24: //VFPU0
Expand Down Expand Up @@ -788,14 +806,14 @@ namespace MIPSComp {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);

u8 sregs[4], dregs[4];
u8 sregs[4]{}, dregs[4]{};
GetVectorRegsPrefixS(sregs, sz, vs);
GetVectorRegsPrefixD(dregs, sz, vd);

bool usingTemps = false;
u8 tempregs[4];
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], n, sregs)) {
if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
usingTemps = true;
tempregs[i] = IRVTEMP_0 + i;
} else {
Expand All @@ -813,7 +831,7 @@ namespace MIPSComp {
break;
}

if (canSIMD && !usingTemps && IsConsecutive4(sregs) && IsConsecutive4(dregs)) {
if (canSIMD && !usingTemps && IsVec4(sz, sregs) && IsVec4(sz, dregs)) {
switch (optype) {
case 0: // vmov
ir.Write(IROp::Vec4Mov, dregs[0], sregs[0]);
Expand Down Expand Up @@ -1211,7 +1229,7 @@ namespace MIPSComp {
}
}

if (n == 4 && IsConsecutive4(sregs) && IsConsecutive4(dregs)) {
if (IsVec4(sz, sregs) && IsVec4(sz, dregs)) {
if (!overlap || (vs == vd && IsOverlapSafe(treg, n, dregs))) {
ir.Write(IROp::Vec4Scale, dregs[0], sregs[0], treg);
ApplyPrefixD(dregs, sz);
Expand Down Expand Up @@ -1296,12 +1314,12 @@ namespace MIPSComp {

// dregs are always consecutive, thanks to our transpose trick.
// However, not sure this is always worth it.
if (sz == M_4x4 && IsConsecutive4(dregs)) {
if (IsMatrixVec4(sz, dregs)) {
// TODO: The interpreter would like proper matrix ops better. Can generate those, and
// expand them like this as needed on "real" architectures.
int s0 = IRVTEMP_0;
int s1 = IRVTEMP_PFX_T;
if (!IsConsecutive4(sregs)) {
if (!IsMatrixVec4(sz, sregs)) {
// METHOD 1: Handles AbC and Abc
for (int j = 0; j < 4; j++) {
ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[j * 4]);
Expand All @@ -1312,7 +1330,7 @@ namespace MIPSComp {
ir.Write(IROp::Vec4Mov, dregs[j * 4], s0);
}
return;
} else if (IsConsecutive4(tregs)) {
} else if (IsMatrixVec4(sz, tregs)) {
// METHOD 2: Handles ABC only. Not efficient on CPUs that don't do fast dots.
// Dots only work if tregs are consecutive.
// TODO: Skip this and resort to method one and transpose the output?
Expand Down Expand Up @@ -1378,7 +1396,7 @@ namespace MIPSComp {
GetVectorRegs(dregs, sz, _VD);

// SIMD-optimized implementations - if sregs[0..3] is non-consecutive, it's transposed.
if (msz == M_4x4 && !IsConsecutive4(sregs)) {
if (msz == M_4x4 && !IsMatrixVec4(msz, sregs)) {
int s0 = IRVTEMP_0;
int s1 = IRVTEMP_PFX_S;
// For this algorithm, we don't care if tregs are consecutive or not,
Expand All @@ -1393,15 +1411,15 @@ namespace MIPSComp {
ir.Write(IROp::Vec4Add, s0, s0, sregs[i]);
}
}
if (IsConsecutive4(dregs)) {
if (IsVec4(sz, dregs)) {
ir.Write(IROp::Vec4Mov, dregs[0], s0);
} else {
for (int i = 0; i < 4; i++) {
ir.Write(IROp::FMov, dregs[i], s0 + i);
}
}
return;
} else if (msz == M_4x4 && IsConsecutive4(sregs)) {
} else if (msz == M_4x4 && IsMatrixVec4(msz, sregs)) {
// Consecutive, which is harder.
DISABLE;
int s0 = IRVTEMP_0;
Expand All @@ -1416,7 +1434,7 @@ namespace MIPSComp {
ir.Write(IROp::Vec4Add, s0, s0, sregs[i]);
}
}
if (IsConsecutive4(dregs)) {
if (IsVec4(sz, dregs)) {
ir.Write(IROp::Vec4Mov, dregs[0], s0);
} else {
for (int i = 0; i < 4; i++) {
Expand Down Expand Up @@ -1516,8 +1534,8 @@ namespace MIPSComp {

int nOut = GetNumVectorElements(outsize);

// If src registers aren't contiguous, make them.
if (sz == V_Quad && !IsConsecutive4(sregs)) {
// If src registers aren't contiguous, make them (mainly for bits == 8.)
if (sz == V_Quad && !IsVec4(sz, sregs)) {
// T prefix is unused.
for (int i = 0; i < 4; i++) {
srcregs[i] = IRVTEMP_PFX_T + i;
Expand Down Expand Up @@ -1623,7 +1641,7 @@ namespace MIPSComp {
}
}
} else if (outsize == V_Quad) {
bool consecutive = IsConsecutive4(dregs);
bool consecutive = IsVec4(outsize, dregs);
if (!consecutive || !IsOverlapSafe(nOut, dregs, nIn, srcregs)) {
for (int i = 0; i < nOut; i++) {
tempregs[i] = IRVTEMP_PFX_T + i;
Expand Down