diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 72688d4fe..ebb6f4f82 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -274,6 +274,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; emit_sub32c(dyn, ninst, rex, xRAX, i64, x2, x3, x4, x5); break; + case 0x30: + INST_NAME("XOR Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_xor8(dyn, ninst, x1, x2, x4, x5); + EBBACK(); + break; case 0x31: INST_NAME("XOR Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -1002,6 +1011,21 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni gb1 = TO_LA64(opcode & 3); BSTRINS_D(gb1, x1, 7, 0); break; + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + INST_NAME("MOV xH, Ib"); + u8 = F8; + MOV32w(x1, u8); + if (rex.rex) { + gb1 = TO_LA64((opcode & 7) + (rex.b << 3)); + BSTRINS_D(gb1, x1, 7, 0); + } else { + gb1 = TO_LA64(opcode & 3); + BSTRINS_D(gb1, x1, 15, 8); + } + break; case 0xB8: case 0xB9: case 0xBA: diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index d7184f2c6..d16c88548 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -184,6 +184,21 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni #undef GO + case 0x57: + INST_NAME("XORPS Gx, Ex"); + nextop = F8; + GETG; + if (MODREG && ((nextop & 7) + (rex.b << 3) == gd)) { + // special case for XORPS Gx, Gx + q0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VXOR_V(q0, q0, q0); + } else { + q0 = sse_get_reg(dyn, ninst, x1, gd, 1); + GETEX(q1, 0, 0); + VXOR_V(q0, q0, q1); + } + break; + #define GO(GETFLAGS, NO, YES, F, I) \ READFLAGS(F); \ i32_ = F32S; \ diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index e7a358eb3..626e33d78 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ 
b/src/dynarec/la64/dynarec_la64_66.c @@ -65,10 +65,50 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0x39: + INST_NAME("CMP Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6); + break; case 0x81: case 0x83: nextop = F8; switch ((nextop >> 3) & 7) { + case 0: // ADD + if (opcode == 0x81) { + INST_NAME("ADD Ew, Iw"); + } else { + INST_NAME("ADD Ew, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode == 0x81) ? 2 : 1); + if (opcode == 0x81) + i16 = F16S; + else + i16 = F8S; + MOV64x(x5, i16); + emit_add16(dyn, ninst, ed, x5, x2, x4, x6); + EWBACK; + break; + case 5: // SUB + if (opcode == 0x81) { + INST_NAME("SUB Ew, Iw"); + } else { + INST_NAME("SUB Ew, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode == 0x81) ? 2 : 1); + if (opcode == 0x81) + i16 = F16S; + else + i16 = F8S; + MOV32w(x5, i16); + emit_sub16(dyn, ninst, x1, x5, x2, x4, x6); + EWBACK; + break; case 7: // CMP if (opcode == 0x81) { INST_NAME("CMP Ew, Iw"); diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index cdafe2e13..6190108f3 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -55,6 +55,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; FAKEED; break; + case 0x61: + INST_NAME("PUNPCKLWD Gx,Ex"); + nextop = F8; + GETGX(v0, 1); + GETEX(q0, 0, 0); + VILVL_H(v0, q0, v0); + break; case 0x6C: INST_NAME("PUNPCKLQDQ Gx,Ex"); nextop = F8; @@ -133,6 +140,20 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } BSTRINS_D(gd, x1, 15, 0); break; + case 0xD6: + INST_NAME("MOVQ Ex, Gx"); + nextop = F8; + GETGX(v0, 0); + if (MODREG) { + v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop & 7) + (rex.b << 3)); + VXOR_V(v1, v1, v1); + VEXTRINS_D(v1, v0, 0); + } else { + addr = geted(dyn, 
addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + FST_D(v0, ed, fixedaddress); + SMWRITE2(); + } + break; case 0xEF: INST_NAME("PXOR Gx,Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c index d3affee68..dc6995e49 100644 --- a/src/dynarec/la64/dynarec_la64_emit_logic.c +++ b/src/dynarec/la64/dynarec_la64_emit_logic.c @@ -21,6 +21,42 @@ #include "dynarec_la64_functions.h" #include "dynarec_la64_helper.h" +// emit XOR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) +void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + IFX (X_PEND) { + SET_DF(s4, d_xor8); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + IFXA (X_ALL, la64_lbt) { + X64_XOR_B(s1, s2); + } + + XOR(s1, s1, s2); + ANDI(s1, s1, 0xff); + + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) return; + + CLEAR_FLAGS(s3); + IFX (X_SF) { + SRLI_D(s3, s1, 7); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} // emit XOR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index 4c5f194f7..1884ea9e5 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -399,6 +399,77 @@ void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i } } +// emit ADD16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, op1)); + ST_H(s2, xEmu, 
offsetof(x64emu_t, op2)); + SET_DF(s3, d_add16); + } else IFX (X_ALL) { + SET_DFNONE(); + } + IFXA (X_AF | X_OF, !la64_lbt) { + OR(s3, s1, s2); // s3 = op1 | op2 + AND(s4, s1, s2); // s4 = op1 & op2 + } + + IFXA (X_ALL, la64_lbt) { + X64_ADD_H(s1, s2); + } + + ADD_D(s1, s1, s2); + + IFX (X_PEND) { + ST_W(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) { + BSTRPICK_D(s1, s1, 15, 0); + return; + } + + CLEAR_FLAGS(s5); + IFX (X_AF | X_OF) { + ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) + OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) + IFX (X_AF) { + ANDI(s4, s3, 0x08); // AF: cc & 0x08 + BEQZ(s4, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX (X_OF) { + SRLI_D(s3, s3, 14); + SRLI_D(s4, s3, 1); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF); + } + } + + IFX (X_CF) { + SRLI_D(s3, s1, 16); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + BSTRPICK_D(s1, s1, 15, 0); + + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_SF) { + SRLI_D(s3, s1, 15); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { @@ -454,6 +525,51 @@ void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i emit_sub8(dyn, ninst, s1, s2, s3, s4, s5); } + +// emit SUB16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, op1)); + ST_H(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_sub16); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + IFXA (X_AF | X_CF | X_OF, !la64_lbt) { + // for later flag calculation + NOR(s5, xZR, s1); + } + + IFXA (X_ALL, la64_lbt) { +
X64_SUB_H(s1, s2); + } + + SUB_W(s1, s1, s2); + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) return; + + CLEAR_FLAGS(s3); + SLLI_D(s1, s1, 48); + IFX (X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + SRLI_D(s1, s1, 48); + + CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16); + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c index 64c806f9c..12616391a 100644 --- a/src/dynarec/la64/dynarec_la64_f0.c +++ b/src/dynarec/la64/dynarec_la64_f0.c @@ -52,6 +52,28 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETREX(); switch (opcode) { + case 0x01: + INST_NAME("LOCK ADD Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + SMDMB(); + if ((nextop & 0xC0) == 0xC0) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); + MARKLOCK; + LLxw(x1, wback, 0); + ADDxw(x4, x1, gd); + SCxw(x4, wback, 0); + BEQZ_MARKLOCK(x4); + IFX (X_ALL | X_PEND) { + emit_add32(dyn, ninst, rex, x1, gd, x3, x4, x5); + } + } + SMDMB(); + break; case 0x0F: nextop = F8; switch (nextop) { diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 2ec42225f..d6b9629ab 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -693,6 +693,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_add32c STEPNAME(emit_add32c) #define emit_add8 STEPNAME(emit_add8) #define emit_add8c STEPNAME(emit_add8c) +#define emit_add16 STEPNAME(emit_add16) 
+#define emit_sub16 STEPNAME(emit_sub16) #define emit_sub32 STEPNAME(emit_sub32) #define emit_sub32c STEPNAME(emit_sub32c) #define emit_sub8 STEPNAME(emit_sub8) @@ -705,6 +707,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_or32c STEPNAME(emit_or32c) #define emit_or8 STEPNAME(emit_or8) #define emit_or8c STEPNAME(emit_or8c) +#define emit_xor8 STEPNAME(emit_xor8) #define emit_xor8c STEPNAME(emit_xor8c) #define emit_xor32 STEPNAME(emit_xor32) #define emit_xor32c STEPNAME(emit_xor32c) @@ -768,6 +771,8 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5); void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4); +void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5); void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); @@ -780,6 +785,7 @@ void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_or8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4); +void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_xor32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); 
void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index eccf62ff4..04dd5ccd9 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -564,19 +564,19 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) return buff; } if (isMask(opcode, "0010101110iiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.D", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.D", Ft[Rd], Xt[Rj], signExtend(imm, 12)); return buff; } if (isMask(opcode, "0010101100iiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.S", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.S", Ft[Rd], Xt[Rj], signExtend(imm, 12)); return buff; } if (isMask(opcode, "0010101111iiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.D", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.D", Ft[Rd], Xt[Rj], signExtend(imm, 12)); return buff; } if (isMask(opcode, "0010101101iiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.S", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.S", Ft[Rd], Xt[Rj], signExtend(imm, 12)); return buff; } if (isMask(opcode, "00000001000000001kkkkkjjjjjddddd", &a)) {