Skip to content

Commit

Permalink
Added more MMX opcodes and some optimizations too (#1537)
Browse files Browse the repository at this point in the history
* [RV64_DYNAREC] Added 0F DD PADDUSW opcode and optimized 66 0F DD PADDUSW opcode

* [RV64_DYNAREC] Added 0F 3A 0F PALIGNR opcode

* [RV64_DYNAREC] Optimized 66 0F 3A 0F PALIGNR opcode
  • Loading branch information
xctan authored May 30, 2024
1 parent 22bc687 commit 98d9f36
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 22 deletions.
49 changes: 49 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,34 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
case 0x3A: // more SSE3 opcodes
opcode = F8;
switch (opcode) {
case 0x0F:
    // 0F 3A 0F /r ib: PALIGNR on the 64-bit (MMX) operands.
    // Below, "G" is the quadword at gback+gdoffset and "E" is the quadword at
    // wback+fixedaddress (both set up by GETGM/GETEM); u8 is the immediate.
    // The sequence generated depends only on u8, which is known here:
    //   u8 > 15     : G = 0
    //   u8 in 9..15 : G = G >> ((u8 - 8) * 8)
    //   u8 == 8     : G is left untouched, nothing is generated
    //   u8 in 1..7  : G = (G << ((8 - u8) * 8)) | (E >> (u8 * 8))
    //   u8 == 0     : G = E
    INST_NAME("PALIGNR Gm, Em, Ib");
    nextop = F8;
    GETGM();
    GETEM(x2, 1);
    u8 = F8;
    if (u8 > 15) {
        // every byte of the result is shifted out
        SD(xZR, gback, gdoffset);
    } else if (u8 > 8) {
        // only G contributes, moved down by the bytes past the first eight
        const int gm_bits = (u8 - 8) * 8;
        LD(x1, gback, gdoffset);
        SRLI(x1, x1, gm_bits);
        SD(x1, gback, gdoffset);
    } else if (u8 > 7) {
        // u8 == 8: the result is exactly G, so no load/store is needed
    } else if (u8 > 0) {
        // low part comes from E shifted down, high part from G shifted up;
        // both quadwords are loaded before G is overwritten
        const int em_bits = u8 * 8;
        const int gm_bits = (8 - u8) * 8;
        LD(x3, wback, fixedaddress);
        LD(x1, gback, gdoffset);
        SRLI(x3, x3, em_bits);
        SLLI(x1, x1, gm_bits);
        OR(x1, x1, x3);
        SD(x1, gback, gdoffset);
    } else {
        // u8 == 0: plain copy of E into G
        LD(x1, wback, fixedaddress);
        SD(x1, gback, gdoffset);
    }
    break;
case 0xCC:
INST_NAME("SHA1RNDS4 Gx, Ex, Ib");
nextop = F8;
Expand Down Expand Up @@ -2036,6 +2064,27 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SB(x3, gback, gdoffset + i);
}
break;
case 0xDD:
    // 0F DD /r: PADDUSW on the 64-bit (MMX) operands, i.e. four 16-bit lanes.
    // Per lane, the reference behaviour is:
    //   tmp32s = (int32_t)GX->uw[i] + EX->uw[i];
    //   GX->uw[i] = (tmp32s > 65535) ? 65535 : tmp32s;
    INST_NAME("PADDUSW Gm,Em");
    nextop = F8;
    GETGM();
    GETEM(x2, 0);
    // x5 keeps the saturation ceiling; it is loaded once, ahead of the lanes
    MOV32w(x5, 65535);
    for (int lane = 0; lane < 4; ++lane) {
        const int lane_off = lane * 2; // byte offset of this 16-bit lane
        LHU(x3, gback, gdoffset + lane_off);
        LHU(x4, wback, fixedaddress + lane_off);
        ADDW(x3, x3, x4);
        if (!rv64_zbb) {
            // no Zbb: hop over the next instruction when sum <= 65535,
            // otherwise replace the sum with the ceiling
            BGE(x5, x3, 8);
            MV(x3, x5);
        } else {
            // Zbb available: clamp with a single unsigned minimum
            MINU(x3, x3, x5);
        }
        SH(x3, gback, gdoffset + lane_off);
    }
    break;
case 0xE2:
INST_NAME("PSRAD Gm, Em");
nextop = F8;
Expand Down
83 changes: 61 additions & 22 deletions src/dynarec/rv64/dynarec_rv64_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
nextop = F8;
GETGX();
GETEX(x2, 0);
sse_forget_reg(dyn, ninst, x5);

ADDI(x5, xEmu, offsetof(x64emu_t, scratch));

Expand Down Expand Up @@ -1170,28 +1169,64 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETGX();
GETEX(x2, 1);
u8 = F8;
sse_forget_reg(dyn, ninst, x5);
ADDI(x5, xEmu, offsetof(x64emu_t, scratch));
// preserve gd
LD(x3, gback, gdoffset + 0);
LD(x4, gback, gdoffset + 8);
SD(x3, x5, 0);
SD(x4, x5, 8);
if (u8 > 31) {
SD(xZR, gback, gdoffset + 0);
SD(xZR, gback, gdoffset + 8);
} else if (u8 > 23) {
LD(x5, gback, gdoffset + 8);
if (u8 > 24) {
SRLI(x5, x5, 8 * (u8 - 24));
}
SD(x5, gback, gdoffset + 0);
SD(xZR, gback, gdoffset + 8);
} else if (u8 > 15) {
if (u8 > 16) {
LD(x5, gback, gdoffset + 8);
LD(x4, gback, gdoffset + 0);
SRLI(x3, x5, 8 * (u8 - 16)); // lower of higher 64 bits
SLLI(x5, x5, 8 * (24 - u8)); // higher of lower 64 bits
SD(x3, gback, gdoffset + 8);
SRLI(x4, x4, 8 * (u8 - 16)); // lower of lower 64 bits
OR(x4, x4, x5); // lower 64 bits
SD(x4, gback, gdoffset + 0);
}
} else if (u8 > 7) {
if (u8 > 8) {
LD(x5, gback, gdoffset + 8);
LD(x4, gback, gdoffset + 0);
LD(x3, wback, fixedaddress + 8);
SLLI(x5, x5, 8 * (16 - u8)); // higher of higher 64 bits
SRLI(x1, x4, 8 * (u8 - 8)); // lower of higher 64 bits
SLLI(x4, x4, 8 * (16 - u8)); // higher of lower 64 bits
OR(x5, x1, x5); // higher 64 bits
SRLI(x3, x3, 8 * (u8 - 8)); // lower of lower 64 bits
SD(x5, gback, gdoffset + 8);
OR(x4, x4, x3); // lower 64 bits
SD(x4, gback, gdoffset + 0);
} else {
LD(x5, gback, gdoffset + 0);
LD(x4, wback, fixedaddress + 8);
SD(x5, gback, gdoffset + 8);
SD(x4, gback, gdoffset + 0);
}
} else {
for (int i = 0; i < 16; ++i, ++u8) {
if (u8 > 15) {
if (u8 > 31) {
SB(xZR, gback, gdoffset + i);
continue;
} else
LBU(x3, x5, u8 - 16);
} else {
LBU(x3, wback, fixedaddress + u8);
}
SB(x3, gback, gdoffset + i);
if (u8 > 0) {
LD(x5, gback, gdoffset + 0);
LD(x4, wback, fixedaddress + 8);
LD(x3, wback, fixedaddress + 0);
SLLI(x5, x5, 8 * (8 - u8)); // higher of higher 64 bits
SRLI(x1, x4, 8 * (u8 - 0)); // lower of higher 64 bits
SLLI(x4, x4, 8 * (8 - u8)); // higher of lower 64 bits
OR(x5, x1, x5); // higher 64 bits
SRLI(x3, x3, 8 * (u8 - 0)); // lower of lower 64 bits
SD(x5, gback, gdoffset + 8);
OR(x4, x4, x3); // lower 64 bits
SD(x4, gback, gdoffset + 0);
} else {
LD(x5, wback, fixedaddress + 8);
LD(x4, wback, fixedaddress + 0);
SD(x5, gback, gdoffset + 8);
SD(x4, gback, gdoffset + 0);
}
}
break;
Expand Down Expand Up @@ -2776,15 +2811,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
nextop = F8;
GETGX();
GETEX(x2, 0);
MOV32w(x5, 65535);
for (int i = 0; i < 8; ++i) {
// tmp32s = (int32_t)GX->uw[i] + EX->uw[i];
// GX->uw[i] = (tmp32s>65535)?65535:tmp32s;
LHU(x3, gback, gdoffset + i * 2);
LHU(x4, wback, fixedaddress + i * 2);
ADDW(x3, x3, x4);
MOV32w(x4, 65536);
BLT(x3, x4, 8);
ADDIW(x3, x4, -1);
if (rv64_zbb) {
MINU(x3, x3, x5);
} else {
BGE(x5, x3, 8); // tmp32s <= 65535?
MV(x3, x5);
}
SH(x3, gback, gdoffset + i * 2);
}
break;
Expand Down

0 comments on commit 98d9f36

Please sign in to comment.