[ARM64_DYNAREC] Added AVX.66.0F38 90/92 opcodes and added AVX.66.0F38 04 opcode
ptitSeb committed Jun 10, 2024
1 parent 072241e commit 8b3cd23
Showing 1 changed file with 32 additions and 5 deletions.
src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c (37 changes: 32 additions & 5 deletions)
@@ -87,6 +87,33 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
 
+        case 0x04:
+            INST_NAME("PMADDUBSW Gx, Vx, Ex");
+            nextop = F8;
+            q0 = fpu_get_scratch(dyn, ninst);
+            q1 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
+                if(v0==v1 || v0==v2) {
+                    if(!l) d0 = fpu_get_scratch(dyn, ninst);
+                } else
+                    d0 = v0;
+                UXTL_8(q0, v2);   // this is unsigned, so 0 extended
+                SXTL_8(q1, v1);   // this is signed
+                VMULQ_16(q0, q0, q1);
+                SADDLPQ_16(q1, q0);
+                UXTL2_8(q0, v2);  // this is unsigned
+                SQXTN_16(d0, q1); // SQXTN reset the vector so need to grab the high part first
+                SXTL2_8(q1, v1);  // this is signed
+                VMULQ_16(q0, q0, q1);
+                SADDLPQ_16(q0, q0);
+                SQXTN2_16(d0, q0);
+                if(v0!=d0)
+                    VMOVQ(v0, d0);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
         case 0x08:
             INST_NAME("VPSIGNB Gx, Vx, Ex");
             nextop = F8;
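
For reference, the x86 behaviour that the new case 0x04 block above lowers to NEON: each unsigned byte of the Vx operand is multiplied by the corresponding signed byte of the Ex operand, adjacent 16-bit products are summed, and each sum is saturated to a signed 16-bit result; that is what the UXTL/SXTL, VMULQ_16, SADDLPQ_16 and SQXTN_16 sequence computes for each half of the vector. A minimal scalar sketch of one 128-bit lane (pmaddubsw_lane is a hypothetical reference helper, not part of the commit):

#include <stdint.h>

// Scalar reference for one 128-bit lane of PMADDUBSW:
// dst[i] = saturate16(u[2*i]*s[2*i] + u[2*i+1]*s[2*i+1]),
// with u[] treated as unsigned bytes and s[] as signed bytes.
static void pmaddubsw_lane(int16_t dst[8], const uint8_t u[16], const int8_t s[16])
{
    for (int i = 0; i < 8; ++i) {
        int32_t sum = (int32_t)u[2*i]   * s[2*i]
                    + (int32_t)u[2*i+1] * s[2*i+1];
        if (sum > INT16_MAX) sum = INT16_MAX; // signed saturation, as SQXTN does
        if (sum < INT16_MIN) sum = INT16_MIN;
        dst[i] = (int16_t)sum;
    }
}

The widen-multiply-pairwise-add-narrow dance is needed because base NEON has no single mixed-sign byte multiply-add instruction.
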
@@ -957,18 +984,18 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                     v2 = sse_get_reg(dyn, ninst, x1, vex.v, 1);
                     v1 = sse_get_reg(dyn, ninst, x1, eb2, 0);
                 } else {
-                    v0 = ymm_get_reg(dyn, ninst, x1, gd, 1, vex.v, eb2, -1);
-                    v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 1, gd, eb2, -1);
-                    v1 = ymm_get_reg(dyn, ninst, x1, eb2, 0, gd, vex.v, -1);
+                    v0 = ymm_get_reg(dyn, ninst, x1, gd, 1, vex.v, (!rex.w)?eb2:-1, -1);
+                    v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 1, gd, (!rex.w)?eb2:-1, -1);
+                    if(!rex.w) v1 = ymm_get_reg(dyn, ninst, x1, eb2, 0, gd, vex.v, -1);
                 }
                 // prepare mask
                 if(rex.w) VSSHRQ_64(v2, v2, 63); else VSSHRQ_32(v2, v2, 31); // prescale the values
-                if(wb1) VSHLQ_32(q1, v1, wb1); else q1 = v1;
+                if(wb1) { if(!l || !rex.w) VSHLQ_32(q1, v1, wb1); } else q1 = v1;
                 // slow gather, not much choice here...
                 if(rex.w) for(int i=0; i<2; ++i) {
                     VMOVQDto(x4, v2, i);
                     TBZ(x4, 0, 4+4*4);
-                    SMOVQSto(x4, q1, i);
+                    SMOVQSto(x4, q1, i+l*2);
                     ADDx_REG(x4, x4, ed);
                     VLD1_64(v0, i, x4);
                     VMOVQDfrom(v2, i, xZR);
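For context on the second hunk: opcodes 90 and 92 in this map are the dword-indexed gathers (VPGATHERDD/VPGATHERDQ and VGATHERDPS/VGATHERDPD). With REX.W the destination elements are 64-bit while the indices stay 32-bit, so the upper YMM lane (l==1) takes its indices from elements 2 and 3 of the same 128-bit index register; that is what the i+l*2 element selector, the (!rex.w)?eb2:-1 arguments, and the one-time VSHLQ_32 index scaling appear to arrange. A rough scalar sketch of the REX.W form, with illustrative names (gather_dq_sketch, base, scale) that are not part of box64:

#include <stdint.h>
#include <string.h>

// Rough scalar model of a REX.W dword-indexed gather (VPGATHERDQ-style) for a
// 256-bit destination: 4 qword elements, all four 32-bit indices taken from the
// low 128 bits of the index register, each load gated on the sign bit of the
// corresponding 64-bit mask element.
static void gather_dq_sketch(uint64_t dst[4], uint64_t mask[4],
                             const int32_t idx[4], const uint8_t *base, int scale)
{
    for (int i = 0; i < 4; ++i) {
        if (mask[i] >> 63)  // top bit set -> element is gathered
            memcpy(&dst[i], base + (int64_t)idx[i] * (1LL << scale), sizeof(uint64_t));
        mask[i] = 0;        // architecturally the mask ends up all-zero
    }
}
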
