diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index f246d5bf7e..5a2a624774 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1079,6 +1079,9 @@ class OpDispatchBuilder final : public IREmitter { template void AVX128_VPHSUB(OpcodeArgs); + Ref AVX128_PHSUBSWImpl(Ref Src1, Ref Src2); + void AVX128_VPHSUBSW(OpcodeArgs); + // End of AVX 128-bit implementation void InvalidOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp index 4d814c3322..3513068ec8 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp @@ -257,7 +257,7 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::AVX128_VPHSUB<2>}, {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::AVX128_VPHSUB<4>}, - // TODO: {OPD(2, 0b01, 0x07), 1, &OpDispatchBuilder::VPHSUBSWOp}, + {OPD(2, 0b01, 0x07), 1, &OpDispatchBuilder::AVX128_VPHSUBSW}, {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::AVX128_VPSIGN<1>}, {OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::AVX128_VPSIGN<2>}, @@ -1791,4 +1791,18 @@ void OpDispatchBuilder::AVX128_VPHSUB(OpcodeArgs) { AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](size_t _ElementSize, Ref Src1, Ref Src2) { return AVX128_PHSUBImpl(Src1, Src2, _ElementSize); }); } +Ref OpDispatchBuilder::AVX128_PHSUBSWImpl(Ref Src1, Ref Src2) { /* Emits the IR for one 128-bit PHSUBSW step: unzips Src1/Src2 into Even and Odd vectors and returns _VSQSub(Even, Odd). */ + const uint8_t ElementSize = 2; /* presumably bytes per element; the handler below passes OpSize::i16Bit for the same op - confirm */ + + auto Even = _VUnZip(OpSize::i128Bit, ElementSize, Src1, Src2); + auto Odd = _VUnZip2(OpSize::i128Bit, ElementSize, Src1, Src2); + + // Even - Odd through _VSQSub (a saturating subtract, judging by the op name - confirm) + return _VSQSub(OpSize::i128Bit, ElementSize, Even, Odd); +} + +void OpDispatchBuilder::AVX128_VPHSUBSW(OpcodeArgs) { /* Handler registered at OPD(2, 0b01, 0x07); hands AVX128_VectorBinaryImpl a lambda that calls AVX128_PHSUBSWImpl and ignores _ElementSize. */ + AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit, [this](size_t _ElementSize, Ref
Src1, Ref Src2) { return AVX128_PHSUBSWImpl(Src1, Src2); }); +} + } // namespace FEXCore::IR