diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
index ecde3e8591..46e4f5efb1 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -1006,6 +1006,8 @@ class OpDispatchBuilder final : public IREmitter {
   void AVX128_VMOVDDUP(OpcodeArgs);
   void AVX128_VMOVSLDUP(OpcodeArgs);
   void AVX128_VMOVSHDUP(OpcodeArgs);
+  template<size_t ElementSize>
+  void AVX128_VBROADCAST(OpcodeArgs);
 
   // End of AVX 128-bit implementation
 
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
index c56172df38..f434d56a8f 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
@@ -270,9 +270,9 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
 
     // TODO: {OPD(2, 0b01, 0x16), 1, &OpDispatchBuilder::VPERMDOp},
     // TODO: {OPD(2, 0b01, 0x17), 1, &OpDispatchBuilder::PTestOp},
-    // TODO: {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::VBROADCASTOp<4>},
-    // TODO: {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::VBROADCASTOp<8>},
-    // TODO: {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::VBROADCASTOp<16>},
+    {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::AVX128_VBROADCAST<4>},
+    {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::AVX128_VBROADCAST<8>},
+    {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::AVX128_VBROADCAST<16>},
     {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::AVX128_VectorUnary},
     {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::AVX128_VectorUnary},
     {OPD(2, 0b01, 0x1E), 1, &OpDispatchBuilder::AVX128_VectorUnary},
@@ -317,12 +317,12 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
     // TODO: {OPD(2, 0b01, 0x46), 1, &OpDispatchBuilder::VPSRAVDOp},
     // TODO: {OPD(2, 0b01, 0x47), 1, &OpDispatchBuilder::VPSLLVOp},
 
-    // TODO: {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::VBROADCASTOp<4>},
-    // TODO: {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::VBROADCASTOp<8>},
-    // TODO: {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::VBROADCASTOp<16>},
+    {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::AVX128_VBROADCAST<4>},
+    {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::AVX128_VBROADCAST<8>},
+    {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::AVX128_VBROADCAST<16>},
 
-    // TODO: {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::VBROADCASTOp<1>},
-    // TODO: {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::VBROADCASTOp<2>},
+    {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::AVX128_VBROADCAST<1>},
+    {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::AVX128_VBROADCAST<2>},
 
     // TODO: {OPD(2, 0b01, 0x8C), 1, &OpDispatchBuilder::VPMASKMOVOp},
     // TODO: {OPD(2, 0b01, 0x8E), 1, &OpDispatchBuilder::VPMASKMOVOp},
@@ -895,4 +895,38 @@ void OpDispatchBuilder::AVX128_VMOVSHDUP(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src);
 }
 
+// Handler for the VBROADCAST* opcodes installed in InstallAVX128Handlers; builds the result as a Low/High RefPair.
+// ElementSize is the template argument the handler table instantiates with 1, 2, 4, 8 or 16 and is forwarded as the
+// element size to _VDupElement / _VBroadcastFromMem (presumably a width in bytes, since 16 is compared against
+// OpSize::i128Bit - confirm).
+// When the source operand reports IsGPR() it is loaded and element 0 of its low half is duplicated, unless
+// ElementSize equals OpSize::i128Bit. Otherwise the segment-adjusted address is computed and broadcast from memory.
+// The high half is the named zero vector constant for XMM-sized destinations and a copy of the low half otherwise.
+template<size_t ElementSize>
+void OpDispatchBuilder::AVX128_VBROADCAST(OpcodeArgs) {
+  const auto DstSize = GetDstSize(Op);
+  const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
+  RefPair Src {};
+
+  if (Op->Src[0].IsGPR()) {
+    Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
+    if (ElementSize != OpSize::i128Bit) {
+      // Only duplicate if not VBROADCASTF128.
+      Src.Low = _VDupElement(OpSize::i128Bit, ElementSize, Src.Low, 0);
+    }
+  } else {
+    // Get the address to broadcast from into a GPR.
+    Ref Address = MakeSegmentAddress(Op, Op->Src[0], CTX->GetGPRSize());
+    Src.Low = _VBroadcastFromMem(OpSize::i128Bit, ElementSize, Address);
+  }
+
+  if (Is128Bit) {
+    Src.High = _LoadNamedVectorConstant(OpSize::i128Bit, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO);
+  } else {
+    Src.High = Src.Low;
+  }
+
+  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src);
+}
+
 } // namespace FEXCore::IR