From 8be871a464d39c8b5f7f8b4f5fe60b3239b61a8d Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Wed, 19 Jun 2024 07:54:22 -0700 Subject: [PATCH] AVX128: Implement support for vblend{ps,pd}/vpblendvb --- .../Source/Interface/Core/OpcodeDispatcher.h | 3 ++ .../Core/OpcodeDispatcher/AVX_128.cpp | 40 +++++++++++++++++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 43865fd6a1..cbc7447af3 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1175,6 +1175,9 @@ class OpDispatchBuilder final : public IREmitter { void AVX128_MASKMOV(OpcodeArgs); + template + void AVX128_VectorVariableBlend(OpcodeArgs); + // End of AVX 128-bit implementation void InvalidOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp index 8fbb71c9bd..073d8dcff5 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp @@ -368,9 +368,9 @@ void OpDispatchBuilder::InstallAVX128Handlers() { // TODO: {OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op}, - // TODO: {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::AVXVectorVariableBlend<4>}, - // TODO: {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::AVXVectorVariableBlend<8>}, - // TODO: {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::AVXVectorVariableBlend<1>}, + {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend<4>}, + {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend<8>}, + {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend<1>}, {OPD(3, 0b01, 0x60), 1, &OpDispatchBuilder::AVX128_VPCMPESTRM}, {OPD(3, 0b01, 0x61), 1, &OpDispatchBuilder::AVX128_VPCMPESTRI}, @@ -2623,4 +2623,38 @@ void OpDispatchBuilder::AVX128_MASKMOV(OpcodeArgs) { _StoreMem(FPRClass, Size, MemDest, XMMReg, 1); } +template +void OpDispatchBuilder::AVX128_VectorVariableBlend(OpcodeArgs) { + const auto Size = GetSrcSize(Op); + const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; + + constexpr auto ElementSizeBits = ElementSize * 8; + + auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit); + auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit); + + LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src[2] needs to be literal here"); + const auto Src3Selector = Op->Src[2].Data.Literal.Value; + + // Mask register is encoded within bits [7:4] of the selector + RefPair Mask {.Low = AVX128_LoadXMMRegister((Src3Selector >> 4) & 0b1111, false)}; + + if (!Is128Bit) { + Mask.High = AVX128_LoadXMMRegister((Src3Selector >> 4) & 0b1111, true); + } + + auto Convert = [this](Ref Src1, Ref Src2, Ref Mask) { + Ref Shifted = _VSShrI(OpSize::i128Bit, ElementSize, Mask, ElementSizeBits - 1); + return _VBSL(OpSize::i128Bit, Shifted, Src2, Src1); + }; + + RefPair Result {}; + Result.Low = Convert(Src1.Low, Src2.Low, Mask.Low); + if (!Is128Bit) { + Result.High = Convert(Src1.High, Src2.High, Mask.High); + } + + AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result); +} + } // namespace FEXCore::IR