From 51911dfbbe15ca7aa20a8dd0afd30f4395061417 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Fri, 21 Jun 2024 05:03:30 -0700
Subject: [PATCH] AVX128: Implement support for VPCLMULQDQ

This is just the 128-bit version twice.
---
 .../Interface/Core/OpcodeDispatcher.cpp       |  4 ++-
 .../Source/Interface/Core/OpcodeDispatcher.h  |  2 ++
 .../Core/OpcodeDispatcher/AVX_128.cpp         | 34 +++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
index 7c4b4285ea..4ee31213ac 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -5415,13 +5415,15 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
   if (CTX->HostFeatures.SupportsSVE256) {
     InstallToTable(FEXCore::X86Tables::VEXTableOps, AVXTable);
     InstallToTable(FEXCore::X86Tables::VEXTableGroupOps, VEXTableGroupOps);
+    if (CTX->HostFeatures.SupportsPMULL_128Bit) {
+      InstallToTable(FEXCore::X86Tables::VEXTableOps, VEX_PCLMUL);
+    }
   } else if (CTX->HostFeatures.SupportsAVX) {
     InstallAVX128Handlers();
   }
 
   if (CTX->HostFeatures.SupportsPMULL_128Bit) {
     InstallToTable(FEXCore::X86Tables::H0F3ATableOps, H0F3A_PCLMUL);
-    InstallToTable(FEXCore::X86Tables::VEXTableOps, VEX_PCLMUL);
   }
   Initialized = true;
 }
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
index e862ee1d62..687aad13e5 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -1193,6 +1193,8 @@ class OpDispatchBuilder final : public IREmitter {
 
   void AVX128_VPERMD(OpcodeArgs);
 
+  void AVX128_VPCLMULQDQ(OpcodeArgs);
+
   // End of AVX 128-bit implementation
   void InvalidOp(OpcodeArgs);
 
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
index e204a90781..f41d640933 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
@@ -402,6 +402,12 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
   };
 #undef OPD
 
+#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
+  constexpr std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> VEX128_PCLMUL[] = {
+    {OPD(3, 0b01, 0x44), 1, &OpDispatchBuilder::AVX128_VPCLMULQDQ},
+  };
+#undef OPD
+
   auto InstallToTable = [](auto& FinalTable, auto& LocalTable) {
     for (auto Op : LocalTable) {
       auto OpNum = std::get<0>(Op);
@@ -415,6 +421,10 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
   InstallToTable(FEXCore::X86Tables::VEXTableOps, AVX128Table);
   InstallToTable(FEXCore::X86Tables::VEXTableGroupOps, VEX128TableGroupOps);
 
+  if (CTX->HostFeatures.SupportsPMULL_128Bit) {
+    InstallToTable(FEXCore::X86Tables::VEXTableOps, VEX128_PCLMUL);
+  }
+
   SaveAVXStateFunc = &OpDispatchBuilder::AVX128_SaveAVXState;
   RestoreAVXStateFunc = &OpDispatchBuilder::AVX128_RestoreAVXState;
   DefaultAVXStateFunc = &OpDispatchBuilder::AVX128_DefaultAVXState;
@@ -2862,4 +2872,28 @@ void OpDispatchBuilder::AVX128_VPERMD(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
+// Handles VPCLMULQDQ on the AVX128 path by emitting the 128-bit `_PCLMUL` IR op per 128-bit
+// lane: always for the low lane, and additionally for the high lane when the destination
+// size is not the XMM register size. Src[2] must be the literal selector immediate.
+void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) {
+  LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Selector needs to be literal here");
+
+  const auto Size = GetDstSize(Op);
+  const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
+
+  // NOTE(review): the last argument (`!Is128Bit`) presumably requests the high lane too; confirm.
+  auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
+  auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);
+
+  const auto Selector = static_cast<uint8_t>(Op->Src[2].Data.Literal.Value);
+
+  RefPair Result {};
+  Result.Low = _PCLMUL(OpSize::i128Bit, Src1.Low, Src2.Low, Selector);
+  if (!Is128Bit) {
+    // Both lanes use the same selector.
+    Result.High = _PCLMUL(OpSize::i128Bit, Src1.High, Src2.High, Selector);
+  }
+  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
+}
+
 } // namespace FEXCore::IR