From ca43e2a61cfa82874a72633e42010cd16118137c Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:39:45 -0700 Subject: [PATCH 01/13] OpcodeDispatcher: Convert PUNPCKL to Bind handler --- .../Interface/Core/OpcodeDispatcher.cpp | 30 +++++++++---------- .../Source/Interface/Core/OpcodeDispatcher.h | 6 ++-- .../Core/OpcodeDispatcher/Vector.cpp | 16 ++-------- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 9ac6a881f5..d26267cd61 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5033,8 +5033,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::VMOVLPOp}, {OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::VMOVLPOp}, - {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::VPUNPCKLOp<4>}, - {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::VPUNPCKLOp<8>}, + {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 4>}, + {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 8>}, {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::VPUNPCKHOp<4>}, {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::VPUNPCKHOp<8>}, @@ -5132,9 +5132,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp}, {OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp}, - {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::VPUNPCKLOp<1>}, - {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::VPUNPCKLOp<2>}, - {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::VPUNPCKLOp<4>}, + {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 1>}, + {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 2>}, + {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 4>}, {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::VPACKSSOp<2>}, {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 1>}, {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 2>}, @@ -5144,7 +5144,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::VPUNPCKHOp<2>}, {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::VPUNPCKHOp<4>}, {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::VPACKSSOp<4>}, - {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::VPUNPCKLOp<8>}, + {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 8>}, {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::VPUNPCKHOp<8>}, {OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>}, @@ -5637,7 +5637,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { // SSE {0x10, 2, &OpDispatchBuilder::MOVVectorUnalignedOp}, {0x12, 2, &OpDispatchBuilder::MOVLPOp}, - {0x14, 1, &OpDispatchBuilder::PUNPCKLOp<4>}, + {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 4>}, {0x15, 1, &OpDispatchBuilder::PUNPCKHOp<4>}, {0x16, 2, &OpDispatchBuilder::MOVHPDOp}, {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp}, @@ -5662,9 +5662,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, 4>}, {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, 4>}, {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, 4>}, - {0x60, 1, &OpDispatchBuilder::PUNPCKLOp<1>}, - {0x61, 1, &OpDispatchBuilder::PUNPCKLOp<2>}, - {0x62, 1, &OpDispatchBuilder::PUNPCKLOp<4>}, + {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 1>}, + {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 2>}, + {0x62, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 4>}, {0x63, 1, &OpDispatchBuilder::PACKSSOp<2>}, {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 1>}, {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 2>}, @@ -5925,7 +5925,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { constexpr std::tuple OpSizeModOpTable[] = { {0x10, 2, &OpDispatchBuilder::MOVVectorUnalignedOp}, {0x12, 2, &OpDispatchBuilder::MOVLPOp}, - {0x14, 1, &OpDispatchBuilder::PUNPCKLOp<8>}, + {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 8>}, {0x15, 1, &OpDispatchBuilder::PUNPCKHOp<8>}, {0x16, 2, &OpDispatchBuilder::MOVHPDOp}, {0x19, 7, &OpDispatchBuilder::NOPOp}, @@ -5951,9 +5951,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, 8>}, {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, 8>}, {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, 8>}, - {0x60, 1, &OpDispatchBuilder::PUNPCKLOp<1>}, - {0x61, 1, &OpDispatchBuilder::PUNPCKLOp<2>}, - {0x62, 1, &OpDispatchBuilder::PUNPCKLOp<4>}, + {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 1>}, + {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 2>}, + {0x62, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 4>}, {0x63, 1, &OpDispatchBuilder::PACKSSOp<2>}, {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 1>}, {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 2>}, @@ -5963,7 +5963,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x69, 1, &OpDispatchBuilder::PUNPCKHOp<2>}, {0x6A, 1, &OpDispatchBuilder::PUNPCKHOp<4>}, {0x6B, 1, &OpDispatchBuilder::PACKSSOp<4>}, - {0x6C, 1, &OpDispatchBuilder::PUNPCKLOp<8>}, + {0x6C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 8>}, {0x6D, 1, &OpDispatchBuilder::PUNPCKHOp<8>}, {0x6E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>}, {0x6F, 1, &OpDispatchBuilder::MOVVectorAlignedOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 067fcbb7b8..e0a554826d 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -464,8 +464,7 @@ class OpDispatchBuilder final : public IREmitter { template void MOVMSKOp(OpcodeArgs); void MOVMSKOpOne(OpcodeArgs); - template - void PUNPCKLOp(OpcodeArgs); + void PUNPCKLOp(OpcodeArgs, size_t ElementSize); template void PUNPCKHOp(OpcodeArgs); void PSHUFBOp(OpcodeArgs); @@ -724,8 +723,7 @@ class OpDispatchBuilder final : public IREmitter { template void VPUNPCKHOp(OpcodeArgs); - template - void VPUNPCKLOp(OpcodeArgs); + void VPUNPCKLOp(OpcodeArgs, size_t ElementSize); template void VPSRLIOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 7b82f51359..3b1e70e35f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -780,8 +780,7 @@ void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) { StoreResult(GPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs) { +void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs, size_t ElementSize) { auto Size = GetSrcSize(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); @@ -791,13 +790,7 @@ void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs) { StoreResult(FPRClass, Op, ALUOp, -1); } -template void OpDispatchBuilder::PUNPCKLOp<1>(OpcodeArgs); -template void OpDispatchBuilder::PUNPCKLOp<2>(OpcodeArgs); -template void OpDispatchBuilder::PUNPCKLOp<4>(OpcodeArgs); -template void OpDispatchBuilder::PUNPCKLOp<8>(OpcodeArgs); - -template -void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs) { +void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs, size_t ElementSize) { const auto SrcSize = GetSrcSize(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -817,11 +810,6 @@ void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPUNPCKLOp<1>(OpcodeArgs); -template void OpDispatchBuilder::VPUNPCKLOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPUNPCKLOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VPUNPCKLOp<8>(OpcodeArgs); - template void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs) { auto Size = GetSrcSize(Op); From a6ab2ca30d0cf5805f1630c19ca33b6b4ac619fa Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:42:24 -0700 Subject: [PATCH 02/13] OpcodeDispatcher: Convert PUNPCKH to Bind handler --- .../Interface/Core/OpcodeDispatcher.cpp | 30 +++++++++---------- .../Source/Interface/Core/OpcodeDispatcher.h | 6 ++-- .../Core/OpcodeDispatcher/Vector.cpp | 16 ++-------- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index d26267cd61..2afb410848 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5036,8 +5036,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 4>}, {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 8>}, - {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::VPUNPCKHOp<4>}, - {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::VPUNPCKHOp<8>}, + {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 4>}, + {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 8>}, {OPD(1, 0b00, 0x16), 1, &OpDispatchBuilder::VMOVHPOp}, {OPD(1, 0b01, 0x16), 1, &OpDispatchBuilder::VMOVHPOp}, @@ -5140,12 +5140,12 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 2>}, {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 4>}, {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::VPACKUSOp<2>}, - {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::VPUNPCKHOp<1>}, - {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::VPUNPCKHOp<2>}, - {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::VPUNPCKHOp<4>}, + {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 1>}, + {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 2>}, + {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 4>}, {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::VPACKSSOp<4>}, {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 8>}, - {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::VPUNPCKHOp<8>}, + {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 8>}, {OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>}, {OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp}, @@ -5638,7 +5638,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x10, 2, &OpDispatchBuilder::MOVVectorUnalignedOp}, {0x12, 2, &OpDispatchBuilder::MOVLPOp}, {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 4>}, - {0x15, 1, &OpDispatchBuilder::PUNPCKHOp<4>}, + {0x15, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 4>}, {0x16, 2, &OpDispatchBuilder::MOVHPDOp}, {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp}, {0x2A, 1, &OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float}, @@ -5670,9 +5670,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 2>}, {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 4>}, {0x67, 1, &OpDispatchBuilder::PACKUSOp<2>}, - {0x68, 1, &OpDispatchBuilder::PUNPCKHOp<1>}, - {0x69, 1, &OpDispatchBuilder::PUNPCKHOp<2>}, - {0x6A, 1, &OpDispatchBuilder::PUNPCKHOp<4>}, + {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 1>}, + {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 2>}, + {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 4>}, {0x6B, 1, &OpDispatchBuilder::PACKSSOp<4>}, {0x70, 1, &OpDispatchBuilder::PSHUFW8ByteOp}, @@ -5926,7 +5926,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x10, 2, &OpDispatchBuilder::MOVVectorUnalignedOp}, {0x12, 2, &OpDispatchBuilder::MOVLPOp}, {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 8>}, - {0x15, 1, &OpDispatchBuilder::PUNPCKHOp<8>}, + {0x15, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 8>}, {0x16, 2, &OpDispatchBuilder::MOVHPDOp}, {0x19, 7, &OpDispatchBuilder::NOPOp}, {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp}, @@ -5959,12 +5959,12 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 2>}, {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 4>}, {0x67, 1, &OpDispatchBuilder::PACKUSOp<2>}, - {0x68, 1, &OpDispatchBuilder::PUNPCKHOp<1>}, - {0x69, 1, &OpDispatchBuilder::PUNPCKHOp<2>}, - {0x6A, 1, &OpDispatchBuilder::PUNPCKHOp<4>}, + {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 1>}, + {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 2>}, + {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 4>}, {0x6B, 1, &OpDispatchBuilder::PACKSSOp<4>}, {0x6C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 8>}, - {0x6D, 1, &OpDispatchBuilder::PUNPCKHOp<8>}, + {0x6D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 8>}, {0x6E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>}, {0x6F, 1, &OpDispatchBuilder::MOVVectorAlignedOp}, {0x70, 1, &OpDispatchBuilder::PSHUFDOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index e0a554826d..b49d93bfa4 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -465,8 +465,7 @@ class OpDispatchBuilder final : public IREmitter { void MOVMSKOp(OpcodeArgs); void MOVMSKOpOne(OpcodeArgs); void PUNPCKLOp(OpcodeArgs, size_t ElementSize); - template - void PUNPCKHOp(OpcodeArgs); + void PUNPCKHOp(OpcodeArgs, size_t ElementSize); void PSHUFBOp(OpcodeArgs); template void PSHUFWOp(OpcodeArgs); @@ -720,8 +719,7 @@ class OpDispatchBuilder final : public IREmitter { void VPSRLDOp(OpcodeArgs); void VPSRLDQOp(OpcodeArgs); - template - void VPUNPCKHOp(OpcodeArgs); + void VPUNPCKHOp(OpcodeArgs, size_t ElementSize); void VPUNPCKLOp(OpcodeArgs, size_t ElementSize); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 3b1e70e35f..c5766998ae 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -810,8 +810,7 @@ void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs, size_t ElementSize) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs) { +void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs, size_t ElementSize) { auto Size = GetSrcSize(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -820,13 +819,7 @@ void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs) { StoreResult(FPRClass, Op, ALUOp, -1); } -template void OpDispatchBuilder::PUNPCKHOp<1>(OpcodeArgs); -template void OpDispatchBuilder::PUNPCKHOp<2>(OpcodeArgs); -template void OpDispatchBuilder::PUNPCKHOp<4>(OpcodeArgs); -template void OpDispatchBuilder::PUNPCKHOp<8>(OpcodeArgs); - -template -void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs) { +void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs, size_t ElementSize) { const auto SrcSize = GetSrcSize(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -846,11 +839,6 @@ void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPUNPCKHOp<1>(OpcodeArgs); -template void OpDispatchBuilder::VPUNPCKHOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPUNPCKHOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VPUNPCKHOp<8>(OpcodeArgs); - Ref OpDispatchBuilder::GeneratePSHUFBMask(uint8_t SrcSize) { // PSHUFB doesn't 100% match VTBL behaviour // VTBL will set the element zero if the index is greater than From 1aff3acbb9f18fb4f3180d2652455f98cde446d5 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:45:04 -0700 Subject: [PATCH 03/13] OpcodeDispatcher: Convert PSHUFW to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 10 +++++----- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 6 ++---- .../Interface/Core/OpcodeDispatcher/Vector.cpp | 12 ++---------- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 2afb410848..b431141264 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5151,9 +5151,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp}, {OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp}, - {OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::VPSHUFWOp<4, true>}, - {OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::VPSHUFWOp<2, false>}, - {OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::VPSHUFWOp<2, true>}, + {OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, 4, true>}, + {OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, 2, false>}, + {OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, 2, true>}, {OPD(1, 0b01, 0x74), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, 1>}, {OPD(1, 0b01, 0x75), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, 2>}, @@ -5884,7 +5884,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x6F, 1, &OpDispatchBuilder::MOVVectorUnalignedOp}, - {0x70, 1, &OpDispatchBuilder::PSHUFWOp}, + {0x70, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSHUFWOp, false>}, {0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>}, {0x7F, 1, &OpDispatchBuilder::MOVVectorUnalignedOp}, {0xB8, 1, &OpDispatchBuilder::PopcountOp}, @@ -5912,7 +5912,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x70, 1, &OpDispatchBuilder::PSHUFWOp}, + {0x70, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSHUFWOp, true>}, {0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, 4>}, {0x7D, 1, &OpDispatchBuilder::HSUBP<4>}, {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<4>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index b49d93bfa4..31e4b4491b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -467,8 +467,7 @@ class OpDispatchBuilder final : public IREmitter { void PUNPCKLOp(OpcodeArgs, size_t ElementSize); void PUNPCKHOp(OpcodeArgs, size_t ElementSize); void PSHUFBOp(OpcodeArgs); - template - void PSHUFWOp(OpcodeArgs); + void PSHUFWOp(OpcodeArgs, bool Low); void PSHUFW8ByteOp(OpcodeArgs); void PSHUFDOp(OpcodeArgs); template @@ -696,8 +695,7 @@ class OpDispatchBuilder final : public IREmitter { void VPSHUFBOp(OpcodeArgs); - template - void VPSHUFWOp(OpcodeArgs); + void VPSHUFWOp(OpcodeArgs, size_t ElementSize, bool Low); template void VPSLLOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index c5766998ae..ec16870d68 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -925,8 +925,7 @@ void OpDispatchBuilder::PSHUFW8ByteOp(OpcodeArgs) { StoreResult(FPRClass, Op, Dest, -1); } -template -void OpDispatchBuilder::PSHUFWOp(OpcodeArgs) { +void OpDispatchBuilder::PSHUFWOp(OpcodeArgs, bool Low) { constexpr auto IdentityCopy = 0b11'10'01'00; uint16_t Shuffle = Op->Src[1].Data.Literal.Value; @@ -976,9 +975,6 @@ void OpDispatchBuilder::PSHUFWOp(OpcodeArgs) { StoreResult(FPRClass, Op, Dest, -1); } -template void OpDispatchBuilder::PSHUFWOp(OpcodeArgs); -template void OpDispatchBuilder::PSHUFWOp(OpcodeArgs); - Ref OpDispatchBuilder::Single128Bit4ByteVectorShuffle(Ref Src, uint8_t Shuffle) { constexpr auto IdentityCopy = 0b11'10'01'00; @@ -1198,8 +1194,7 @@ void OpDispatchBuilder::PSHUFDOp(OpcodeArgs) { StoreResult(FPRClass, Op, Single128Bit4ByteVectorShuffle(Src, Shuffle), -1); } -template -void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs) { +void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs, size_t ElementSize, bool Low) { const auto SrcSize = GetSrcSize(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; auto Shuffle = Op->Src[1].Literal(); @@ -1247,9 +1242,6 @@ void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPSHUFWOp<2, false>(OpcodeArgs); -template void OpDispatchBuilder::VPSHUFWOp<2, true>(OpcodeArgs); -template void OpDispatchBuilder::VPSHUFWOp<4, true>(OpcodeArgs); Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t DstSize, size_t ElementSize, Ref Src1, Ref Src2, uint8_t Shuffle) { // Since 256-bit variants and up don't lane cross, we can construct From 0dd687a7a1b5b40eca80872397952a36cd59a0df Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:47:30 -0700 Subject: [PATCH 04/13] OpcodeDispatcher: Convert SHUFOp to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 8 ++++---- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 6 ++---- .../Source/Interface/Core/OpcodeDispatcher/Vector.cpp | 10 ++-------- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index b431141264..fbdc4099b3 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5180,8 +5180,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::VPINSRWOp}, {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::PExtrOp<2>}, - {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::VSHUFOp<4>}, - {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::VSHUFOp<8>}, + {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, 4>}, + {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, 8>}, {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<8>}, {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<4>}, @@ -5682,7 +5682,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x77, 1, &OpDispatchBuilder::X87EMMS}, {0xC2, 1, &OpDispatchBuilder::VFCMPOp<4>}, - {0xC6, 1, &OpDispatchBuilder::SHUFOp<4>}, + {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, 4>}, {0xD1, 1, &OpDispatchBuilder::PSRLDOp<2>}, {0xD2, 1, &OpDispatchBuilder::PSRLDOp<4>}, @@ -5980,7 +5980,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xC2, 1, &OpDispatchBuilder::VFCMPOp<8>}, {0xC4, 1, &OpDispatchBuilder::PINSROp<2>}, {0xC5, 1, &OpDispatchBuilder::PExtrOp<2>}, - {0xC6, 1, &OpDispatchBuilder::SHUFOp<8>}, + {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, 8>}, {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<8>}, {0xD1, 1, &OpDispatchBuilder::PSRLDOp<2>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 31e4b4491b..5813300c80 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -505,8 +505,7 @@ class OpDispatchBuilder final : public IREmitter { void LZCNT(OpcodeArgs); template void VFCMPOp(OpcodeArgs); - template - void SHUFOp(OpcodeArgs); + void SHUFOp(OpcodeArgs, size_t ElementSize); template void PINSROp(OpcodeArgs); void InsertPSOp(OpcodeArgs); @@ -724,8 +723,7 @@ class OpDispatchBuilder final : public IREmitter { template void VPSRLIOp(OpcodeArgs); - template - void VSHUFOp(OpcodeArgs); + void VSHUFOp(OpcodeArgs, size_t ElementSize); template void VTESTPOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index ec16870d68..86d68fbf13 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -1434,8 +1434,7 @@ Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t DstSize, size_t ElementSize return Dest; } -template -void OpDispatchBuilder::SHUFOp(OpcodeArgs) { +void OpDispatchBuilder::SHUFOp(OpcodeArgs, size_t ElementSize) { Ref Src1Node = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2Node = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); uint8_t Shuffle = Op->Src[1].Literal(); @@ -1443,11 +1442,8 @@ void OpDispatchBuilder::SHUFOp(OpcodeArgs) { Ref Result = SHUFOpImpl(Op, GetDstSize(Op), ElementSize, Src1Node, Src2Node, Shuffle); StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::SHUFOp<4>(OpcodeArgs); -template void OpDispatchBuilder::SHUFOp<8>(OpcodeArgs); -template -void OpDispatchBuilder::VSHUFOp(OpcodeArgs) { +void OpDispatchBuilder::VSHUFOp(OpcodeArgs, size_t ElementSize) { Ref Src1Node = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2Node = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); uint8_t Shuffle = Op->Src[2].Literal(); @@ -1455,8 +1451,6 @@ void OpDispatchBuilder::VSHUFOp(OpcodeArgs) { Ref Result = SHUFOpImpl(Op, GetDstSize(Op), ElementSize, Src1Node, Src2Node, Shuffle); StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VSHUFOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VSHUFOp<8>(OpcodeArgs); void OpDispatchBuilder::VANDNOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); From f98b18800c5692104366839e2be1f48928f612b3 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:49:57 -0700 Subject: [PATCH 05/13] OpcodeDispatcher: Convert MOVMSK to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 8 ++++---- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 3 +-- FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp | 6 +----- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index fbdc4099b3..4e44caef3f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5067,8 +5067,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<4>}, {OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<8>}, - {OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::MOVMSKOp<4>}, - {OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::MOVMSKOp<8>}, + {OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, 4>}, + {OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, 8>}, {OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, 4>}, {OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, 8>}, @@ -5646,7 +5646,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, false>}, {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, true>}, {0x2E, 2, &OpDispatchBuilder::UCOMISxOp<4>}, - {0x50, 1, &OpDispatchBuilder::MOVMSKOp<4>}, + {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, 4>}, {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, 4>}, {0x52, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRSQRT, 4>}, {0x53, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRECP, 4>}, @@ -5937,7 +5937,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x2E, 2, &OpDispatchBuilder::UCOMISxOp<8>}, {0x40, 16, &OpDispatchBuilder::CMOVOp}, - {0x50, 1, &OpDispatchBuilder::MOVMSKOp<8>}, + {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, 8>}, {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, 8>}, {0x54, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, 16>}, {0x55, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, 8>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 5813300c80..918a0f5b7f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -461,8 +461,7 @@ class OpDispatchBuilder final : public IREmitter { void MOVQOp(OpcodeArgs, VectorOpType VectorType); void MOVQMMXOp(OpcodeArgs); - template - void MOVMSKOp(OpcodeArgs); + void MOVMSKOp(OpcodeArgs, size_t ElementSize); void MOVMSKOpOne(OpcodeArgs); void PUNPCKLOp(OpcodeArgs, size_t ElementSize); void PUNPCKHOp(OpcodeArgs, size_t ElementSize); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 86d68fbf13..ad7c6244d9 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -702,8 +702,7 @@ void OpDispatchBuilder::MOVQMMXOp(OpcodeArgs) { StoreResult(FPRClass, Op, Src, 1); } -template -void OpDispatchBuilder::MOVMSKOp(OpcodeArgs) { +void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, size_t ElementSize) { auto Size = GetSrcSize(Op); uint8_t NumElements = Size / ElementSize; @@ -752,9 +751,6 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs) { } } -template void OpDispatchBuilder::MOVMSKOp<4>(OpcodeArgs); -template void OpDispatchBuilder::MOVMSKOp<8>(OpcodeArgs); - void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; From 25cc2bdcb82bfa73b04ef797c45d59d5ad364838 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:51:02 -0700 Subject: [PATCH 06/13] OpcodeDispatcher: Convert VPERMILImm to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 4 ++-- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 3 +-- FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp | 8 ++------ 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 4e44caef3f..0c37a8198e 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5371,8 +5371,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(3, 0b01, 0x00), 1, &OpDispatchBuilder::VPERMQOp}, {OPD(3, 0b01, 0x01), 1, &OpDispatchBuilder::VPERMQOp}, {OPD(3, 0b01, 0x02), 1, &OpDispatchBuilder::VPBLENDDOp}, - {OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::VPERMILImmOp<4>}, - {OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::VPERMILImmOp<8>}, + {OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, 4>}, + {OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, 8>}, {OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::VPERM2Op}, {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVXVectorRound<4>}, {OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::AVXVectorRound<8>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 918a0f5b7f..a6ba7704f5 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -658,8 +658,7 @@ class OpDispatchBuilder final : public IREmitter { void VPERMDOp(OpcodeArgs); void VPERMQOp(OpcodeArgs); - template - void VPERMILImmOp(OpcodeArgs); + void VPERMILImmOp(OpcodeArgs, size_t ElementSize); Ref VPERMILRegOpImpl(OpSize DstSize, size_t ElementSize, Ref Src, Ref Indices); template diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index ad7c6244d9..d0bd46612c 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -4847,8 +4847,7 @@ void OpDispatchBuilder::VZEROOp(OpcodeArgs) { } } -template -void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs) { +void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto Selector = Op->Src[1].Literal() & 0xFF; @@ -4856,7 +4855,7 @@ void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = LoadZeroVector(DstSize); - if constexpr (ElementSize == 8) { + if (ElementSize == 8) { Result = _VInsElement(DstSize, ElementSize, 0, Selector & 0b0001, Result, Src); Result = _VInsElement(DstSize, ElementSize, 1, (Selector & 0b0010) >> 1, Result, Src); @@ -4881,9 +4880,6 @@ void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPERMILImmOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VPERMILImmOp<8>(OpcodeArgs); - Ref OpDispatchBuilder::VPERMILRegOpImpl(OpSize DstSize, size_t ElementSize, Ref Src, Ref Indices) { // NOTE: See implementation of VPERMD for the gist of what we do to make this work. // From 66520bce0a74d0aabe4cde049eb45327830837bf Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:52:41 -0700 Subject: [PATCH 07/13] OpcodeDispatcher: Convert VPACK{U,S}S to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 8 ++++---- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 6 ++---- .../Interface/Core/OpcodeDispatcher/Vector.cpp | 12 ++---------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 0c37a8198e..96bca89d05 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5135,15 +5135,15 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 1>}, {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 2>}, {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 4>}, - {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::VPACKSSOp<2>}, + {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, 2>}, {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 1>}, {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 2>}, {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 4>}, - {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::VPACKUSOp<2>}, + {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, 2>}, {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 1>}, {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 2>}, {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 4>}, - {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::VPACKSSOp<4>}, + {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, 4>}, {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 8>}, {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 8>}, {OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>}, @@ -5281,7 +5281,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(2, 0b01, 0x28), 1, &OpDispatchBuilder::VPMULLOp<4, true>}, {OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, 8>}, {OPD(2, 0b01, 0x2A), 1, &OpDispatchBuilder::MOVVectorNTOp}, - {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::VPACKUSOp<4>}, + {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, 4>}, {OPD(2, 0b01, 0x2C), 1, &OpDispatchBuilder::VMASKMOVOp<4, false>}, {OPD(2, 0b01, 0x2D), 1, &OpDispatchBuilder::VMASKMOVOp<8, false>}, {OPD(2, 0b01, 0x2E), 1, &OpDispatchBuilder::VMASKMOVOp<4, true>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index a6ba7704f5..f5ced804f4 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -637,11 +637,9 @@ class OpDispatchBuilder final : public IREmitter { void VMPSADBWOp(OpcodeArgs); - template - void VPACKSSOp(OpcodeArgs); + void VPACKSSOp(OpcodeArgs, size_t ElementSize); - template - void VPACKUSOp(OpcodeArgs); + void VPACKUSOp(OpcodeArgs, size_t ElementSize); void VPALIGNROp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index d0bd46612c..3183a36ab7 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -2974,8 +2974,7 @@ void OpDispatchBuilder::PACKUSOp(OpcodeArgs) { template void OpDispatchBuilder::PACKUSOp<2>(OpcodeArgs); template void OpDispatchBuilder::PACKUSOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPACKUSOp(OpcodeArgs) { +void OpDispatchBuilder::VPACKUSOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; @@ -2991,9 +2990,6 @@ void OpDispatchBuilder::VPACKUSOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPACKUSOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPACKUSOp<4>(OpcodeArgs); - template void OpDispatchBuilder::PACKSSOp(OpcodeArgs) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); @@ -3006,8 +3002,7 @@ void OpDispatchBuilder::PACKSSOp(OpcodeArgs) { template void OpDispatchBuilder::PACKSSOp<2>(OpcodeArgs); template void OpDispatchBuilder::PACKSSOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPACKSSOp(OpcodeArgs) { +void OpDispatchBuilder::VPACKSSOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; @@ -3023,9 +3018,6 @@ void OpDispatchBuilder::VPACKSSOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPACKSSOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPACKSSOp<4>(OpcodeArgs); - Ref OpDispatchBuilder::PMULLOpImpl(OpSize Size, size_t ElementSize, bool Signed, Ref Src1, Ref Src2) { if (Size == OpSize::i64Bit) { if (Signed) { From 12b3c82d831660adde57e487e35e062b6a45cee3 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:54:26 -0700 Subject: [PATCH 08/13] OpcodeDispatcher: Convert PExtr to Bind handler --- .../Interface/Core/OpcodeDispatcher.cpp | 24 +++++++++---------- .../Source/Interface/Core/OpcodeDispatcher.h | 3 +-- .../Core/OpcodeDispatcher/Vector.cpp | 10 ++------ 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 96bca89d05..9345f16e3f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5178,7 +5178,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<8>}, {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::VPINSRWOp}, - {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::PExtrOp<2>}, + {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, 4>}, {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, 8>}, @@ -5383,10 +5383,10 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(3, 0b01, 0x0E), 1, &OpDispatchBuilder::VPBLENDWOp}, {OPD(3, 0b01, 0x0F), 1, &OpDispatchBuilder::VPALIGNROp}, - {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::PExtrOp<1>}, - {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::PExtrOp<2>}, - {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::PExtrOp<4>}, - {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::PExtrOp<4>}, + {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 1>}, + {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, + {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>}, + {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>}, {OPD(3, 0b01, 0x18), 1, &OpDispatchBuilder::VINSERTOp}, {OPD(3, 0b01, 0x19), 1, &OpDispatchBuilder::VEXTRACT128Op}, @@ -5631,7 +5631,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xC0, 2, &OpDispatchBuilder::XADDOp}, {0xC3, 1, &OpDispatchBuilder::MOVGPRNTOp}, {0xC4, 1, &OpDispatchBuilder::PINSROp<2>}, - {0xC5, 1, &OpDispatchBuilder::PExtrOp<2>}, + {0xC5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, {0xC8, 8, &OpDispatchBuilder::BSWAPOp}, // SSE @@ -5979,7 +5979,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x7F, 1, &OpDispatchBuilder::MOVVectorAlignedOp}, {0xC2, 1, &OpDispatchBuilder::VFCMPOp<8>}, {0xC4, 1, &OpDispatchBuilder::PINSROp<2>}, - {0xC5, 1, &OpDispatchBuilder::PExtrOp<2>}, + {0xC5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, 8>}, {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<8>}, @@ -6757,11 +6757,11 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp}, {OPD(1, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp}, - {OPD(0, PF_3A_66, 0x14), 1, &OpDispatchBuilder::PExtrOp<1>}, - {OPD(0, PF_3A_66, 0x15), 1, &OpDispatchBuilder::PExtrOp<2>}, - {OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::PExtrOp<4>}, - {OPD(1, PF_3A_66, 0x16), 1, &OpDispatchBuilder::PExtrOp<8>}, - {OPD(0, PF_3A_66, 0x17), 1, &OpDispatchBuilder::PExtrOp<4>}, + {OPD(0, PF_3A_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 1>}, + {OPD(0, PF_3A_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, + {OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>}, + {OPD(1, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 8>}, + {OPD(0, PF_3A_66, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>}, {OPD(0, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<1>}, {OPD(0, PF_3A_66, 0x21), 1, &OpDispatchBuilder::InsertPSOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index f5ced804f4..4e6eac8413 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -508,8 +508,7 @@ class OpDispatchBuilder final : public IREmitter { template void PINSROp(OpcodeArgs); void InsertPSOp(OpcodeArgs); - template - void PExtrOp(OpcodeArgs); + void PExtrOp(OpcodeArgs, size_t ElementSize); template void PSIGN(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 3183a36ab7..2d7ba4e95f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -1618,8 +1618,7 @@ void OpDispatchBuilder::VINSERTPSOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PExtrOp(OpcodeArgs) { +void OpDispatchBuilder::PExtrOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -1630,7 +1629,7 @@ void OpDispatchBuilder::PExtrOp(OpcodeArgs) { // is the same except that REX.W or VEX.W is set to 1. Incredibly frustrating. // Use the destination size as the element size in this case. size_t OverridenElementSize = ElementSize; - if constexpr (ElementSize == 4) { + if (ElementSize == 4) { OverridenElementSize = DstSize; } @@ -1651,11 +1650,6 @@ void OpDispatchBuilder::PExtrOp(OpcodeArgs) { _VStoreVectorElement(16, OverridenElementSize, Src, Index, Dest); } -template void OpDispatchBuilder::PExtrOp<1>(OpcodeArgs); -template void OpDispatchBuilder::PExtrOp<2>(OpcodeArgs); -template void OpDispatchBuilder::PExtrOp<4>(OpcodeArgs); -template void OpDispatchBuilder::PExtrOp<8>(OpcodeArgs); - void OpDispatchBuilder::VEXTRACT128Op(OpcodeArgs) { const auto DstIsXMM = Op->Dest.IsGPR(); const auto StoreSize = DstIsXMM ? 32 : 16; From 7c5a9b5d6a1709072d4e379a0145135613307fb4 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:55:34 -0700 Subject: [PATCH 09/13] OpcodeDispatcher: Convert AVXVectorVariableBlend to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 6 +++--- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 3 +-- FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp | 8 ++------ 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 9345f16e3f..9a8b703cc1 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5404,9 +5404,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op}, - {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::AVXVectorVariableBlend<4>}, - {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::AVXVectorVariableBlend<8>}, - {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::AVXVectorVariableBlend<1>}, + {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, 4>}, + {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, 8>}, + {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, 1>}, {OPD(3, 0b01, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp}, {OPD(3, 0b01, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 4e6eac8413..0f83d48962 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1392,8 +1392,7 @@ class OpDispatchBuilder final : public IREmitter { void AVXVectorALUOp(OpcodeArgs, IROps IROp, size_t ElementSize); void AVXVectorUnaryOp(OpcodeArgs, IROps IROp, size_t ElementSize); - template - void AVXVectorVariableBlend(OpcodeArgs); + void AVXVectorVariableBlend(OpcodeArgs, size_t ElementSize); void AVXVariableShiftImpl(OpcodeArgs, IROps IROp); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 2d7ba4e95f..ead9f7363f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -3996,10 +3996,9 @@ template void OpDispatchBuilder::VectorVariableBlend<1>(OpcodeArgs); template void OpDispatchBuilder::VectorVariableBlend<4>(OpcodeArgs); template void OpDispatchBuilder::VectorVariableBlend<8>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs) { +void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs, size_t ElementSize) { const auto SrcSize = GetSrcSize(Op); - constexpr auto ElementSizeBits = ElementSize * 8; + const auto ElementSizeBits = ElementSize * 8; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); @@ -4012,9 +4011,6 @@ void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs) { Ref Result = _VBSL(SrcSize, Shifted, Src2, Src1); StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::AVXVectorVariableBlend<1>(OpcodeArgs); -template void OpDispatchBuilder::AVXVectorVariableBlend<4>(OpcodeArgs); -template void OpDispatchBuilder::AVXVectorVariableBlend<8>(OpcodeArgs); void OpDispatchBuilder::PTestOpImpl(OpSize Size, Ref Dest, Ref Src) { // Invalidate deferred flags early From b4093a88885b306150d3107e52d92bbbca5f4270 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:57:40 -0700 Subject: [PATCH 10/13] OpcodeDispatcher: Convert packed HSub to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 8 ++++---- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 6 ++---- .../Interface/Core/OpcodeDispatcher/Vector.cpp | 12 ++---------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 9a8b703cc1..11cebae1ee 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5163,8 +5163,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0x7C), 1, &OpDispatchBuilder::VHADDPOp}, {OPD(1, 0b11, 0x7C), 1, &OpDispatchBuilder::VHADDPOp}, - {OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::VHSUBPOp<8>}, - {OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::VHSUBPOp<4>}, + {OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, 8>}, + {OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, 4>}, {OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>}, {OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>}, @@ -5248,8 +5248,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(2, 0b01, 0x03), 1, &OpDispatchBuilder::VPHADDSWOp}, {OPD(2, 0b01, 0x04), 1, &OpDispatchBuilder::VPMADDUBSWOp}, - {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::VPHSUBOp<2>}, - {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::VPHSUBOp<4>}, + {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, 2>}, + {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, 4>}, {OPD(2, 0b01, 0x07), 1, &OpDispatchBuilder::VPHSUBSWOp}, {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::VPSIGN<1>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 0f83d48962..777fa2d69d 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -612,8 +612,7 @@ class OpDispatchBuilder final : public IREmitter { template void VHADDPOp(OpcodeArgs); - template - void VHSUBPOp(OpcodeArgs); + void VHSUBPOp(OpcodeArgs, size_t ElementSize); void VINSERTOp(OpcodeArgs); void VINSERTPSOp(OpcodeArgs); @@ -663,8 +662,7 @@ class OpDispatchBuilder final : public IREmitter { void VPHADDSWOp(OpcodeArgs); - template - void VPHSUBOp(OpcodeArgs); + void VPHSUBOp(OpcodeArgs, size_t ElementSize); void VPHSUBSWOp(OpcodeArgs); void VPINSRBOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index ead9f7363f..540599c8b5 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -3450,8 +3450,7 @@ void OpDispatchBuilder::HSUBP(OpcodeArgs) { template void OpDispatchBuilder::HSUBP<4>(OpcodeArgs); template void OpDispatchBuilder::HSUBP<8>(OpcodeArgs); -template -void OpDispatchBuilder::VHSUBPOp(OpcodeArgs) { +void OpDispatchBuilder::VHSUBPOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; @@ -3468,9 +3467,6 @@ void OpDispatchBuilder::VHSUBPOp(OpcodeArgs) { StoreResult(FPRClass, Op, Dest, -1); } -template void OpDispatchBuilder::VHSUBPOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VHSUBPOp<8>(OpcodeArgs); - Ref OpDispatchBuilder::PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, size_t ElementSize) { auto Even = _VUnZip(Size, ElementSize, Src1, Src2); auto Odd = _VUnZip2(Size, ElementSize, Src1, Src2); @@ -3488,8 +3484,7 @@ void OpDispatchBuilder::PHSUB(OpcodeArgs) { template void OpDispatchBuilder::PHSUB<2>(OpcodeArgs); template void OpDispatchBuilder::PHSUB<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPHSUBOp(OpcodeArgs) { +void OpDispatchBuilder::VPHSUBOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; @@ -3503,9 +3498,6 @@ void OpDispatchBuilder::VPHSUBOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPHSUBOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPHSUBOp<4>(OpcodeArgs); - Ref OpDispatchBuilder::PHADDSOpImpl(OpSize Size, Ref Src1, Ref Src2) { const uint8_t ElementSize = 2; From 57eacab6548e7828f1548939527924b720466b36 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 14:59:25 -0700 Subject: [PATCH 11/13] OpcodeDispatcher: Convert VBROADCASTOp to Bind handler --- .../Source/Interface/Core/OpcodeDispatcher.cpp | 16 ++++++++-------- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 3 +-- .../Interface/Core/OpcodeDispatcher/Vector.cpp | 9 +-------- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 11cebae1ee..6ba3cf8e0a 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5264,9 +5264,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(2, 0b01, 0x13), 1, &OpDispatchBuilder::VCVTPH2PSOp}, {OPD(2, 0b01, 0x16), 1, &OpDispatchBuilder::VPERMDOp}, {OPD(2, 0b01, 0x17), 1, &OpDispatchBuilder::PTestOp}, - {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::VBROADCASTOp<4>}, - {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::VBROADCASTOp<8>}, - {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::VBROADCASTOp<16>}, + {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 4>}, + {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 8>}, + {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 16>}, {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, 1>}, {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, 2>}, {OPD(2, 0b01, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, 4>}, @@ -5311,12 +5311,12 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(2, 0b01, 0x46), 1, &OpDispatchBuilder::VPSRAVDOp}, {OPD(2, 0b01, 0x47), 1, &OpDispatchBuilder::VPSLLVOp}, - {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::VBROADCASTOp<4>}, - {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::VBROADCASTOp<8>}, - {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::VBROADCASTOp<16>}, + {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 4>}, + {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 8>}, + {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 16>}, - {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::VBROADCASTOp<1>}, - {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::VBROADCASTOp<2>}, + {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 1>}, + {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 2>}, {OPD(2, 0b01, 0x8C), 1, &OpDispatchBuilder::VPMASKMOVOp}, {OPD(2, 0b01, 0x8E), 1, &OpDispatchBuilder::VPMASKMOVOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 777fa2d69d..8c9e16cbd3 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -602,8 +602,7 @@ class OpDispatchBuilder final : public IREmitter { void VPBLENDDOp(OpcodeArgs); void VPBLENDWOp(OpcodeArgs); - template - void VBROADCASTOp(OpcodeArgs); + void VBROADCASTOp(OpcodeArgs, size_t ElementSize); template void VDPPOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 540599c8b5..4e75bc142f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -1482,8 +1482,7 @@ template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template -void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs) { +void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); Ref Result {}; @@ -1502,12 +1501,6 @@ void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VBROADCASTOp<1>(OpcodeArgs); -template void OpDispatchBuilder::VBROADCASTOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VBROADCASTOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VBROADCASTOp<8>(OpcodeArgs); -template void OpDispatchBuilder::VBROADCASTOp<16>(OpcodeArgs); - Ref OpDispatchBuilder::PINSROpImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) { const auto Size = GetDstSize(Op); From 23a076c31368bdf9654f2f7821f400a529cef1bd Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 15:07:09 -0700 Subject: [PATCH 12/13] OpcodeDispatcher: Convert packed vector shifts to Bind handler --- .../Interface/Core/OpcodeDispatcher.cpp | 96 +++++++++---------- .../Source/Interface/Core/OpcodeDispatcher.h | 36 +++---- .../Core/OpcodeDispatcher/Vector.cpp | 80 +++------------- 3 files changed, 72 insertions(+), 140 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 6ba3cf8e0a..722a6a34e9 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5186,9 +5186,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<8>}, {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<4>}, - {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::VPSRLDOp<2>}, - {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::VPSRLDOp<4>}, - {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::VPSRLDOp<8>}, + {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, 2>}, + {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, 4>}, + {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, 8>}, {OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, 8>}, {OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, 2>}, {OPD(1, 0b01, 0xD6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>}, @@ -5204,8 +5204,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0xDF), 1, &OpDispatchBuilder::VANDNOp}, {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, 1>}, - {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::VPSRAOp<2>}, - {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::VPSRAOp<4>}, + {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, 2>}, + {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, 4>}, {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, 2>}, {OPD(1, 0b01, 0xE4), 1, &OpDispatchBuilder::VPMULHWOp}, {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::VPMULHWOp}, @@ -5226,9 +5226,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0xEF), 1, &OpDispatchBuilder::AVXVectorXOROp}, {OPD(1, 0b11, 0xF0), 1, &OpDispatchBuilder::MOVVectorUnalignedOp}, - {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::VPSLLOp<2>}, - {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::VPSLLOp<4>}, - {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::VPSLLOp<8>}, + {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, 2>}, + {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, 4>}, + {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, 8>}, {OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::VPMULLOp<4, false>}, {OPD(1, 0b01, 0xF5), 1, &OpDispatchBuilder::VPMADDWDOp}, {OPD(1, 0b01, 0xF6), 1, &OpDispatchBuilder::VPSADBWOp}, @@ -5419,17 +5419,17 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { #define OPD(group, pp, opcode) (((group - X86Tables::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode)) static constexpr std::tuple VEXTableGroupOps[] { - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, &OpDispatchBuilder::VPSRLIOp<2>}, - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, &OpDispatchBuilder::VPSLLIOp<2>}, - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, &OpDispatchBuilder::VPSRAIOp<2>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, 2>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, 2>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, 2>}, - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, &OpDispatchBuilder::VPSRLIOp<4>}, - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, &OpDispatchBuilder::VPSLLIOp<4>}, - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, &OpDispatchBuilder::VPSRAIOp<4>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, 4>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, 4>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, 4>}, - {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, &OpDispatchBuilder::VPSRLIOp<8>}, + {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, 8>}, {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b011), 1, &OpDispatchBuilder::VPSRLDQOp}, - {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, &OpDispatchBuilder::VPSLLIOp<8>}, + {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, 8>}, {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b111), 1, &OpDispatchBuilder::VPSLLDQOp}, {OPD(X86Tables::TYPE_VEX_GROUP_15, 0, 0b010), 1, &OpDispatchBuilder::LDMXCSR}, @@ -5684,9 +5684,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xC2, 1, &OpDispatchBuilder::VFCMPOp<4>}, {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, 4>}, - {0xD1, 1, &OpDispatchBuilder::PSRLDOp<2>}, - {0xD2, 1, &OpDispatchBuilder::PSRLDOp<4>}, - {0xD3, 1, &OpDispatchBuilder::PSRLDOp<8>}, + {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 2>}, + {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 4>}, + {0xD3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 8>}, {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 8>}, {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, 2>}, {0xD7, 1, &OpDispatchBuilder::MOVMSKOpOne}, // PMOVMSKB @@ -5699,8 +5699,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, 1>}, {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, 8>}, {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 1>}, - {0xE1, 1, &OpDispatchBuilder::PSRAOp<2>}, - {0xE2, 1, &OpDispatchBuilder::PSRAOp<4>}, + {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 2>}, + {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 4>}, {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 2>}, {0xE4, 1, &OpDispatchBuilder::PMULHW}, {0xE5, 1, &OpDispatchBuilder::PMULHW}, @@ -5714,9 +5714,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, 2>}, {0xEF, 1, &OpDispatchBuilder::VectorXOROp}, - {0xF1, 1, &OpDispatchBuilder::PSLL<2>}, - {0xF2, 1, &OpDispatchBuilder::PSLL<4>}, - {0xF3, 1, &OpDispatchBuilder::PSLL<8>}, + {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 2>}, + {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 4>}, + {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 8>}, {0xF4, 1, &OpDispatchBuilder::PMULLOp<4, false>}, {0xF5, 1, &OpDispatchBuilder::PMADDWD}, {0xF6, 1, &OpDispatchBuilder::PSADBW}, @@ -5983,9 +5983,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, 8>}, {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<8>}, - {0xD1, 1, &OpDispatchBuilder::PSRLDOp<2>}, - {0xD2, 1, &OpDispatchBuilder::PSRLDOp<4>}, - {0xD3, 1, &OpDispatchBuilder::PSRLDOp<8>}, + {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 2>}, + {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 4>}, + {0xD3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 8>}, {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 8>}, {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, 2>}, {0xD6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>}, @@ -5999,8 +5999,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, 1>}, {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, 8>}, {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 1>}, - {0xE1, 1, &OpDispatchBuilder::PSRAOp<2>}, - {0xE2, 1, &OpDispatchBuilder::PSRAOp<4>}, + {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 2>}, + {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 4>}, {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 2>}, {0xE4, 1, &OpDispatchBuilder::PMULHW}, {0xE5, 1, &OpDispatchBuilder::PMULHW}, @@ -6015,9 +6015,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, 2>}, {0xEF, 1, &OpDispatchBuilder::VectorXOROp}, - {0xF1, 1, &OpDispatchBuilder::PSLL<2>}, - {0xF2, 1, &OpDispatchBuilder::PSLL<4>}, - {0xF3, 1, &OpDispatchBuilder::PSLL<8>}, + {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 2>}, + {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 4>}, + {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 8>}, {0xF4, 1, &OpDispatchBuilder::PMULLOp<4, false>}, {0xF5, 1, &OpDispatchBuilder::PMADDWD}, {0xF6, 1, &OpDispatchBuilder::PSADBW}, @@ -6094,30 +6094,30 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_F3, 7), 1, &OpDispatchBuilder::RDPIDOp}, // GROUP 12 - {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 2), 1, &OpDispatchBuilder::PSRLI<2>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 4), 1, &OpDispatchBuilder::PSRAIOp<2>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 6), 1, &OpDispatchBuilder::PSLLI<2>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 2>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 2>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 2>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 2), 1, &OpDispatchBuilder::PSRLI<2>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 4), 1, &OpDispatchBuilder::PSRAIOp<2>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 6), 1, &OpDispatchBuilder::PSLLI<2>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 2>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 2>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 2>}, // GROUP 13 - {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 2), 1, &OpDispatchBuilder::PSRLI<4>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 4), 1, &OpDispatchBuilder::PSRAIOp<4>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 6), 1, &OpDispatchBuilder::PSLLI<4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 4>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 2), 1, &OpDispatchBuilder::PSRLI<4>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 4), 1, &OpDispatchBuilder::PSRAIOp<4>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 6), 1, &OpDispatchBuilder::PSLLI<4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 4>}, // GROUP 14 - {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 2), 1, &OpDispatchBuilder::PSRLI<8>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 6), 1, &OpDispatchBuilder::PSLLI<8>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 8>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 8>}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 2), 1, &OpDispatchBuilder::PSRLI<8>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 8>}, {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 3), 1, &OpDispatchBuilder::PSRLDQ}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 6), 1, &OpDispatchBuilder::PSLLI<8>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 8>}, {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 7), 1, &OpDispatchBuilder::PSLLDQ}, // GROUP 15 diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 8c9e16cbd3..c243fba5d8 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -469,20 +469,14 @@ class OpDispatchBuilder final : public IREmitter { void PSHUFWOp(OpcodeArgs, bool Low); void PSHUFW8ByteOp(OpcodeArgs); void PSHUFDOp(OpcodeArgs); - template - void PSRLDOp(OpcodeArgs); - template - void PSRLI(OpcodeArgs); - template - void PSLLI(OpcodeArgs); - template - void PSLL(OpcodeArgs); - template - void PSRAOp(OpcodeArgs); + void PSRLDOp(OpcodeArgs, size_t ElementSize); + void PSRLI(OpcodeArgs, size_t ElementSize); + void PSLLI(OpcodeArgs, size_t ElementSize); + void PSLL(OpcodeArgs, size_t ElementSize); + void PSRAOp(OpcodeArgs, size_t ElementSize); void PSRLDQ(OpcodeArgs); void PSLLDQ(OpcodeArgs); - template - void PSRAIOp(OpcodeArgs); + void PSRAIOp(OpcodeArgs, size_t ElementSize); void MOVDDUPOp(OpcodeArgs); template void CVTGPR_To_FPR(OpcodeArgs); @@ -688,32 +682,26 @@ class OpDispatchBuilder final : public IREmitter { void VPSHUFWOp(OpcodeArgs, size_t ElementSize, bool Low); - template - void VPSLLOp(OpcodeArgs); + void VPSLLOp(OpcodeArgs, size_t ElementSize); void VPSLLDQOp(OpcodeArgs); - template - void VPSLLIOp(OpcodeArgs); + void VPSLLIOp(OpcodeArgs, size_t ElementSize); void VPSLLVOp(OpcodeArgs); - template - void VPSRAOp(OpcodeArgs); + void VPSRAOp(OpcodeArgs, size_t ElementSize); - template - void VPSRAIOp(OpcodeArgs); + void VPSRAIOp(OpcodeArgs, size_t ElementSize); void VPSRAVDOp(OpcodeArgs); void VPSRLVOp(OpcodeArgs); - template - void VPSRLDOp(OpcodeArgs); + void VPSRLDOp(OpcodeArgs, size_t ElementSize); void VPSRLDQOp(OpcodeArgs); void VPUNPCKHOp(OpcodeArgs, size_t ElementSize); void VPUNPCKLOp(OpcodeArgs, size_t ElementSize); - template - void VPSRLIOp(OpcodeArgs); + void VPSRLIOp(OpcodeArgs, size_t ElementSize); void VSHUFOp(OpcodeArgs, size_t ElementSize); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 4e75bc142f..75f77a06fd 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -1706,8 +1706,7 @@ Ref OpDispatchBuilder::PSRLDOpImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref return _VUShrSWide(Size, ElementSize, Src, ShiftVec); } -template -void OpDispatchBuilder::PSRLDOp(OpcodeArgs) { +void OpDispatchBuilder::PSRLDOp(OpcodeArgs, size_t ElementSize) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSRLDOpImpl(Op, ElementSize, Dest, Src); @@ -1715,12 +1714,7 @@ void OpDispatchBuilder::PSRLDOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::PSRLDOp<2>(OpcodeArgs); -template void OpDispatchBuilder::PSRLDOp<4>(OpcodeArgs); -template void OpDispatchBuilder::PSRLDOp<8>(OpcodeArgs); - -template -void OpDispatchBuilder::VPSRLDOp(OpcodeArgs) { +void OpDispatchBuilder::VPSRLDOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -1734,12 +1728,7 @@ void OpDispatchBuilder::VPSRLDOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPSRLDOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPSRLDOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VPSRLDOp<8>(OpcodeArgs); - -template -void OpDispatchBuilder::PSRLI(OpcodeArgs) { +void OpDispatchBuilder::PSRLI(OpcodeArgs, size_t ElementSize) { const uint64_t ShiftConstant = Op->Src[1].Literal(); if (ShiftConstant == 0) [[unlikely]] { // Nothing to do, value is already in Dest. @@ -1753,12 +1742,7 @@ void OpDispatchBuilder::PSRLI(OpcodeArgs) { StoreResult(FPRClass, Op, Shift, -1); } -template void OpDispatchBuilder::PSRLI<2>(OpcodeArgs); -template void OpDispatchBuilder::PSRLI<4>(OpcodeArgs); -template void OpDispatchBuilder::PSRLI<8>(OpcodeArgs); - -template -void OpDispatchBuilder::VPSRLIOp(OpcodeArgs) { +void OpDispatchBuilder::VPSRLIOp(OpcodeArgs, size_t ElementSize) { const auto Size = GetSrcSize(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; const uint64_t ShiftConstant = Op->Src[1].Literal(); @@ -1777,10 +1761,6 @@ void OpDispatchBuilder::VPSRLIOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPSRLIOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPSRLIOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VPSRLIOp<8>(OpcodeArgs); - Ref OpDispatchBuilder::PSLLIImpl(OpcodeArgs, size_t ElementSize, Ref Src, uint64_t Shift) { if (Shift == 0) [[unlikely]] { // If zero-shift then just return the source. @@ -1790,8 +1770,7 @@ Ref OpDispatchBuilder::PSLLIImpl(OpcodeArgs, size_t ElementSize, Ref Src, uint64 return _VShlI(Size, ElementSize, Src, Shift); } -template -void OpDispatchBuilder::PSLLI(OpcodeArgs) { +void OpDispatchBuilder::PSLLI(OpcodeArgs, size_t ElementSize) { const uint64_t ShiftConstant = Op->Src[1].Literal(); if (ShiftConstant == 0) [[unlikely]] { // Nothing to do, value is already in Dest. @@ -1804,12 +1783,7 @@ void OpDispatchBuilder::PSLLI(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::PSLLI<2>(OpcodeArgs); -template void OpDispatchBuilder::PSLLI<4>(OpcodeArgs); -template void OpDispatchBuilder::PSLLI<8>(OpcodeArgs); - -template -void OpDispatchBuilder::VPSLLIOp(OpcodeArgs) { +void OpDispatchBuilder::VPSLLIOp(OpcodeArgs, size_t ElementSize) { const uint64_t ShiftConstant = Op->Src[1].Literal(); const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -1823,10 +1797,6 @@ void OpDispatchBuilder::VPSLLIOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPSLLIOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPSLLIOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VPSLLIOp<8>(OpcodeArgs); - Ref OpDispatchBuilder::PSLLImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec) { const auto Size = GetDstSize(Op); @@ -1834,8 +1804,7 @@ Ref OpDispatchBuilder::PSLLImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref Shi return _VUShlSWide(Size, ElementSize, Src, ShiftVec); } -template -void OpDispatchBuilder::PSLL(OpcodeArgs) { +void OpDispatchBuilder::PSLL(OpcodeArgs, size_t ElementSize) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSLLImpl(Op, ElementSize, Dest, Src); @@ -1843,12 +1812,7 @@ void OpDispatchBuilder::PSLL(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::PSLL<2>(OpcodeArgs); -template void OpDispatchBuilder::PSLL<4>(OpcodeArgs); -template void OpDispatchBuilder::PSLL<8>(OpcodeArgs); - -template -void OpDispatchBuilder::VPSLLOp(OpcodeArgs) { +void OpDispatchBuilder::VPSLLOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -1862,10 +1826,6 @@ void OpDispatchBuilder::VPSLLOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPSLLOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPSLLOp<4>(OpcodeArgs); -template void OpDispatchBuilder::VPSLLOp<8>(OpcodeArgs); - Ref OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec) { const auto Size = GetDstSize(Op); @@ -1873,8 +1833,7 @@ Ref OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref S return _VSShrSWide(Size, ElementSize, Src, ShiftVec); } -template -void OpDispatchBuilder::PSRAOp(OpcodeArgs) { +void OpDispatchBuilder::PSRAOp(OpcodeArgs, size_t ElementSize) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSRAOpImpl(Op, ElementSize, Dest, Src); @@ -1882,11 +1841,7 @@ void OpDispatchBuilder::PSRAOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::PSRAOp<2>(OpcodeArgs); -template void OpDispatchBuilder::PSRAOp<4>(OpcodeArgs); - -template -void OpDispatchBuilder::VPSRAOp(OpcodeArgs) { +void OpDispatchBuilder::VPSRAOp(OpcodeArgs, size_t ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -1900,9 +1855,6 @@ void OpDispatchBuilder::VPSRAOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPSRAOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPSRAOp<4>(OpcodeArgs); - void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { const uint64_t Shift = Op->Src[1].Literal(); if (Shift == 0) [[unlikely]] { @@ -2004,8 +1956,7 @@ void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PSRAIOp(OpcodeArgs) { +void OpDispatchBuilder::PSRAIOp(OpcodeArgs, size_t ElementSize) { const uint64_t Shift = Op->Src[1].Literal(); if (Shift == 0) [[unlikely]] { // Nothing to do, value is already in Dest. @@ -2019,11 +1970,7 @@ void OpDispatchBuilder::PSRAIOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::PSRAIOp<2>(OpcodeArgs); -template void OpDispatchBuilder::PSRAIOp<4>(OpcodeArgs); - -template -void OpDispatchBuilder::VPSRAIOp(OpcodeArgs) { +void OpDispatchBuilder::VPSRAIOp(OpcodeArgs, size_t ElementSize) { const uint64_t Shift = Op->Src[1].Literal(); const auto Size = GetDstSize(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; @@ -2042,9 +1989,6 @@ void OpDispatchBuilder::VPSRAIOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VPSRAIOp<2>(OpcodeArgs); -template void OpDispatchBuilder::VPSRAIOp<4>(OpcodeArgs); - void OpDispatchBuilder::AVXVariableShiftImpl(OpcodeArgs, IROps IROp) { const auto DstSize = GetDstSize(Op); const auto SrcSize = GetSrcSize(Op); From 1d00ad6030d00075ae8a318ce12d149f765ff662 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 22 Aug 2024 15:09:44 -0700 Subject: [PATCH 13/13] OpcodeDispatcher: Convert VectorVariableBlend to Bind handler --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 6 +++--- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 3 +-- FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp | 6 +----- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 722a6a34e9..5a160c5007 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -6694,9 +6694,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(PF_38_66, 0x0A), 1, &OpDispatchBuilder::PSIGN<4>}, {OPD(PF_38_NONE, 0x0B), 1, &OpDispatchBuilder::PMULHRSW}, {OPD(PF_38_66, 0x0B), 1, &OpDispatchBuilder::PMULHRSW}, - {OPD(PF_38_66, 0x10), 1, &OpDispatchBuilder::VectorVariableBlend<1>}, - {OPD(PF_38_66, 0x14), 1, &OpDispatchBuilder::VectorVariableBlend<4>}, - {OPD(PF_38_66, 0x15), 1, &OpDispatchBuilder::VectorVariableBlend<8>}, + {OPD(PF_38_66, 0x10), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, 1>}, + {OPD(PF_38_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, 4>}, + {OPD(PF_38_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, 8>}, {OPD(PF_38_66, 0x17), 1, &OpDispatchBuilder::PTestOp}, {OPD(PF_38_NONE, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 1>}, {OPD(PF_38_66, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 1>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index c243fba5d8..bebce9b9ea 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -916,8 +916,7 @@ class OpDispatchBuilder final : public IREmitter { template void VectorBlend(OpcodeArgs); - template - void VectorVariableBlend(OpcodeArgs); + void VectorVariableBlend(OpcodeArgs, size_t ElementSize); void PTestOpImpl(OpSize Size, Ref Dest, Ref Src); void PTestOp(OpcodeArgs); void PHMINPOSUWOp(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 75f77a06fd..14318975ab 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -3901,8 +3901,7 @@ template void OpDispatchBuilder::VectorBlend<2>(OpcodeArgs); template void OpDispatchBuilder::VectorBlend<4>(OpcodeArgs); template void OpDispatchBuilder::VectorBlend<8>(OpcodeArgs); -template -void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs) { +void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs, size_t ElementSize) { auto Size = GetSrcSize(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); @@ -3921,9 +3920,6 @@ void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template void OpDispatchBuilder::VectorVariableBlend<1>(OpcodeArgs); -template void OpDispatchBuilder::VectorVariableBlend<4>(OpcodeArgs); -template void OpDispatchBuilder::VectorVariableBlend<8>(OpcodeArgs); void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs, size_t ElementSize) { const auto SrcSize = GetSrcSize(Op);