Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Working PR for AVX128 #3720

Closed
wants to merge 87 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
87 commits
Select commit Hold shift + click to select a range
9a4611c
AVX128: Implement support for v{u,}comis{s,d}
Sonicadvance1 Jun 18, 2024
839a1ec
AVX128: Implement support for v{add,sub,mul,fmin,fmax,fdiv,sqrt,rsqrt…
Sonicadvance1 Jun 18, 2024
d26e30b
AVX128: Implement support for vcmpp{s,d}
Sonicadvance1 Jun 18, 2024
1301620
AVX128: Implement support for vcmps{s,d}
Sonicadvance1 Jun 18, 2024
bc5ab9b
AVX128: Implement vmov{d,q}
Sonicadvance1 Jun 18, 2024
907cb41
AVX128: Implement support for vpextr{b,w,d,q}
Sonicadvance1 Jun 18, 2024
e92afae
AVX128: Implement support for vpmov{s,z}{b,w,d}{w,d,q}
Sonicadvance1 Jun 18, 2024
64ef843
AVX128: Implement support for vmovmskp{s,d}
Sonicadvance1 Jun 18, 2024
c17a7f6
AVX128: Implement support for vpmovmskb
Sonicadvance1 Jun 18, 2024
a9c2727
AVX128: Implement support for vpinsr{b,w,d,q}
Sonicadvance1 Jun 18, 2024
934d7e0
AVX128: Implements the various vector shift instructions
Sonicadvance1 Jun 18, 2024
c5e46e7
AVX128: Implement support for vinsert{f128,i128}
Sonicadvance1 Jun 18, 2024
d62345b
AVX128: Implement support for vinsertps
Sonicadvance1 Jun 18, 2024
d8f21e7
AVX128: Implement support for vphsub{w,d}
Sonicadvance1 Jun 18, 2024
d23bbe5
AVX128: Implement support for vpsubsw
Sonicadvance1 Jun 18, 2024
6cff4ad
AVX128: Implement support for vaddsubp{s,d}
Sonicadvance1 Jun 18, 2024
d8c74fc
AVX128: Implement support for vpmul{u,}dq
Sonicadvance1 Jun 18, 2024
a56b925
AVX128: Implements support for vpmulhrsw
Sonicadvance1 Jun 18, 2024
35cfdae
AVX128: Implement support for vpmulh{u,}w
Sonicadvance1 Jun 18, 2024
9342cd2
AVX128: Implement support for vcvt{ss2sd,sd2ss}
Sonicadvance1 Jun 18, 2024
3e7fa2f
JIT: Implement missing Vector_FToF2
Sonicadvance1 Jun 19, 2024
04dc713
AVX128: Implement support for vcvt{pd2ps,ps2pd}
Sonicadvance1 Jun 19, 2024
2d81dc8
AVX128: Implement support for cvt{t,}pd2dq
Sonicadvance1 Jun 19, 2024
7339c79
AVX128: Implement support for cvtdq2{ps,pd}
Sonicadvance1 Jun 19, 2024
6f757e0
AVX128: Implement support for vextract{i,f}128
Sonicadvance1 Jun 19, 2024
c8fc403
AVX128: Implement support for a trinary operation with a passed in ve…
Sonicadvance1 Jun 19, 2024
f017616
AVX128: Implements support for vector AES instructions
Sonicadvance1 Jun 19, 2024
7392b8e
AVX128: Implements support for AVX string ops
Sonicadvance1 Jun 19, 2024
21a1487
AVX128: Implement support for vphminposuw
Sonicadvance1 Jun 19, 2024
63bb9cf
AVX128: Implement support for vround{ps,pd}
Sonicadvance1 Jun 19, 2024
27ffa0f
AVX128: Implement support for round{ss,sd}
Sonicadvance1 Jun 19, 2024
bb2f57e
VectorOps: Restructure DPPOpImpl. This will get reused by AVX128
Sonicadvance1 Jun 19, 2024
9f5729d
AVX128: Implement support for vdpp{s,d}
Sonicadvance1 Jun 19, 2024
47f1b07
AVX128: Implement support for vperm{q,pd}
Sonicadvance1 Jun 19, 2024
a48b50f
AVX128: Implement support for vpshuf{lw,hw,d}
Sonicadvance1 Jun 19, 2024
4e200e4
AVX128: Implement support for vshuf{ps,pd}
Sonicadvance1 Jun 19, 2024
6805943
AVX128: Implement support for imm vpermil{ps,pd}
Sonicadvance1 Jun 19, 2024
0d3b1f7
AVX128: Implement support for vhaddpd/vphadd{w,d}
Sonicadvance1 Jun 19, 2024
ff7735a
AVX128: Implement support for vphaddsw
Sonicadvance1 Jun 19, 2024
2028402
AVX128: Implement support for vpmaddubsw
Sonicadvance1 Jun 19, 2024
ff7da5f
AVX128: Implement support for vpmaddwd
Sonicadvance1 Jun 19, 2024
ec9b542
AVX128: Implement support for vpblendw/vpblendd/vblendps/vblendpd
Sonicadvance1 Jun 19, 2024
a4eb341
AVX128: Implement support for hsub{ps,pd}
Sonicadvance1 Jun 19, 2024
40ea283
AVX128: Implement support for vpshufb
Sonicadvance1 Jun 19, 2024
c21fa16
AVX128: Implement support for vpsadbw
Sonicadvance1 Jun 19, 2024
808e131
AVX128: Implement support for vmpsadbw
Sonicadvance1 Jun 19, 2024
f7f55dc
AVX128: Implement support for vpalignr
Sonicadvance1 Jun 19, 2024
79a25dd
AVX128: Implement vmaskmov{ps,pd}, vpmaskmov{d,q} using SVE2 gather l…
Sonicadvance1 Jun 19, 2024
a519c93
AVX128: Implement support for vmaskmovdqu
Sonicadvance1 Jun 19, 2024
803013c
AVX128: Implement support for vblend{ps,pd}/vpblendvb
Sonicadvance1 Jun 19, 2024
4b58f24
FEXCore: Implement support for fetching/setting YMM registers
Sonicadvance1 Jun 20, 2024
7edfe37
SignalDelegator: Use new YMM register reconstruction helpers
Sonicadvance1 Jun 20, 2024
929c1ce
TestHarnessRunner: Reconverge YMM registers if AVX is supported
Sonicadvance1 Jun 20, 2024
e76db7c
AVX128: Implement support for xsave/xrstor
Sonicadvance1 Jun 20, 2024
ede22a0
AVX128: Reenable {ldm,stm}mxcsr. Can use the regular implementation.
Sonicadvance1 Jun 20, 2024
65ef194
AVX128: Implement support for vperm2{f128,i128}
Sonicadvance1 Jun 20, 2024
77940ba
HostFeatures: Always disable AVX in 32-bit mode to protect from stack…
Sonicadvance1 Jun 20, 2024
d399e5e
AVX128: Implement support for vtest{ps,pd}
Sonicadvance1 Jun 20, 2024
365b9f3
AVX128: Implement support for vptest
Sonicadvance1 Jun 20, 2024
8f2f77b
Arm64: Revert #2865 optimization
Sonicadvance1 Jun 20, 2024
eaed0d6
AVX128: Implement support for variable vpermil{ps,pd}
Sonicadvance1 Jun 20, 2024
823f09d
AVX128: Implement support for vperm{d,ps}!
Sonicadvance1 Jun 20, 2024
6b2e7ad
AVX128: Implement support for VPCLMULQDQ
Sonicadvance1 Jun 21, 2024
60ad298
unittests: Adds support for 256-bit vpclmulqdq
Sonicadvance1 Jun 21, 2024
545915d
CPUID: Expose support for VPCLMULQDQ
Sonicadvance1 Jun 21, 2024
85f6978
Frontend: Expose AVX W flag
Sonicadvance1 Jun 21, 2024
487c9c6
Arm64: Implement VLoadVectorGatherMasked operation
Sonicadvance1 Jun 22, 2024
3fce072
X86Tables: Describe VPGather in the VEX tables
Sonicadvance1 Jun 22, 2024
3eb6d12
AVX128: Implement support for gather load instructions
Sonicadvance1 Jun 22, 2024
615e4ae
AVX128: Fix SPDX license. Which commit messed this up?
Sonicadvance1 Jun 22, 2024
77b6040
OpcodeDispatcher: Implement AVX gathers with SVE256
Sonicadvance1 Jun 22, 2024
1818908
InstcountCI: Update for SVE256 gathers!
Sonicadvance1 Jun 22, 2024
13c2338
IR: Adds support for new SUBADD FMA constants
Sonicadvance1 Jun 23, 2024
752aee9
X86Tables: Describe FMA3 instructions
Sonicadvance1 Jun 23, 2024
4cda611
ARM64: Adds new FMA vector instructions
Sonicadvance1 Jun 23, 2024
b011699
AVX128: Implement FMA3 instructions
Sonicadvance1 Jun 23, 2024
71901d3
SVE256: Implement support for FMA3
Sonicadvance1 Jun 23, 2024
18510cb
unittests: Convert HostFeatures from AVX2 to AVX
Sonicadvance1 Jun 23, 2024
d1995e7
unittests: Adds FMA3 unittests
Sonicadvance1 Jun 23, 2024
47af41f
HostFeatures: Removes distinction between AVX and AVX2
Sonicadvance1 Jun 23, 2024
b679996
HostFeatures: Allow enabling AVX without SVE256
Sonicadvance1 Jun 23, 2024
17a6a24
CPUID: Enable support for FMA3 when AVX is enabled
Sonicadvance1 Jun 23, 2024
6b1f4a9
CPUID: Enable support for AVX2 when AVX is enabled
Sonicadvance1 Jun 23, 2024
ab1814e
CPUID: Implement support for XCR0 when AVX is enabled
Sonicadvance1 Jun 23, 2024
58a8b81
InstcountCI: Update for SVE256 FMA implementation
Sonicadvance1 Jun 23, 2024
14f8b4d
InstcountCI: Support AVX flag
Sonicadvance1 Jun 24, 2024
fd10436
InstCountCI: Adds AVX128 tests
Sonicadvance1 Jun 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions FEXCore/Source/Interface/Config/Config.json.in
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@
"DISABLESVE": "disablesve",
"ENABLEAVX": "enableavx",
"DISABLEAVX": "disableavx",
"ENABLEAVX2": "enableavx2",
"DISABLEAVX2": "disableavx2",
"ENABLEAFP": "enableafp",
"DISABLEAFP": "disableafp",
"ENABLELRCPC": "enablelrcpc",
Expand Down Expand Up @@ -86,7 +84,6 @@
"\toff: Default CPU features queried from CPU features",
"\t{enable,disable}sve: Will force enable or disable sve even if the host doesn't support it",
"\t{enable,disable}avx: Will force enable or disable avx even if the host doesn't support it",
"\t{enable,disable}avx2: Will force enable or disable avx2 even if the host doesn't support it",
"\t{enable,disable}afp: Will force enable or disable afp even if the host doesn't support it",
"\t{enable,disable}lrcpc: Will force enable or disable lrcpc even if the host doesn't support it",
"\t{enable,disable}lrcpc2: Will force enable or disable lrcpc2 even if the host doesn't support it",
Expand Down
5 changes: 5 additions & 0 deletions FEXCore/Source/Interface/Context/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ class ContextImpl final : public FEXCore::Context::Context {
uint32_t ReconstructCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, bool WasInJIT, uint64_t* HostGPRs, uint64_t PSTATE) override;
void SetFlagsFromCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, uint32_t EFLAGS) override;

Context::XMMRecoveryMode ReconstructXMMRegisters(const FEXCore::Core::InternalThreadState* Thread, __uint128_t* XMM_Low,
__uint128_t* YMM_High, Context::XMMRecoveryMode Mode) override;
void SetXMMRegistersFromState(FEXCore::Core::InternalThreadState* Thread, const __uint128_t* XMM_Low, const __uint128_t* YMM_High,
Context::XMMRecoveryMode Mode) override;

/**
* @brief Used to create FEX thread objects in preparation for creating a true OS thread. Does set a TID or PID.
*
Expand Down
4 changes: 4 additions & 0 deletions FEXCore/Source/Interface/Core/CPUBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ namespace CPU {
{0x0000'0000'8000'0000ULL, 0x0000'0000'8000'0000ULL}, // NAMED_VECTOR_PADDSUBPS_INVERT_UPPER
{0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT
{0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT_UPPER
{0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPS_INVERT
{0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPS_INVERT_UPPER
{0x0000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPD_INVERT
{0x0000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPD_INVERT_UPPER
{0x0000'0001'0000'0000ULL, 0x0000'0003'0000'0002ULL}, // NAMED_VECTOR_MOVMSKPS_SHIFT
{0x040B'0E01'0B0E'0104ULL, 0x0C03'0609'0306'090CULL}, // NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE
{0x0706'0504'FFFF'FFFFULL, 0xFFFF'FFFF'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_0110B
Expand Down
10 changes: 5 additions & 5 deletions FEXCore/Source/Interface/Core/CPUID.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,7 @@ void CPUIDEmu::SetupHostHybridFlag() {}


void CPUIDEmu::SetupFeatures() {
// TODO: Enable once AVX is supported.
if (false && CTX->HostFeatures.SupportsAVX) {
if (CTX->HostFeatures.SupportsAVX) {
XCR0 |= XCR0_AVX;
}

Expand Down Expand Up @@ -417,7 +416,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_01h(uint32_t Leaf) const {
(1 << 9) | // SSSE3
(0 << 10) | // L1 context ID
(0 << 11) | // Silicon debug
(0 << 12) | // FMA3
(SupportsAVX() << 12) | // FMA3
(1 << 13) | // CMPXCHG16B
(0 << 14) | // xTPR update control
(0 << 15) | // Perfmon and debug capability
Expand Down Expand Up @@ -601,6 +600,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const {
// This is due to LRCPC performance on Cortex being abysmal.
// Only enable EnhancedREPMOVS if SoftwareTSO isn't required OR if MemcpySetTSO is not enabled.
const uint32_t SupportsEnhancedREPMOVS = CTX->SoftwareTSORequired() == false || MemcpySetTSOEnabled() == false;
const uint32_t SupportsVPCLMULQDQ = CTX->HostFeatures.SupportsPMULL_128Bit && SupportsAVX();

// Number of subfunctions
Res.eax = 0x0;
Expand All @@ -609,7 +609,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const {
(0 << 2) | // SGX
(SupportsAVX() << 3) | // BMI1
(0 << 4) | // Intel Hardware Lock Elison
(0 << 5) | // AVX2 support
(SupportsAVX() << 5) | // AVX2 support
(1 << 6) | // FPU data pointer updated only on exception
(1 << 7) | // SMEP support
(SupportsAVX() << 8) | // BMI2
Expand Down Expand Up @@ -647,7 +647,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const {
(0 << 7) | // CET shadow stack
(0 << 8) | // GFNI
(CTX->HostFeatures.SupportsAES256 << 9) | // VAES
(0 << 10) | // VPCLMULQDQ
(SupportsVPCLMULQDQ << 10) | // VPCLMULQDQ
(0 << 11) | // AVX512_VNNI
(0 << 12) | // AVX512_BITALG
(0 << 13) | // Intel Total Memory Encryption
Expand Down
57 changes: 57 additions & 0 deletions FEXCore/Source/Interface/Core/Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,63 @@ uint32_t ContextImpl::ReconstructCompactedEFLAGS(FEXCore::Core::InternalThreadSt
return EFLAGS;
}

Context::XMMRecoveryMode ContextImpl::ReconstructXMMRegisters(const FEXCore::Core::InternalThreadState* Thread, __uint128_t* XMM_Low,
                                                              __uint128_t* YMM_High, Context::XMMRecoveryMode Mode) {
  // Copies the guest vector register file out of the thread's CPU state.
  // Returns the mode that was actually recovered: YMM when AVX state was
  // available and requested, otherwise XMM (low 128 bits only).
  // Number of guest XMM registers visible: NUM_XMMS in 64-bit mode, 8 in 32-bit mode.
  const size_t NumRegs = Config.Is64BitMode ? FEXCore::Core::CPUState::NUM_XMMS : 8;
  const auto& State = Thread->CurrentFrame->State;

  // Without AVX support (or when only XMM was asked for), only the SSE
  // portion of the state is meaningful.
  if (Mode != Context::XMMRecoveryMode::YMM || !HostFeatures.SupportsAVX) {
    memcpy(XMM_Low, State.xmm.sse.data, NumRegs * sizeof(__uint128_t));
    return Context::XMMRecoveryMode::XMM;
  }

  if (HostFeatures.SupportsSVE256) {
    ///< Converged (SVE256) layout keeps each register's low and high 128-bit
    ///< halves adjacent; de-interleave them into the two output arrays.
    for (size_t Reg = 0; Reg < NumRegs; ++Reg) {
      memcpy(&XMM_Low[Reg], &State.xmm.avx.data[Reg][0], sizeof(__uint128_t));
      memcpy(&YMM_High[Reg], &State.xmm.avx.data[Reg][2], sizeof(__uint128_t));
    }
  } else {
    ///< Non-converged layout already stores the halves in separate arrays,
    ///< matching what FEX wants here.
    for (size_t Reg = 0; Reg < NumRegs; ++Reg) {
      memcpy(&XMM_Low[Reg], &State.xmm.sse.data[Reg][0], sizeof(__uint128_t));
      memcpy(&YMM_High[Reg], &State.avx_high[Reg][0], sizeof(__uint128_t));
    }
  }

  return Mode;
}

void ContextImpl::SetXMMRegistersFromState(FEXCore::Core::InternalThreadState* Thread, const __uint128_t* XMM_Low,
                                           const __uint128_t* YMM_High, Context::XMMRecoveryMode Mode) {
  // Writes the guest vector register file back into the thread's CPU state.
  // The inverse of ReconstructXMMRegisters: YMM_High is only consumed when
  // YMM mode is requested and the host supports AVX.
  // Number of guest XMM registers visible: NUM_XMMS in 64-bit mode, 8 in 32-bit mode.
  const size_t NumRegs = Config.Is64BitMode ? FEXCore::Core::CPUState::NUM_XMMS : 8;
  auto& State = Thread->CurrentFrame->State;

  // Without AVX support (or when only XMM was provided), only the SSE
  // portion of the state is written.
  if (Mode != Context::XMMRecoveryMode::YMM || !HostFeatures.SupportsAVX) {
    memcpy(State.xmm.sse.data, XMM_Low, NumRegs * sizeof(__uint128_t));
    return;
  }

  if (HostFeatures.SupportsSVE256) {
    ///< Converged (SVE256) layout interleaves each register's low and high
    ///< 128-bit halves; re-interleave the two input arrays into it.
    for (size_t Reg = 0; Reg < NumRegs; ++Reg) {
      memcpy(&State.xmm.avx.data[Reg][0], &XMM_Low[Reg], sizeof(__uint128_t));
      memcpy(&State.xmm.avx.data[Reg][2], &YMM_High[Reg], sizeof(__uint128_t));
    }
  } else {
    ///< Non-converged layout stores the halves in separate arrays,
    ///< matching what FEX wants here.
    for (size_t Reg = 0; Reg < NumRegs; ++Reg) {
      memcpy(&State.xmm.sse.data[Reg][0], &XMM_Low[Reg], sizeof(__uint128_t));
      memcpy(&State.avx_high[Reg][0], &YMM_High[Reg], sizeof(__uint128_t));
    }
  }
}

void ContextImpl::SetFlagsFromCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, uint32_t EFLAGS) {
const auto Frame = Thread->CurrentFrame;
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_EFLAG_BITS; ++i) {
Expand Down
3 changes: 3 additions & 0 deletions FEXCore/Source/Interface/Core/Frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,9 @@ bool Decoder::NormalOpHeader(const FEXCore::X86Tables::X86InstInfo* Info, uint16
if (CTX->Config.Is64BitMode && (Byte1 & 0b00100000) == 0) {
DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_B;
}
if (options.w) {
DecodeInst->Flags |= DecodeFlags::FLAG_OPTION_AVX_W;
}
if (!(map_select >= 1 && map_select <= 3)) {
LogMan::Msg::EFmt("We don't understand a map_select of: {}", map_select);
return false;
Expand Down
12 changes: 5 additions & 7 deletions FEXCore/Source/Interface/Core/HostFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ static void OverrideFeatures(HostFeatures* Features) {
LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive");

ENABLE_DISABLE_OPTION(SupportsAVX, AVX, AVX);
ENABLE_DISABLE_OPTION(SupportsAVX2, AVX2, AVX2);
ENABLE_DISABLE_OPTION(SupportsSVE128, SVE, SVE);
ENABLE_DISABLE_OPTION(SupportsAFP, AFP, AFP);
ENABLE_DISABLE_OPTION(SupportsRCPC, LRCPC, LRCPC);
Expand Down Expand Up @@ -119,9 +118,6 @@ static void OverrideFeatures(HostFeatures* Features) {

///< Only force enable SVE256 if SVE is already enabled and ForceSVEWidth is set to >= 256.
Features->SupportsSVE256 = ForceSVEWidth() && ForceSVEWidth() >= 256;
if (!Features->SupportsSVE256) {
Features->SupportsAVX = false;
}
}

HostFeatures::HostFeatures() {
Expand Down Expand Up @@ -169,8 +165,6 @@ HostFeatures::HostFeatures() {

SupportsAES256 = SupportsAVX && SupportsAES;

// TODO: AVX2 is currently unsupported. Disable until the remaining features are implemented.
SupportsAVX2 = false;
SupportsBMI1 = true;
SupportsBMI2 = true;
SupportsCLWB = true;
Expand Down Expand Up @@ -257,7 +251,6 @@ HostFeatures::HostFeatures() {
Supports3DNow = X86Features.has(Xbyak::util::Cpu::t3DN) && X86Features.has(Xbyak::util::Cpu::tE3DN);
SupportsSSE4A = X86Features.has(Xbyak::util::Cpu::tSSE4a);
SupportsAVX = true;
SupportsAVX2 = true;
SupportsSHA = X86Features.has(Xbyak::util::Cpu::tSHA);
SupportsBMI1 = X86Features.has(Xbyak::util::Cpu::tBMI1);
SupportsBMI2 = X86Features.has(Xbyak::util::Cpu::tBMI2);
Expand All @@ -282,5 +275,10 @@ HostFeatures::HostFeatures() {
#endif
SupportsPreserveAllABI = FEXCORE_HAS_PRESERVE_ALL_ATTR;
OverrideFeatures(this);
FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
if (!Is64BitMode()) {
///< Always disable AVX in 32-bit mode.
SupportsAVX = false;
}
}
} // namespace FEXCore
25 changes: 25 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,31 @@ DEF_OP(Vector_FToF) {
}
}

DEF_OP(Vector_FToF2) {
  // Vector float-to-float width conversion acting on the *second* (upper)
  // half of the source vector — hence the "2" suffix, mirroring AArch64's
  // FCVTL2/FCVTN2 behavior. Widening conversions (half->float, float->double)
  // use fcvtl2; narrowing conversions (float->half, double->float) use fcvtn2.
  const auto Op = IROp->C<IR::IROp_Vector_FToF2>();

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubEmitSize = ConvertSubRegSize248(IROp);
  // Pack destination and source element sizes into one value so a single
  // switch can dispatch on the (dst, src) pair: 0xDDSS in bytes.
  const auto Conv = (ElementSize << 8) | Op->SrcElementSize;

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector.ID());

  switch (Conv) {
  case 0x0402:   // Float <- Half
  case 0x0804: { // Double <- Float
    // Widen: each element of the upper half of Vector becomes a
    // double-width element in Dst.
    fcvtl2(SubEmitSize, Dst.D(), Vector.D());
    break;
  }
  case 0x0204:   // Half <- Float
  case 0x0408: { // Float <- Double
    // Narrow: elements of Vector are halved in width.
    // NOTE(review): both paths pass the .D() register view — presumably the
    // emitter derives the real operand width from SubEmitSize; confirm
    // against the Arm64 emitter's fcvtl2/fcvtn2 signatures.
    fcvtn2(SubEmitSize, Dst.D(), Vector.D());
    break;
  }
  // Any other (dst, src) size pairing is an IR generation bug.
  default: LOGMAN_MSG_A_FMT("Unknown Vector_FToF2 Type : 0x{:04x}", Conv); break;
  }
}

DEF_OP(Vector_FToI) {
const auto Op = IROp->C<IR::IROp_Vector_FToI>();
const auto OpSize = IROp->Size;
Expand Down
Loading