Skip to content

Commit

Permalink
AVX128: Implement support for vcvt{ss2sd,sd2ss}
Browse files Browse the repository at this point in the history
  • Loading branch information
Sonicadvance1 committed Jun 18, 2024
1 parent b4c578b commit 94c22d5
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
3 changes: 3 additions & 0 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,9 @@ class OpDispatchBuilder final : public IREmitter {
template<bool Signed>
void AVX128_VPMULHW(OpcodeArgs);

// AVX128 handler for the scalar float-to-float conversions vcvtss2sd / vcvtsd2ss.
// The handler table instantiates it as <8, 4> and <4, 8>; both template arguments
// are forwarded unchanged to the scalar convert-and-insert IR op as the destination
// and source element sizes (presumably in bytes — confirm against the IR op definition).
template<size_t DstElementSize, size_t SrcElementSize>
void AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs);

// End of AVX 128-bit implementation

void InvalidOp(OpcodeArgs);
Expand Down
21 changes: 19 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ void OpDispatchBuilder::InstallAVX128Handlers() {

// TODO: {OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Float<8, 4>},
// TODO: {OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Float<4, 8>},
// TODO: {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<8, 4>},
// TODO: {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<4, 8>},
{OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float<8, 4>},
{OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float<4, 8>},

// TODO: {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, false>},
// TODO: {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, true>},
Expand Down Expand Up @@ -1893,4 +1893,21 @@ void OpDispatchBuilder::AVX128_VPMULHW(OpcodeArgs) {
});
}

template<size_t DstElementSize, size_t SrcElementSize>
void OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs) {
  // Scalar float-to-float convert with insert semantics. Care is needed here:
  //  - the converted value lands in the lowest element of the lower 128 bits,
  //  - the remainder of the lower 128 bits is carried over from the first source,
  //  - the upper 128 bits of the destination are written as zero.
  auto VectorSrc = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  // The second source is loaded at the op's source size; the load is flagged as
  // tolerating garbage in the bits above that size.
  Ref ScalarSrc = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], GetSrcSize(Op), Op->Flags, {.AllowUpperGarbage = true});

  // Lower half: convert and insert into the low lane of the first source's lower 128 bits.
  // NOTE(review): the trailing `false` is presumably the "zero upper bits" flag — confirm.
  Ref LowerHalf = _VFToFScalarInsert(OpSize::i128Bit, DstElementSize, SrcElementSize, VectorSrc.Low, ScalarSrc, false);

  // Upper half: explicit zero vector, giving the zero-extension of the top 128 bits.
  Ref UpperHalf = LoadZeroVector(OpSize::i128Bit);

  RefPair Result {};
  Result.Low = LowerHalf;
  Result.High = UpperHalf;
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}


} // namespace FEXCore::IR

0 comments on commit 94c22d5

Please sign in to comment.