Skip to content

Commit

Permalink
OpcodeDispatcher: optimize 8/16-bit adc
Browse files — browse the repository at this point in the history
Signed-off-by: Alyssa Rosenzweig <[email protected]>
  • Loading branch information
alyssarosenzweig committed Apr 30, 2024
1 parent 28fa88f commit 76b5ca4
Showing 1 changed file with 17 additions and 13 deletions.
30 changes: 17 additions & 13 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -340,16 +340,19 @@ OrderedNode* OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode*
HandleNZCV_RMW();
Res = _AdcWithFlags(OpSize, Src1, Src2);
} else {
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
Res = _Adc(OpSize, Src1, Src2);
Res = _Bfe(OpSize, SrcSize * 8, 0, Res);

// Need to zero-extend for correct comparisons below
Src2 = _Bfe(OpSize, SrcSize * 8, 0, Src2);

auto SelectOpLT = _Select(FEXCore::IR::COND_ULT, Res, Src2, One, Zero);
auto SelectOpLE = _Select(FEXCore::IR::COND_ULE, Res, Src2, One, Zero);
auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, One, SelectOpLE, SelectOpLT);
// Note that we do not extend Src2PlusCF, since we depend on proper
// 32-bit arithmetic to correctly handle the Src2 = 0xffff case.
OrderedNode* Src2PlusCF = _Adc(OpSize, _Constant(0), Src2);

// Need to zero-extend for the comparison.
Res = _Add(OpSize, Src1, Src2PlusCF);
Res = _Bfe(OpSize, SrcSize * 8, 0, Res);

// TODO: We can fold that second Bfe in (cmp uxth).
auto SelectCF = _Select(FEXCore::IR::COND_ULT, Res, Src2PlusCF, One, Zero);

SetNZ_ZeroCV(SrcSize, Res);
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(SelectCF);
Expand Down Expand Up @@ -378,16 +381,17 @@ OrderedNode* OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode*
// Rectify output carry
CarryInvert();
} else {
// Zero extend for correct comparison behaviour with Src1 = 0xffff.
Src1 = _Bfe(OpSize, SrcSize * 8, 0, Src1);

auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
Res = _Sub(OpSize, Src1, _Add(OpSize, Src2, CF));
auto Src1MinusCF = _Sub(OpSize, Src1, CF);

Res = _Sub(OpSize, Src1MinusCF, Src2);
Res = _Bfe(OpSize, SrcSize * 8, 0, Res);

// Need to zero-extend for correct comparisons below
Src1 = _Bfe(OpSize, SrcSize * 8, 0, Src1);

auto SelectOpLT = _Select(FEXCore::IR::COND_UGT, Res, Src1, One, Zero);
auto SelectOpLE = _Select(FEXCore::IR::COND_UGE, Res, Src1, One, Zero);
auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, One, SelectOpLE, SelectOpLT);
auto SelectCF = _Select(FEXCore::IR::COND_ULT, Src1MinusCF, Res, One, Zero);

SetNZ_ZeroCV(SrcSize, Res);
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(SelectCF);
Expand Down

0 comments on commit 76b5ca4

Please sign in to comment.