Skip to content

Commit

Permalink
x87 refactor slowpath wip
Browse files Browse the repository at this point in the history
  • Loading branch information
pmatos committed Apr 24, 2024
1 parent 7ebba82 commit 4419e74
Show file tree
Hide file tree
Showing 5 changed files with 482 additions and 353 deletions.
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ class OpDispatchBuilder final : public IREmitter {
void VZEROOp(OpcodeArgs);

// X87 Ops
OrderedNode* ReconstructFSW();
OrderedNode* ReconstructFSW(OrderedNode* T = nullptr);
// Returns new x87 stack top from FSW.
OrderedNode* ReconstructX87StateFromFSW(OrderedNode* FSW);
template<size_t width>
Expand Down
22 changes: 0 additions & 22 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,24 +89,6 @@ void OpDispatchBuilder::SetX87Top(OrderedNode* Value) {
_StoreContext(1, GPRClass, Value, offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC);
}

// Assembles the x87 FPU status word (FSW) from the individually tracked pieces
// of state: the stack top and the four condition-code flags.
// Layout used below: TOP = 3 bits at bit 11, C0 = bit 8, C1 = bit 9,
// C2 = bit 10, C3 = bit 14. All other bits of the result are left as zero.
OrderedNode* OpDispatchBuilder::ReconstructFSW() {
  // Begin with an all-zero word and insert the 3-bit stack top at bit 11.
  OrderedNode* StatusWord = _Constant(0);
  StatusWord = _Bfi(OpSize::i64Bit, 3, 11, StatusWord, GetX87Top());

  // Fetch every condition flag before merging any of them.
  OrderedNode* Cond0 = GetRFLAG(FEXCore::X86State::X87FLAG_C0_LOC);
  OrderedNode* Cond1 = GetRFLAG(FEXCore::X86State::X87FLAG_C1_LOC);
  OrderedNode* Cond2 = GetRFLAG(FEXCore::X86State::X87FLAG_C2_LOC);
  OrderedNode* Cond3 = GetRFLAG(FEXCore::X86State::X87FLAG_C3_LOC);

  // OR each flag into the word, shifted to its FSW bit position.
  StatusWord = _Orlshl(OpSize::i64Bit, StatusWord, Cond0, 8);
  StatusWord = _Orlshl(OpSize::i64Bit, StatusWord, Cond1, 9);
  StatusWord = _Orlshl(OpSize::i64Bit, StatusWord, Cond2, 10);
  StatusWord = _Orlshl(OpSize::i64Bit, StatusWord, Cond3, 14);

  return StatusWord;
}

OrderedNode* OpDispatchBuilder::ReconstructX87StateFromFSW(OrderedNode* FSW) {
auto Top = _Bfe(OpSize::i32Bit, 3, 11, FSW);
SetX87Top(Top);
Expand Down Expand Up @@ -343,10 +325,6 @@ void OpDispatchBuilder::X87LDSW(OpcodeArgs) {
ReconstructX87StateFromFSW(NewFSW);
}

// FNSTSW: rebuilds the FPU status word from the tracked state and passes it
// to StoreResult as a GPR-class value for this op.
void OpDispatchBuilder::X87FNSTSW(OpcodeArgs) {
  OrderedNode* StatusWord = ReconstructFSW();
  StoreResult(GPRClass, Op, StatusWord, -1);
}

void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) {
// 14 bytes for 16bit
// 2 Bytes : FCW
Expand Down
42 changes: 37 additions & 5 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87New.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,13 +541,10 @@ void OpDispatchBuilder::F80SCALE(OpcodeArgs) {
// Moves the x87 stack-top pointer by one slot.
// Template parameter Inc selects the direction: true takes the increment
// branch, false takes the decrement branch.
//
// NOTE(review): this listing comes from a rendered diff without +/- markers,
// so two implementations appear side by side in each branch: the explicit
// GetX87Top()/SetX87Top() arithmetic (wrapping with `& 7`) and the
// _IncStackTop()/_DecStackTop() IR ops. Presumably the explicit arithmetic is
// the removed version and the IR ops are the replacement -- confirm against
// the repository before relying on either.
template<bool Inc>
void OpDispatchBuilder::X87ModifySTP(OpcodeArgs) {
// Flag the current header as containing x87 code.
CurrentHeader->HasX87 = true;
auto orig_top = GetX87Top();
if (Inc) {
// (top + 1) masked to 3 bits, then written back as the new top.
auto top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7));
SetX87Top(top);
_IncStackTop();
} else {
// (top - 1) masked to 3 bits, then written back as the new top.
auto top = _And(OpSize::i32Bit, _Sub(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7));
SetX87Top(top);
_DecStackTop();
}
}

Expand Down Expand Up @@ -582,4 +579,39 @@ void OpDispatchBuilder::F80F2XM1(OpcodeArgs) {
_F80F2XM1Stack();
}

// Operations dealing with loading and storing environment pieces


// Rebuilds the FPU Status Word (FSW) as an IR value.
// Only the stack top and the condition codes (C flags) are tracked, so only
// those fields are populated; every other bit of the result is zero.
//   Top: 3 bits starting at bit 11.
//   C0:  1 bit at bit 8.
//   C1:  1 bit at bit 9.
//   C2:  1 bit at bit 10.
//   C3:  1 bit at bit 14.
// T optionally supplies an already-computed stack top; when it is null the
// top is obtained through GetX87Top().
OrderedNode* OpDispatchBuilder::ReconstructFSW(OrderedNode* T) {
  // Start from zero and place the stack top into its 3-bit field.
  OrderedNode* StatusWord = _Constant(0);
  OrderedNode* StackTop = T;
  if (StackTop == nullptr) {
    StackTop = GetX87Top();
  }
  StatusWord = _Bfi(OpSize::i64Bit, 3, 11, StatusWord, StackTop);

  // Pair each condition flag with its bit position in the status word.
  // The initializer list is evaluated left to right, so all four flags are
  // fetched (C0, C1, C2, C3) before any of them is merged.
  struct ConditionBit {
    OrderedNode* Flag;
    uint8_t Shift;
  };
  const ConditionBit Conditions[] = {
    {GetRFLAG(FEXCore::X86State::X87FLAG_C0_LOC), 8},
    {GetRFLAG(FEXCore::X86State::X87FLAG_C1_LOC), 9},
    {GetRFLAG(FEXCore::X86State::X87FLAG_C2_LOC), 10},
    {GetRFLAG(FEXCore::X86State::X87FLAG_C3_LOC), 14},
  };

  for (const auto& [Flag, Shift] : Conditions) {
    StatusWord = _Orlshl(OpSize::i64Bit, StatusWord, Flag, Shift);
  }
  return StatusWord;
}

// FNSTSW: marks the current header as containing x87 code, emits a SyncStack
// op and uses its result as the stack top when rebuilding the status word,
// then passes that word to StoreResult as a GPR-class value for this op.
void OpDispatchBuilder::X87FNSTSW(OpcodeArgs) {
  CurrentHeader->HasX87 = true;

  OrderedNode* SyncedTop = _SyncStack();
  OrderedNode* StatusWord = ReconstructFSW(SyncedTop);
  StoreResult(GPRClass, Op, StatusWord, -1);
}

} // namespace FEXCore::IR
63 changes: 38 additions & 25 deletions FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -2368,6 +2368,29 @@
}
},
"F80": {
"GPR = SyncStack": {
"Desc": [
"Synchronizes the stack environment with the MMX registers.",
"Returns the current stack top."
],
"DestSize": "8",
"JITDispatch": false,
"HasSideEffects": true
},
"IncStackTop": {
"Desc": [
"Increase stack top-pointer."
],
"JITDispatch": false,
"HasSideEffects": true
},
"DecStackTop": {
"Desc": [
"Decrease stack top-pointer."
],
"JITDispatch": false,
"HasSideEffects": true
},
"FPR = PushStack FPR:$X80Src, OpSize:$OpSize, i1:$Float, u8:$LoadSize": {
"Desc": [
"Pushes the provided source on to the x87 stack.",
Expand Down Expand Up @@ -2415,51 +2438,46 @@
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80AddStack u8:$SrcStack1, u8:$SrcStack2": {
"F80AddStack u8:$SrcStack1, u8:$SrcStack2": {
"Desc": [
"Adds two stack locations together, storing the result in to the first stack location"
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80AddValue u8:$SrcStack, FPR:$X80Src": {
"F80AddValue u8:$SrcStack, FPR:$X80Src": {
"Desc": [
"Adds an operand value to a stack location. The result is stored in the stack location provided."
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80Add FPR:$X80Src1, FPR:$X80Src2": {
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80SubStack u8:$DstStack, u8:$SrcStack1, u8:$SrcStack2": {
"F80SubStack u8:$DstStack, u8:$SrcStack1, u8:$SrcStack2": {
"Desc": [
"Subtracts the value in stack location TOP+$SrcStack2 from the value in stack location TOP+$SrcStack1.",
"The result is stored in stack location TOP+$DstStack and returned."
"The result is stored in stack location TOP+$DstStack."
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80SubValue u8:$SrcStack, FPR:$X80Src": {
"F80SubValue u8:$SrcStack, FPR:$X80Src": {
"Desc": [
"Subtracts the value $X80Src from the value in stack location TOP+$SrcStack.",
"The result is stored in stack location TOP and returned."
"The result is stored in stack location TOP."
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80SubRValue FPR:$X80Src, u8:$SrcStack": {
"F80SubRValue FPR:$X80Src, u8:$SrcStack": {
"Desc": [
"Subtracts the value in stack location TOP+$SrcStack from the value $X80Src.",
"The result is stored in stack location TOP and returned."
"The result is stored in stack location TOP."
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80Sub FPR:$X80Src1, FPR:$X80Src2": {
Expand All @@ -2471,54 +2489,49 @@
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80MulStack u8:$SrcStack1, u8:$SrcStack2": {
"F80MulStack u8:$SrcStack1, u8:$SrcStack2": {
"Desc": [
"Multiplies two stack locations together, storing the result in to the first stack location"
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80MulValue u8:$SrcStack, FPR:$X80Src": {
"F80MulValue u8:$SrcStack, FPR:$X80Src": {
"Desc": [
"Multiplies a stack location by an operand value. The result is stored in the stack location provided."
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80Mul FPR:$X80Src1, FPR:$X80Src2": {
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80DivStack u8:$DstStack, u8:$SrcStack1, u8:$SrcStack2": {
"F80DivStack u8:$DstStack, u8:$SrcStack1, u8:$SrcStack2": {
"Desc": [
"Divides the value in stack location TOP+$SrcStack1 by the value in stack location TOP+$SrcStack2.",
"The result is stored in stack location TOP+$DstStack and returned.",
"The result is stored in stack location TOP+$DstStack.",
"`FPR|Stack[TOP+DstStack] = Stack[TOP+SrcStack1] / Stack[TOP+SrcStack2]`"
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80DivValue u8:$SrcStack, FPR:$X80Src": {
"F80DivValue u8:$SrcStack, FPR:$X80Src": {
"Desc": [
"Divides the value in stack location TOP+$SrcStack by the value $X80Src.",
"The result is stored in stack location TOP.",
"`FPR|Stack[TOP] = Stack[TOP+SrcStack] / X80Src`"
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80DivRValue FPR:$X80Src, u8:$SrcStack": {
"F80DivRValue FPR:$X80Src, u8:$SrcStack": {
"Desc": [
"Divides the value X80Src by the value in stack location TOP+$SrcStack.",
"The result is stored in stack location TOP and returned.",
"The result is stored in stack location TOP.",
"`FPR|Stack[TOP] = X80Src / Stack[TOP+SrcStack]`"
],
"HasSideEffects": true,
"DestSize": "16",
"JITDispatch": false
},
"FPR = F80Div FPR:$X80Src1, FPR:$X80Src2": {
Expand Down
Loading

0 comments on commit 4419e74

Please sign in to comment.