Skip to content

Commit

Permalink
improve tag store when it's a single tag
Browse files Browse the repository at this point in the history
  • Loading branch information
pmatos committed May 13, 2024
1 parent 6d246ae commit 51cd94d
Show file tree
Hide file tree
Showing 6 changed files with 331 additions and 386 deletions.
53 changes: 29 additions & 24 deletions FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,12 +380,7 @@ OrderedNode* X87StackOptimization::SynchronizeStackValues() {
if (Valid == StackSlot::UNUSED) {
continue;
}
OrderedNode* TopIndex = nullptr;
if (i == 0) {
TopIndex = TopValue;
} else {
TopIndex = IREmit->_And(OpSize::i32Bit, IREmit->_Add(OpSize::i32Bit, TopValue, GetConstant(i)), GetConstant(0x7));
}
OrderedNode* TopIndex = GetOffsetTopWithCache_Slow(i);
if (Valid == StackSlot::VALID) {
LogMan::Msg::DFmt("Writing StackData[{}]", i);
IREmit->_StoreContextIndexed(StackMember.StackDataNode, TopIndex, 16, MMBaseOffset(), 16, FPRClass);
Expand All @@ -398,16 +393,21 @@ OrderedNode* X87StackOptimization::SynchronizeStackValues() {
if (Mask == 0xff) {
IREmit->_StoreContext(1, GPRClass, GetConstant(Mask), offsetof(FEXCore::Core::CPUState, AbridgedFTW));
} else if (Mask != 0) {
// perform a rotate right on mask by top
// since we can operate on 32bits as a minimum:
// ror (mask, top) = ((M << 8) | M) >>r TOP given that we have rotate rights on arm (>>r)
auto* TopValue = GetTopWithCache_Slow();
OrderedNode* RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), TopValue);
OrderedNode* RMask = IREmit->_Or(OpSize::i32Bit, MaskC, IREmit->_Lshl(OpSize::i32Bit, MaskC, GetConstant(8)));
RMask = IREmit->_Lshr(OpSize::i32Bit, RMask, RotAmount);
OrderedNode* AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
OrderedNode* NewAbridgedFTW = IREmit->_Or(OpSize::i32Bit, AbridgedFTW, RMask);
IREmit->_StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
if (std::popcount(Mask) == 1) {
uint8_t BitIdx = __builtin_ctz(Mask);
SetX87ValidTag(GetOffsetTopWithCache_Slow(BitIdx), true);
} else {
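// Rotate the 8-bit mask so that bit i (an offset from top) lands on bit (top + i) & 7.
// Since we must operate on 32 bits as a minimum, duplicate the mask into bits [15:8] and shift right:
// rotated = ((M << 8) | M) >> (8 - TOP), of which the low byte is what gets stored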
auto* TopValue = GetTopWithCache_Slow();
OrderedNode* RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), TopValue);
OrderedNode* RMask = IREmit->_Or(OpSize::i32Bit, MaskC, IREmit->_Lshl(OpSize::i32Bit, MaskC, GetConstant(8)));
RMask = IREmit->_Lshr(OpSize::i32Bit, RMask, RotAmount);
OrderedNode* AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
OrderedNode* NewAbridgedFTW = IREmit->_Or(OpSize::i32Bit, AbridgedFTW, RMask);
IREmit->_StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
}
} else {
LogMan::Msg::DFmt("No valid tags written");
}
Expand All @@ -419,14 +419,19 @@ OrderedNode* X87StackOptimization::SynchronizeStackValues() {
if (Mask == 0xff) {
IREmit->_StoreContext(1, GPRClass, GetConstant(0), offsetof(FEXCore::Core::CPUState, AbridgedFTW));
} else if (Mask != 0) {
// Same rotate right as above but this time on the invalid mask
auto* TopValue = GetTopWithCache_Slow();
OrderedNode* RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), TopValue);
OrderedNode* RMask = IREmit->_Or(OpSize::i32Bit, MaskC, IREmit->_Lshl(OpSize::i32Bit, MaskC, GetConstant(8)));
RMask = IREmit->_Lshr(OpSize::i32Bit, RMask, RotAmount);
OrderedNode* AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
OrderedNode* NewAbridgedFTW = IREmit->_Andn(OpSize::i32Bit, AbridgedFTW, RMask);
IREmit->_StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
if (std::popcount(Mask)) {
uint8_t BitIdx = __builtin_ctz(Mask);
SetX87ValidTag(GetOffsetTopWithCache_Slow(BitIdx), false);
} else {
// Same rotate right as above but this time on the invalid mask
auto* TopValue = GetTopWithCache_Slow();
OrderedNode* RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), TopValue);
OrderedNode* RMask = IREmit->_Or(OpSize::i32Bit, MaskC, IREmit->_Lshl(OpSize::i32Bit, MaskC, GetConstant(8)));
RMask = IREmit->_Lshr(OpSize::i32Bit, RMask, RotAmount);
OrderedNode* AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
OrderedNode* NewAbridgedFTW = IREmit->_Andn(OpSize::i32Bit, AbridgedFTW, RMask);
IREmit->_StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
}
} else {
LogMan::Msg::DFmt("No invalid tags written");
}
Expand Down
16 changes: 8 additions & 8 deletions unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json
Original file line number Diff line number Diff line change
Expand Up @@ -1977,10 +1977,9 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"str s2, [x6, #52]",
"mov w21, #0x8",
"ldr w4, [x20, #8]",
"mov w22, #0xfffffff8",
"ldr s2, [x20, w22, sxtw]",
"mov w21, #0xfffffff8",
"ldr s2, [x20, w21, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
Expand Down Expand Up @@ -2036,8 +2035,8 @@
"fmov s2, s0",
"str s2, [x4, #56]",
"ldr w5, [x20, #8]",
"mov w22, #0xfffffffc",
"ldr s2, [x20, w22, sxtw]",
"mov w21, #0xfffffffc",
"ldr s2, [x20, w21, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
Expand Down Expand Up @@ -2097,10 +2096,11 @@
"ldr w9, [x20]",
"add x8, x20, #0x4 (4)",
"ldrb w20, [x28, #747]",
"sub w20, w21, w20",
"mov w21, #0x8080",
"lsr w20, w21, w20",
"add w20, w20, #0x7 (7)",
"and w20, w20, #0x7",
"ldrb w21, [x28, #1026]",
"mov w22, #0x1",
"lsl w20, w22, w20",
"bic w20, w21, w20",
"strb w20, [x28, #1026]"
]
Expand Down
Loading

0 comments on commit 51cd94d

Please sign in to comment.