Skip to content

Commit

Permalink
Merge pull request #3491 from alyssarosenzweig/rclse/waw
Browse files Browse the repository at this point in the history
RCLSE: Optimize store-after-store
  • Loading branch information
Sonicadvance1 authored Mar 14, 2024
2 parents 8a3d08e + ed59f73 commit ca6b2e4
Show file tree
Hide file tree
Showing 14 changed files with 897 additions and 1,052 deletions.
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/JIT/Arm64/EncryptionOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ DEF_OP(VSha256U0) {
else {
mov(VTMP1.Q(), Src1.Q());
sha256su0(VTMP1, Src2);
mov(Dst.Q(), Src1.Q());
mov(Dst.Q(), VTMP1.Q());
}
}

Expand Down
14 changes: 12 additions & 2 deletions FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,10 +547,20 @@ bool RCLSE::ClassifyContextLoad(FEXCore::IR::IREmitter *IREmit, ContextInfo *Loc

// Records a store to a context member and drops the store it makes redundant.
//
// The member covering [Offset, Offset + Size) is looked up in LocalInfo and its
// current access record is snapshotted. The new write is then recorded through
// RecordAccess. If the snapshot shows that the last access to this member was a
// write with the same register class, offset and size, that earlier store node
// is removed from the IR.
//
// Returns true when an earlier store was removed, false otherwise.
bool RCLSE::ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::OrderedNode *ValueNode) {
  auto Member = FindMemberInfo(LocalInfo, Offset, Size);

  // Snapshot the record before this store is recorded; presumably RecordAccess
  // overwrites it in place, so the prior state has to be copied out first.
  ContextMemberInfo Before = *Member;
  RecordAccess(Member, Class, Offset, Size, LastAccessType::WRITE, ValueNode, CodeNode);

  // The earlier access must describe exactly the same location as this one...
  const bool SameLocation = Before.AccessRegClass == Member->AccessRegClass &&
                            Before.AccessOffset == Member->AccessOffset &&
                            Before.AccessSize == Size;
  // ...and must itself have been a store, i.e. no load happened in between.
  const bool PreviousWasStore = Before.Accessed == LastAccessType::WRITE;

  if (!(SameLocation && PreviousWasStore)) {
    // TODO: Optimize the case of partial stores.
    return false;
  }

  // Store-after-store with no intervening load: the older store is dead.
  IREmit->Remove(Before.StoreNode);
  return true;
}

Expand Down
3 changes: 3 additions & 0 deletions unittests/ASM/Disabled_Tests_Simulator
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,6 @@ Test_VEX/vroundpd.asm
Test_VEX/vroundps.asm
Test_VEX/vroundsd.asm
Test_VEX/vroundss.asm

# Simulator doesn't support cycle counter reading
Test_TwoByte/0F_31.asm
20 changes: 13 additions & 7 deletions unittests/InstructionCountCI/FEXOpt/MultiInst.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
],
"Instructions": {
"push ax, bx": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 4,
"Comment": [
"Mergable 16-bit pushes. May or may not be an optimization."
],
Expand All @@ -23,12 +23,14 @@
"push bx"
],
"ExpectedArm64ASM": [
"strh w4, [x8, #-2]!",
"mov x20, x8",
"strh w4, [x20, #-2]!",
"mov x8, x20",
"strh w7, [x8, #-2]!"
]
},
"push rax, rbx": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 4,
"Comment": [
"Mergable 64-bit pushes"
],
Expand All @@ -37,12 +39,14 @@
"push rbx"
],
"ExpectedArm64ASM": [
"str x4, [x8, #-8]!",
"mov x20, x8",
"str x4, [x20, #-8]!",
"mov x8, x20",
"str x7, [x8, #-8]!"
]
},
"adds xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": [
"Redundant scalar adds that can get eliminated without AFP."
],
Expand All @@ -51,9 +55,11 @@
"addss xmm0, xmm2"
],
"ExpectedArm64ASM": [
"mov v2.16b, v16.16b",
"fadd s0, s16, s17",
"mov v16.s[0], v0.s[0]",
"fadd s0, s16, s18",
"mov v2.s[0], v0.s[0]",
"mov v16.16b, v2.16b",
"fadd s0, s2, s18",
"mov v16.s[0], v0.s[0]"
]
},
Expand Down
8 changes: 5 additions & 3 deletions unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
],
"Instructions": {
"adds xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 4,
"Comment": [
"Redundant scalar operations should get eliminated with AFP"
],
Expand All @@ -24,8 +24,10 @@
"addss xmm0, xmm2"
],
"ExpectedArm64ASM": [
"fadd s16, s16, s17",
"fadd s16, s16, s18"
"mov v2.16b, v16.16b",
"fadd s2, s16, s17",
"mov v16.16b, v2.16b",
"fadd s16, s2, s18"
]
}
}
Expand Down
Loading

0 comments on commit ca6b2e4

Please sign in to comment.