Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize SIMM9 with TSO #4216

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 32 additions & 26 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4176,35 +4176,41 @@ AddressMode OpDispatchBuilder::SelectAddressMode(AddressMode A, bool AtomicTSO,
// addresses are reserved and therefore wrap around is invalid.
//
// TODO: Also handle GPR TSO if we can guarantee the constant inlines.
if (SupportsRegIndex) {
if ((A.Base || A.Segment) && A.Offset) {
const bool Const_16K = A.Offset > -16384 && A.Offset < 16384 && A.AddrSize == OpSize::i32Bit && GPRSize == OpSize::i32Bit;

if ((A.AddrSize == OpSize::i64Bit) || Const_16K) {
// Peel off the offset
AddressMode B = A;
B.Offset = 0;

return {
.Base = LoadEffectiveAddress(B, true /* AddSegmentBase */, false),
.Index = _Constant(A.Offset),
.IndexType = MEM_OFFSET_SXTX,
.IndexScale = 1,
};
}
if ((A.Base || A.Segment) && A.Offset) {
const bool SupportsSmallConst = A.AddrSize == OpSize::i32Bit && GPRSize == OpSize::i32Bit;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this needs to be const bool SupportsSmallConst = A.AddrSize == GPRSize; Otherwise we lose small const support for LRCPC2 on 64-bit processes with 64-bit addresses?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the small const logic is wacked as it is on 64-bit. I'll investigate this a bit more.

const bool Const_16K = A.Offset > -16384 && A.Offset < 16384 && SupportsSmallConst;

// Signed immediate unscaled 9-bit range for both regular and LRCPC2 ops.
bool IsSIMM9 = (A.Offset >= -256) && (A.Offset <= 255) && SupportsSmallConst;
IsSIMM9 &= CTX->HostFeatures.SupportsTSOImm9;

// More general offsets work only for regular ops.
bool IsGeneral = ((A.AddrSize == OpSize::i64Bit) || Const_16K);

if (IsSIMM9 || (!AtomicTSO && IsGeneral)) {
// Peel off the offset
AddressMode B = A;
B.Offset = 0;

return {
.Base = LoadEffectiveAddress(B, true /* AddSegmentBase */, false),
.Index = _Constant(A.Offset),
.IndexType = MEM_OFFSET_SXTX,
.IndexScale = 1,
};
}
}

// Try a (possibly scaled) register index.
if (A.AddrSize == OpSize::i64Bit && A.Base && (A.Index || A.Segment) && !A.Offset &&
(A.IndexScale == 1 || A.IndexScale == IR::OpSizeToSize(AccessSize))) {
if (A.Index && A.Segment) {
A.Base = _Add(GPRSize, A.Base, A.Segment);
} else if (A.Segment) {
A.Index = A.Segment;
A.IndexScale = 1;
}
return A;
// Try a (possibly scaled) register index.
if (SupportsRegIndex && A.AddrSize == OpSize::i64Bit && A.Base && (A.Index || A.Segment) && !A.Offset &&
(A.IndexScale == 1 || A.IndexScale == IR::OpSizeToSize(AccessSize))) {
if (A.Index && A.Segment) {
A.Base = _Add(GPRSize, A.Base, A.Segment);
} else if (A.Segment) {
A.Index = A.Segment;
A.IndexScale = 1;
}
return A;
}

// Fallback on software address calculation
Expand Down
27 changes: 8 additions & 19 deletions unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"Features": {
"Bitness": 64,
"Bitness": 32,
"EnabledHostFeatures": [
"TSO",
"LRCPC",
Expand All @@ -21,7 +21,7 @@
"Instructions": {
"Load variables from structs": {
"x86InstructionCount": 7,
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 16,
"Comment": [
"Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned",
"Loads a bunch of values from structs passed as arguments",
Expand All @@ -37,29 +37,18 @@
"sub eax, [ebx + 4]"
],
"ExpectedArm64ASM": [
"add x20, x7, #0x8 (8)",
"mov w20, w20",
"ldapur w11, [x20]",
"ldapur w11, [x7, #8]",
"nop",
"add x20, x7, #0x4 (4)",
"mov w20, w20",
"ldapur w5, [x20]",
"ldapur w5, [x7, #4]",
"nop",
"mov w20, w7",
"ldapur w6, [x20]",
"ldapur w6, [x7]",
"nop",
"add x20, x7, #0xc (12)",
"mov w20, w20",
"ldapur w10, [x20]",
"ldapur w10, [x7, #12]",
"nop",
"mul w5, w5, w11",
"add x20, x6, #0xc (12)",
"mov w20, w20",
"ldapur w4, [x20]",
"ldapur w4, [x6, #12]",
"nop",
"add x20, x6, #0x4 (4)",
"mov w20, w20",
"ldapur w20, [x20]",
"ldapur w20, [x6, #4]",
"nop",
"eor w27, w4, w20",
"subs w26, w4, w20",
Expand Down
Loading