From ce52b0046c2aea1e2a925c5ad20e526de74356c3 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 13 Dec 2024 11:16:16 -0500 Subject: [PATCH 1/4] InstCountCI: fix turnip instcountci this was 32-bit Signed-off-by: Alyssa Rosenzweig --- .../FEXOpt/MultiInst_TSO_32bit.json | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json index 69bc00aa52..3a5c57abab 100644 --- a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json +++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json @@ -1,6 +1,6 @@ { "Features": { - "Bitness": 64, + "Bitness": 32, "EnabledHostFeatures": [ "TSO", "LRCPC", @@ -21,7 +21,7 @@ "Instructions": { "Load variables from structs": { "x86InstructionCount": 7, - "ExpectedInstructionCount": 27, + "ExpectedInstructionCount": 21, "Comment": [ "Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned", "Loads a bunch of values from structs passed as arguments", @@ -37,28 +37,22 @@ "sub eax, [ebx + 4]" ], "ExpectedArm64ASM": [ - "add x20, x7, #0x8 (8)", - "mov w20, w20", + "add w20, w7, #0x8 (8)", "ldapur w11, [x20]", "nop", - "add x20, x7, #0x4 (4)", - "mov w20, w20", + "add w20, w7, #0x4 (4)", "ldapur w5, [x20]", "nop", - "mov w20, w7", - "ldapur w6, [x20]", + "ldapur w6, [x7]", "nop", - "add x20, x7, #0xc (12)", - "mov w20, w20", + "add w20, w7, #0xc (12)", "ldapur w10, [x20]", "nop", "mul w5, w5, w11", - "add x20, x6, #0xc (12)", - "mov w20, w20", + "add w20, w6, #0xc (12)", "ldapur w4, [x20]", "nop", - "add x20, x6, #0x4 (4)", - "mov w20, w20", + "add w20, w6, #0x4 (4)", "ldapur w20, [x20]", "nop", "eor w27, w4, w20", From 2c7eaf3413a295a4cf6f66ed1fe762374face161 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 13 Dec 2024 11:09:27 -0500 Subject: [PATCH 2/4] OpcodeDispatcher: refactor address mode select Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 9b08ea64f4..4ccbb0c225 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -4176,35 +4176,33 @@ AddressMode OpDispatchBuilder::SelectAddressMode(AddressMode A, bool AtomicTSO, // addresses are reserved and therefore wrap around is invalid. // // TODO: Also handle GPR TSO if we can guarantee the constant inlines. - if (SupportsRegIndex) { - if ((A.Base || A.Segment) && A.Offset) { - const bool Const_16K = A.Offset > -16384 && A.Offset < 16384 && A.AddrSize == OpSize::i32Bit && GPRSize == OpSize::i32Bit; - - if ((A.AddrSize == OpSize::i64Bit) || Const_16K) { - // Peel off the offset - AddressMode B = A; - B.Offset = 0; - - return { - .Base = LoadEffectiveAddress(B, true /* AddSegmentBase */, false), - .Index = _Constant(A.Offset), - .IndexType = MEM_OFFSET_SXTX, - .IndexScale = 1, - }; - } + if (SupportsRegIndex && (A.Base || A.Segment) && A.Offset) { + const bool Const_16K = A.Offset > -16384 && A.Offset < 16384 && A.AddrSize == OpSize::i32Bit && GPRSize == OpSize::i32Bit; + + if ((A.AddrSize == OpSize::i64Bit) || Const_16K) { + // Peel off the offset + AddressMode B = A; + B.Offset = 0; + + return { + .Base = LoadEffectiveAddress(B, true /* AddSegmentBase */, false), + .Index = _Constant(A.Offset), + .IndexType = MEM_OFFSET_SXTX, + .IndexScale = 1, + }; } + } - // Try a (possibly scaled) register index. - if (A.AddrSize == OpSize::i64Bit && A.Base && (A.Index || A.Segment) && !A.Offset && - (A.IndexScale == 1 || A.IndexScale == IR::OpSizeToSize(AccessSize))) { - if (A.Index && A.Segment) { - A.Base = _Add(GPRSize, A.Base, A.Segment); - } else if (A.Segment) { - A.Index = A.Segment; - A.IndexScale = 1; - } - return A; + // Try a (possibly scaled) register index. + if (SupportsRegIndex && A.AddrSize == OpSize::i64Bit && A.Base && (A.Index || A.Segment) && !A.Offset && + (A.IndexScale == 1 || A.IndexScale == IR::OpSizeToSize(AccessSize))) { + if (A.Index && A.Segment) { + A.Base = _Add(GPRSize, A.Base, A.Segment); + } else if (A.Segment) { + A.Index = A.Segment; + A.IndexScale = 1; } + return A; } // Fallback on software address calculation From b856bcaeb9625bc9e9ce27beaa6478509e9eb26c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 13 Dec 2024 11:10:10 -0500 Subject: [PATCH 3/4] OpcodeDispatcher: optimize simm9 Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 4ccbb0c225..b514ce39e1 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -4176,10 +4176,18 @@ AddressMode OpDispatchBuilder::SelectAddressMode(AddressMode A, bool AtomicTSO, // addresses are reserved and therefore wrap around is invalid. // // TODO: Also handle GPR TSO if we can guarantee the constant inlines. - if (SupportsRegIndex && (A.Base || A.Segment) && A.Offset) { - const bool Const_16K = A.Offset > -16384 && A.Offset < 16384 && A.AddrSize == OpSize::i32Bit && GPRSize == OpSize::i32Bit; + if ((A.Base || A.Segment) && A.Offset) { + const bool SupportsSmallConst = A.AddrSize == OpSize::i32Bit && GPRSize == OpSize::i32Bit; + const bool Const_16K = A.Offset > -16384 && A.Offset < 16384 && SupportsSmallConst; - if ((A.AddrSize == OpSize::i64Bit) || Const_16K) { + // Signed immediate unscaled 9-bit range for both regular and LRCPC2 ops. + bool IsSIMM9 = (A.Offset >= -256) && (A.Offset <= 255) && SupportsSmallConst; + IsSIMM9 &= CTX->HostFeatures.SupportsTSOImm9; + + // More general offsets work only for regular ops. + bool IsGeneral = ((A.AddrSize == OpSize::i64Bit) || Const_16K); + + if (IsSIMM9 || (!AtomicTSO && IsGeneral)) { // Peel off the offset AddressMode B = A; B.Offset = 0; From 4af37e755a5a94fff52c1284e719fd9d6be933ca Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 13 Dec 2024 11:24:24 -0500 Subject: [PATCH 4/4] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../FEXOpt/MultiInst_TSO_32bit.json | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json index 3a5c57abab..6ae3380eab 100644 --- a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json +++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json @@ -21,7 +21,7 @@ "Instructions": { "Load variables from structs": { "x86InstructionCount": 7, - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 16, "Comment": [ "Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned", "Loads a bunch of values from structs passed as arguments", @@ -37,23 +37,18 @@ "sub eax, [ebx + 4]" ], "ExpectedArm64ASM": [ - "add w20, w7, #0x8 (8)", - "ldapur w11, [x20]", + "ldapur w11, [x7, #8]", "nop", - "add w20, w7, #0x4 (4)", - "ldapur w5, [x20]", + "ldapur w5, [x7, #4]", "nop", "ldapur w6, [x7]", "nop", - "add w20, w7, #0xc (12)", - "ldapur w10, [x20]", + "ldapur w10, [x7, #12]", "nop", "mul w5, w5, w11", - "add w20, w6, #0xc (12)", - "ldapur w4, [x20]", + "ldapur w4, [x6, #12]", "nop", - "add w20, w6, #0x4 (4)", - "ldapur w20, [x20]", + "ldapur w20, [x6, #4]", "nop", "eor w27, w4, w20", "subs w26, w4, w20",