From 800d447f3dc609741fdadae268f18e5fac195d6f Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Wed, 11 Dec 2024 14:55:19 -0800 Subject: [PATCH 1/2] InstCountCI: Add support for TSO and LRCPC1/2 --- Scripts/InstructionCountParser.py | 6 +++++ Source/Tools/CodeSizeValidation/Main.cpp | 31 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/Scripts/InstructionCountParser.py b/Scripts/InstructionCountParser.py index d5d970e26d..e88fa6ae7a 100755 --- a/Scripts/InstructionCountParser.py +++ b/Scripts/InstructionCountParser.py @@ -55,6 +55,9 @@ class HostFeatures(Flag) : FEATURE_CRYPTO = (1 << 10) FEATURE_AES256 = (1 << 11) FEATURE_SVEBITPERM = (1 << 12) + FEATURE_TSO = (1 << 13) + FEATURE_LRCPC = (1 << 14) + FEATURE_LRCPC2 = (1 << 15) HostFeaturesLookup = { "SVE128" : HostFeatures.FEATURE_SVE128, @@ -70,6 +73,9 @@ class HostFeatures(Flag) : "CRYPTO" : HostFeatures.FEATURE_CRYPTO, "AES256" : HostFeatures.FEATURE_AES256, "SVEBITPERM" : HostFeatures.FEATURE_SVEBITPERM, + "TSO" : HostFeatures.FEATURE_TSO, + "LRCPC" : HostFeatures.FEATURE_LRCPC, + "LRCPC2" : HostFeatures.FEATURE_LRCPC2, } def GetHostFeatures(data): diff --git a/Source/Tools/CodeSizeValidation/Main.cpp b/Source/Tools/CodeSizeValidation/Main.cpp index 7c244b0d11..420e5d84c2 100644 --- a/Source/Tools/CodeSizeValidation/Main.cpp +++ b/Source/Tools/CodeSizeValidation/Main.cpp @@ -506,6 +506,9 @@ int main(int argc, char** argv, char** const envp) { FEATURE_CRYPTO = (1U << 10), FEATURE_AES256 = (1U << 11), FEATURE_SVEBITPERM = (1U << 12), + FEATURE_TSO = (1U << 13), + FEATURE_LRCPC = (1U << 14), + FEATURE_LRCPC2 = (1U << 15), }; uint64_t SVEWidth = 0; @@ -547,6 +550,20 @@ int main(int argc, char** argv, char** const envp) { if (TestHeaderData->EnabledHostFeatures & FEATURE_SVEBITPERM) { HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLESVEBITPERM); } + if (TestHeaderData->EnabledHostFeatures & FEATURE_LRCPC) { + HostFeatureControl |= 
static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLELRCPC); + } + if (TestHeaderData->EnabledHostFeatures & FEATURE_LRCPC2) { + HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLELRCPC2); + } + + if (TestHeaderData->EnabledHostFeatures & FEATURE_TSO) { + // Always disable auto migration. + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOAUTOMIGRATION, "0"); + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, "1"); + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED, "1"); + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED, "1"); + } // Always enable ARMv8.1 LSE atomics. HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEATOMICS); @@ -584,6 +601,20 @@ int main(int argc, char** argv, char** const envp) { if (TestHeaderData->DisabledHostFeatures & FEATURE_SVEBITPERM) { HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLESVEBITPERM); } + if (TestHeaderData->DisabledHostFeatures & FEATURE_LRCPC) { + HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLELRCPC); + } + if (TestHeaderData->DisabledHostFeatures & FEATURE_LRCPC2) { + HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLELRCPC2); + } + + if (TestHeaderData->DisabledHostFeatures & FEATURE_TSO) { + // Always disable auto migration. + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOAUTOMIGRATION, "0"); + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, "0"); + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED, "0"); + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED, "0"); + } // Always enable preserve_all abi. 
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEPRESERVEALLABI); From ac1e32994a8e547338ff1d4b8fd801cd5406f962 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Wed, 11 Dec 2024 14:55:29 -0800 Subject: [PATCH 2/2] InstCountCI: Adds hot block that doesn't generate optimal code --- .../FEXOpt/MultiInst_32bit.json | 50 +++++++++++++ .../FEXOpt/MultiInst_TSO_32bit.json | 70 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 unittests/InstructionCountCI/FEXOpt/MultiInst_32bit.json create mode 100644 unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_32bit.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_32bit.json new file mode 100644 index 0000000000..9f2dc6db77 --- /dev/null +++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_32bit.json @@ -0,0 +1,50 @@ +{ + "Features": { + "Bitness": 32, + "EnabledHostFeatures": [ + "FLAGM", + "FLAGM2" + ], + "DisabledHostFeatures": [ + "SVE128", + "SVE256", + "RPRES", + "AFP" + ] + }, + "Comment": [ + "These are instruction combinations that could be more optimal if FEX optimized for them" + ], + "Instructions": { + "Load variables from structs": { + "x86InstructionCount": 7, + "ExpectedInstructionCount": 10, + "Comment": [ + "Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned", + "Loads a bunch of values from structs passed as arguments", + "Loads failed to use LRCPC2/ldapur with small immediate offset when TSO is enabled, but is fine when TSO isn't enabled." 
+ ], + "x86Insts": [ + "mov edi, [ecx + 8]", + "mov edx, [ecx + 4]", + "mov ebx, [ecx]", + "mov esi, [ecx + 0xc]", + "imul edx, edi", + "mov eax, [ebx + 0xc]", + "sub eax, [ebx + 4]" + ], + "ExpectedArm64ASM": [ + "ldr w11, [x7, #8]", + "ldr w5, [x7, #4]", + "ldr w6, [x7]", + "ldr w10, [x7, #12]", + "mul w5, w5, w11", + "ldr w4, [x6, #12]", + "ldr w20, [x6, #4]", + "eor w27, w4, w20", + "subs w26, w4, w20", + "mov x4, x26" + ] + } + } +} diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json new file mode 100644 index 0000000000..69bc00aa52 --- /dev/null +++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json @@ -0,0 +1,70 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "TSO", + "LRCPC", + "LRCPC2", + "FLAGM", + "FLAGM2" + ], + "DisabledHostFeatures": [ + "SVE128", + "SVE256", + "RPRES", + "AFP" + ] + }, + "Comment": [ + "These are instruction combinations that could be more optimal if FEX optimized for them" + ], + "Instructions": { + "Load variables from structs": { + "x86InstructionCount": 7, + "ExpectedInstructionCount": 27, + "Comment": [ + "Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned", + "Loads a bunch of values from structs passed as arguments", + "Loads failed to use LRCPC2/ldapur with small immediate offset when possible" + ], + "x86Insts": [ + "mov edi, [ecx + 8]", + "mov edx, [ecx + 4]", + "mov ebx, [ecx]", + "mov esi, [ecx + 0xc]", + "imul edx, edi", + "mov eax, [ebx + 0xc]", + "sub eax, [ebx + 4]" + ], + "ExpectedArm64ASM": [ + "add x20, x7, #0x8 (8)", + "mov w20, w20", + "ldapur w11, [x20]", + "nop", + "add x20, x7, #0x4 (4)", + "mov w20, w20", + "ldapur w5, [x20]", + "nop", + "mov w20, w7", + "ldapur w6, [x20]", + "nop", + "add x20, x7, #0xc (12)", + "mov w20, w20", + "ldapur w10, [x20]", + "nop", + "mul w5, w5, w11", + "add x20, x6, #0xc (12)", + "mov w20, w20", + "ldapur w4, [x20]", + "nop", + "add x20, x6, 
#0x4 (4)", + "mov w20, w20", + "ldapur w20, [x20]", + "nop", + "eor w27, w4, w20", + "subs w26, w4, w20", + "mov x4, x26" + ] + } + } +}