Skip to content

Commit

Permalink
Merge pull request #4209 from Sonicadvance1/tso_support_instcountci
Browse files Browse the repository at this point in the history
InstCountCI: Implement support for TSO and LRCPC and add hot block that could be optimized
  • Loading branch information
alyssarosenzweig authored Dec 13, 2024
2 parents 735a4f9 + ac1e329 commit 9fb69ed
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Scripts/InstructionCountParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ class HostFeatures(Flag) :
FEATURE_CRYPTO = (1 << 10)
FEATURE_AES256 = (1 << 11)
FEATURE_SVEBITPERM = (1 << 12)
FEATURE_TSO = (1 << 13)
FEATURE_LRCPC = (1 << 14)
FEATURE_LRCPC2 = (1 << 15)

HostFeaturesLookup = {
"SVE128" : HostFeatures.FEATURE_SVE128,
Expand All @@ -70,6 +73,9 @@ class HostFeatures(Flag) :
"CRYPTO" : HostFeatures.FEATURE_CRYPTO,
"AES256" : HostFeatures.FEATURE_AES256,
"SVEBITPERM" : HostFeatures.FEATURE_SVEBITPERM,
"TSO" : HostFeatures.FEATURE_TSO,
"LRCPC" : HostFeatures.FEATURE_LRCPC,
"LRCPC2" : HostFeatures.FEATURE_LRCPC2,
}

def GetHostFeatures(data):
Expand Down
31 changes: 31 additions & 0 deletions Source/Tools/CodeSizeValidation/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,9 @@ int main(int argc, char** argv, char** const envp) {
FEATURE_CRYPTO = (1U << 10),
FEATURE_AES256 = (1U << 11),
FEATURE_SVEBITPERM = (1U << 12),
FEATURE_TSO = (1U << 13),
FEATURE_LRCPC = (1U << 14),
FEATURE_LRCPC2 = (1U << 15),
};

uint64_t SVEWidth = 0;
Expand Down Expand Up @@ -547,6 +550,20 @@ int main(int argc, char** argv, char** const envp) {
if (TestHeaderData->EnabledHostFeatures & FEATURE_SVEBITPERM) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLESVEBITPERM);
}
if (TestHeaderData->EnabledHostFeatures & FEATURE_LRCPC) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLELRCPC);
}
if (TestHeaderData->EnabledHostFeatures & FEATURE_LRCPC2) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLELRCPC2);
}

if (TestHeaderData->EnabledHostFeatures & FEATURE_TSO) {
// Always disable auto migration.
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOAUTOMIGRATION, "0");
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, "1");
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED, "1");
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED, "1");
}

// Always enable ARMv8.1 LSE atomics.
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEATOMICS);
Expand Down Expand Up @@ -584,6 +601,20 @@ int main(int argc, char** argv, char** const envp) {
if (TestHeaderData->DisabledHostFeatures & FEATURE_SVEBITPERM) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLESVEBITPERM);
}
if (TestHeaderData->DisabledHostFeatures & FEATURE_LRCPC) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLELRCPC);
}
if (TestHeaderData->DisabledHostFeatures & FEATURE_LRCPC2) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLELRCPC2);
}

if (TestHeaderData->DisabledHostFeatures & FEATURE_TSO) {
// Always disable auto migration.
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOAUTOMIGRATION, "0");
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, "0");
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED, "0");
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED, "0");
}

// Always enable preserve_all abi.
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEPRESERVEALLABI);
Expand Down
50 changes: 50 additions & 0 deletions unittests/InstructionCountCI/FEXOpt/MultiInst_32bit.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"Features": {
"Bitness": 32,
"EnabledHostFeatures": [
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256",
"RPRES",
"AFP"
]
},
"Comment": [
"These are instruction combinations that could be more optimal if FEX optimized for them"
],
"Instructions": {
"Load variables from structs": {
"x86InstructionCount": 7,
"ExpectedInstructionCount": 10,
"Comment": [
"Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned",
"Loads a bunch of values from structs passed as arguments",
"Loads failed to use LRCPC2/ldapur with small immediate offset when TSO is enabled, but is fine when TSO isn't enabled."
],
"x86Insts": [
"mov edi, [ecx + 8]",
"mov edx, [ecx + 4]",
"mov ebx, [ecx]",
"mov esi, [ecx + 0xc]",
"imul edx, edi",
"mov eax, [ebx + 0xc]",
"sub eax, [ebx + 4]"
],
"ExpectedArm64ASM": [
"ldr w11, [x7, #8]",
"ldr w5, [x7, #4]",
"ldr w6, [x7]",
"ldr w10, [x7, #12]",
"mul w5, w5, w11",
"ldr w4, [x6, #12]",
"ldr w20, [x6, #4]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"mov x4, x26"
]
}
}
}
70 changes: 70 additions & 0 deletions unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"TSO",
"LRCPC",
"LRCPC2",
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256",
"RPRES",
"AFP"
]
},
"Comment": [
"These are instruction combinations that could be more optimal if FEX optimized for them"
],
"Instructions": {
"Load variables from structs": {
"x86InstructionCount": 7,
"ExpectedInstructionCount": 27,
"Comment": [
"Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned",
"Loads a bunch of values from structs passed as arguments",
"Loads failed to use LRCPC2/ldapur with small immediate offset when possible"
],
"x86Insts": [
"mov edi, [ecx + 8]",
"mov edx, [ecx + 4]",
"mov ebx, [ecx]",
"mov esi, [ecx + 0xc]",
"imul edx, edi",
"mov eax, [ebx + 0xc]",
"sub eax, [ebx + 4]"
],
"ExpectedArm64ASM": [
"add x20, x7, #0x8 (8)",
"mov w20, w20",
"ldapur w11, [x20]",
"nop",
"add x20, x7, #0x4 (4)",
"mov w20, w20",
"ldapur w5, [x20]",
"nop",
"mov w20, w7",
"ldapur w6, [x20]",
"nop",
"add x20, x7, #0xc (12)",
"mov w20, w20",
"ldapur w10, [x20]",
"nop",
"mul w5, w5, w11",
"add x20, x6, #0xc (12)",
"mov w20, w20",
"ldapur w4, [x20]",
"nop",
"add x20, x6, #0x4 (4)",
"mov w20, w20",
"ldapur w20, [x20]",
"nop",
"eor w27, w4, w20",
"subs w26, w4, w20",
"mov x4, x26"
]
}
}
}

0 comments on commit 9fb69ed

Please sign in to comment.