Skip to content

Commit

Permalink
instcountci: testing multiple 80bit ldst using SVE
Browse files Browse the repository at this point in the history
In preparation for FEX-Emu#4166, which should improve on these results.
  • Loading branch information
pmatos committed Dec 2, 2024
1 parent 2e7fc60 commit 33a3763
Show file tree
Hide file tree
Showing 2 changed files with 354 additions and 11 deletions.
22 changes: 11 additions & 11 deletions unittests/InstructionCountCI/Primary.json
Original file line number Diff line number Diff line change
Expand Up @@ -1996,10 +1996,10 @@
"ExpectedInstructionCount": 4,
"Comment": "0x86",
"ExpectedArm64ASM": [
"mov x20, x7",
"bfxil x20, x6, #0, #8",
"bfxil x6, x7, #0, #8",
"mov x7, x20"
"mov x20, x6",
"bfxil x20, x7, #0, #8",
"bfxil x7, x6, #0, #8",
"mov x6, x20"
]
},
"xchg [rax], cl": {
Expand All @@ -2014,10 +2014,10 @@
"ExpectedInstructionCount": 4,
"Comment": "0x87",
"ExpectedArm64ASM": [
"mov x20, x7",
"bfxil x20, x6, #0, #16",
"bfxil x6, x7, #0, #16",
"mov x7, x20"
"mov x20, x6",
"bfxil x20, x7, #0, #16",
"bfxil x7, x6, #0, #16",
"mov x6, x20"
]
},
"xchg [rax], cx": {
Expand All @@ -2032,9 +2032,9 @@
"ExpectedInstructionCount": 3,
"Comment": "0x87",
"ExpectedArm64ASM": [
"mov w20, w6",
"mov w6, w7",
"mov x7, x20"
"mov w20, w7",
"mov w7, w6",
"mov x6, x20"
]
},
"xchg [rax], ecx": {
Expand Down
343 changes: 343 additions & 0 deletions unittests/InstructionCountCI/X87ldst-SVE.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"SVE128",
"SVE256"
],
"DisabledHostFeatures": [
"AFP",
"FLAGM",
"FLAGM2",
"RPRES"
]
},
"Instructions": {
"fstp tword [rax]": {
"ExpectedInstructionCount": 15,
"Comment": "Single 80-bit store.",
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w22, w22, w20",
"bic w21, w21, w22",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]"
]
},
"2-store 80bit": {
"x86InstructionCount": 2,
"ExpectedInstructionCount": 29,
"x86Insts": [
"fstp tword [rax]",
"fstp tword [rax+10]"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0xa (10)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]"
]
},
"8-store 80bit": {
"x86InstructionCount": 8,
"ExpectedInstructionCount": 113,
"x86Insts": [
"fstp tword [rax]",
"fstp tword [rax+10]",
"fstp tword [rax+20]",
"fstp tword [rax+30]",
"fstp tword [rax+40]",
"fstp tword [rax+50]",
"fstp tword [rax+60]",
"fstp tword [rax+70]"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0xa (10)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x14 (20)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x1e (30)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x28 (40)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x32 (50)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x3c (60)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x46 (70)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]"
]
},
"fld tword [rax]": {
"ExpectedInstructionCount": 14,
"Comment": "Single 80-bit load.",
"ExpectedArm64ASM": [
"ldr d2, [x4]",
"add x20, x4, #0x8 (8)",
"ld1 {v2.h}[4], [x20]",
"ldrb w20, [x28, #1019]",
"mov w21, #0x1",
"sub w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #1040]",
"ldrb w22, [x28, #1298]",
"lsl w20, w21, w20",
"orr w20, w22, w20",
"strb w20, [x28, #1298]"
]
},
"2-load 80bit": {
"x86InstructionCount": 2,
"ExpectedInstructionCount": 24,
"x86Insts": [
"fld tword [rax]",
"fld tword [rax+10]"
],
"ExpectedArm64ASM": [
"ldr d2, [x4]",
"add x20, x4, #0x8 (8)",
"ld1 {v2.h}[4], [x20]",
"add x20, x4, #0xa (10)",
"ldr d3, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v3.h}[4], [x20]",
"ldrb w20, [x28, #1019]",
"sub w20, w20, #0x2 (2)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"str q3, [x0, #1040]",
"add w21, w20, #0x1 (1)",
"and w21, w21, #0x7",
"add x0, x28, x21, lsl #4",
"str q2, [x0, #1040]",
"mov w21, #0x8",
"sub w20, w21, w20",
"ldrb w21, [x28, #1298]",
"mov w22, #0x303",
"lsr w20, w22, w20",
"orr w20, w21, w20",
"strb w20, [x28, #1298]"
]
},
"8-load 80bit": {
"x86InstructionCount": 8,
"ExpectedInstructionCount": 67,
"x86Insts": [
"fld tword [rax]",
"fld tword [rax+10]",
"fld tword [rax+20]",
"fld tword [rax+30]",
"fld tword [rax+40]",
"fld tword [rax+50]",
"fld tword [rax+60]",
"fld tword [rax+70]"
],
"ExpectedArm64ASM": [
"ldr d2, [x4]",
"add x20, x4, #0x8 (8)",
"ld1 {v2.h}[4], [x20]",
"add x20, x4, #0xa (10)",
"ldr d3, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v3.h}[4], [x20]",
"add x20, x4, #0x14 (20)",
"ldr d4, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v4.h}[4], [x20]",
"add x20, x4, #0x1e (30)",
"ldr d5, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v5.h}[4], [x20]",
"add x20, x4, #0x28 (40)",
"ldr d6, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v6.h}[4], [x20]",
"add x20, x4, #0x32 (50)",
"ldr d7, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v7.h}[4], [x20]",
"add x20, x4, #0x3c (60)",
"ldr d8, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v8.h}[4], [x20]",
"add x20, x4, #0x46 (70)",
"ldr d9, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v9.h}[4], [x20]",
"ldrb w20, [x28, #1019]",
"sub w20, w20, #0x8 (8)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"str q9, [x0, #1040]",
"add w21, w20, #0x1 (1)",
"and w21, w21, #0x7",
"add x0, x28, x21, lsl #4",
"str q8, [x0, #1040]",
"add w21, w20, #0x2 (2)",
"and w21, w21, #0x7",
"add x0, x28, x21, lsl #4",
"str q7, [x0, #1040]",
"add w21, w20, #0x3 (3)",
"and w21, w21, #0x7",
"add x0, x28, x21, lsl #4",
"str q6, [x0, #1040]",
"add w21, w20, #0x4 (4)",
"and w21, w21, #0x7",
"add x0, x28, x21, lsl #4",
"str q5, [x0, #1040]",
"add w21, w20, #0x5 (5)",
"and w21, w21, #0x7",
"add x0, x28, x21, lsl #4",
"str q4, [x0, #1040]",
"add w21, w20, #0x6 (6)",
"and w21, w21, #0x7",
"add x0, x28, x21, lsl #4",
"str q3, [x0, #1040]",
"add w20, w20, #0x7 (7)",
"and w20, w20, #0x7",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #1040]",
"mov w20, #0xff",
"strb w20, [x28, #1298]"
]
}
}
}

0 comments on commit 33a3763

Please sign in to comment.