Skip to content

Commit

Permalink
instcountci: Generate SVE for 80bit load/stores when possible
Browse files Browse the repository at this point in the history
  • Loading branch information
pmatos committed Dec 6, 2024
1 parent 1d3ce30 commit 0b1229d
Showing 1 changed file with 50 additions and 83 deletions.
133 changes: 50 additions & 83 deletions unittests/InstructionCountCI/X87ldst-SVE.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@
},
"Instructions": {
"fstp tword [rax]": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 13,
"Comment": "Single 80-bit store.",
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x4]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w22, w22, w20",
Expand All @@ -36,7 +34,7 @@
},
"2-store 80bit": {
"x86InstructionCount": 2,
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 25,
"x86Insts": [
"fstp tword [rax]",
"fstp tword [rax+10]"
Expand All @@ -45,10 +43,8 @@
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x4]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w23, w22, w20",
Expand All @@ -60,10 +56,8 @@
"add x21, x4, #0xa (10)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
Expand All @@ -75,7 +69,7 @@
},
"8-store 80bit": {
"x86InstructionCount": 8,
"ExpectedInstructionCount": 113,
"ExpectedInstructionCount": 97,
"x86Insts": [
"fstp tword [rax]",
"fstp tword [rax+10]",
Expand All @@ -90,10 +84,8 @@
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x4]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w23, w22, w20",
Expand All @@ -105,10 +97,8 @@
"add x21, x4, #0xa (10)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
Expand All @@ -119,10 +109,8 @@
"add x21, x4, #0x14 (20)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
Expand All @@ -133,10 +121,8 @@
"add x21, x4, #0x1e (30)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
Expand All @@ -147,10 +133,8 @@
"add x21, x4, #0x28 (40)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
Expand All @@ -161,10 +145,8 @@
"add x21, x4, #0x32 (50)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
Expand All @@ -175,10 +157,8 @@
"add x21, x4, #0x3c (60)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
Expand All @@ -189,10 +169,8 @@
"add x21, x4, #0x46 (70)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ptrue p2.h, vl5",
"st1h {z2.h}, p2, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
Expand All @@ -203,12 +181,11 @@
]
},
"fld tword [rax]": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 13,
"Comment": "Single 80-bit load.",
"ExpectedArm64ASM": [
"ldr d2, [x4]",
"add x20, x4, #0x8 (8)",
"ld1 {v2.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z2.h}, p2/z, [x4]",
"ldrb w20, [x28, #1019]",
"mov w21, #0x1",
"sub w20, w20, #0x1 (1)",
Expand All @@ -224,19 +201,17 @@
},
"2-load 80bit": {
"x86InstructionCount": 2,
"ExpectedInstructionCount": 24,
"ExpectedInstructionCount": 22,
"x86Insts": [
"fld tword [rax]",
"fld tword [rax+10]"
],
"ExpectedArm64ASM": [
"ldr d2, [x4]",
"add x20, x4, #0x8 (8)",
"ld1 {v2.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z2.h}, p2/z, [x4]",
"add x20, x4, #0xa (10)",
"ldr d3, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v3.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z3.h}, p2/z, [x20]",
"ldrb w20, [x28, #1019]",
"sub w20, w20, #0x2 (2)",
"and w20, w20, #0x7",
Expand All @@ -258,7 +233,7 @@
},
"8-load 80bit": {
"x86InstructionCount": 8,
"ExpectedInstructionCount": 67,
"ExpectedInstructionCount": 59,
"x86Insts": [
"fld tword [rax]",
"fld tword [rax+10]",
Expand All @@ -270,37 +245,29 @@
"fld tword [rax+70]"
],
"ExpectedArm64ASM": [
"ldr d2, [x4]",
"add x20, x4, #0x8 (8)",
"ld1 {v2.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z2.h}, p2/z, [x4]",
"add x20, x4, #0xa (10)",
"ldr d3, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v3.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z3.h}, p2/z, [x20]",
"add x20, x4, #0x14 (20)",
"ldr d4, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v4.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z4.h}, p2/z, [x20]",
"add x20, x4, #0x1e (30)",
"ldr d5, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v5.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z5.h}, p2/z, [x20]",
"add x20, x4, #0x28 (40)",
"ldr d6, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v6.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z6.h}, p2/z, [x20]",
"add x20, x4, #0x32 (50)",
"ldr d7, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v7.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z7.h}, p2/z, [x20]",
"add x20, x4, #0x3c (60)",
"ldr d8, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v8.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z8.h}, p2/z, [x20]",
"add x20, x4, #0x46 (70)",
"ldr d9, [x20]",
"add x20, x20, #0x8 (8)",
"ld1 {v9.h}[4], [x20]",
"ptrue p2.h, vl5",
"ld1h {z9.h}, p2/z, [x20]",
"ldrb w20, [x28, #1019]",
"sub w20, w20, #0x8 (8)",
"and w20, w20, #0x7",
Expand Down

0 comments on commit 0b1229d

Please sign in to comment.