From 7ad9f7a43a983cf9dcd346de81f588e63703dce7 Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Fri, 22 Nov 2024 11:17:21 +0100 Subject: [PATCH] instcountci: testing multiple 80bit stores using SVE In preparation for #4166 which should improve on these results. --- .../InstructionCountCI/X87store-SVE.json | 206 ++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 unittests/InstructionCountCI/X87store-SVE.json diff --git a/unittests/InstructionCountCI/X87store-SVE.json b/unittests/InstructionCountCI/X87store-SVE.json new file mode 100644 index 0000000000..cf54d18ce7 --- /dev/null +++ b/unittests/InstructionCountCI/X87store-SVE.json @@ -0,0 +1,206 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "SVE128", + "SVE256" + ], + "DisabledHostFeatures": [ + "AFP", + "FLAGM", + "FLAGM2", + "RPRES" + ] + }, + "Instructions": { + "fstp tword [rax]": { + "ExpectedInstructionCount": 15, + "Comment": "Single 80-bit store.", + "ExpectedArm64ASM": [ + "ldrb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x4]", + "mov x21, v2.d[1]", + "add x22, x4, #0x8 (8)", + "strh w21, [x22]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w22, w22, w20", + "bic w21, w21, w22", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]" + ] + }, + "2-store 80bit": { + "x86InstructionCount": 2, + "ExpectedInstructionCount": 29, + "x86Insts": [ + "fstp tword [rax]", + "fstp tword [rax+10]" + ], + "ExpectedArm64ASM": [ + "ldrb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x4]", + "mov x21, v2.d[1]", + "add x22, x4, #0x8 (8)", + "strh w21, [x22]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0xa (10)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w22, w22, w20", + "bic w21, w21, w22", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]" + ] + }, + "8-store 80bit": { + "x86InstructionCount": 8, + "ExpectedInstructionCount": 113, + "x86Insts": [ + "fstp tword [rax]", + "fstp tword [rax+10]", + "fstp tword [rax+20]", + "fstp tword [rax+30]", + "fstp tword [rax+40]", + "fstp tword [rax+50]", + "fstp tword [rax+60]", + "fstp tword [rax+70]" + ], + "ExpectedArm64ASM": [ + "ldrb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x4]", + "mov x21, v2.d[1]", + "add x22, x4, #0x8 (8)", + "strh w21, [x22]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0xa (10)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x14 (20)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x1e (30)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x28 (40)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x32 (50)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x3c (60)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x46 (70)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w22, w22, w20", + "bic w21, w21, w22", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]" + ] + } + } +}