diff --git a/unittests/InstructionCountCI/X87ldst-SVE.json b/unittests/InstructionCountCI/X87ldst-SVE.json new file mode 100644 index 0000000000..4b63bd77e8 --- /dev/null +++ b/unittests/InstructionCountCI/X87ldst-SVE.json @@ -0,0 +1,343 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "SVE128", + "SVE256" + ], + "DisabledHostFeatures": [ + "AFP", + "FLAGM", + "FLAGM2", + "RPRES" + ] + }, + "Instructions": { + "fstp tword [rax]": { + "ExpectedInstructionCount": 15, + "Comment": "Single 80-bit store.", + "ExpectedArm64ASM": [ + "ldrb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x4]", + "mov x21, v2.d[1]", + "add x22, x4, #0x8 (8)", + "strh w21, [x22]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w22, w22, w20", + "bic w21, w21, w22", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]" + ] + }, + "2-store 80bit": { + "x86InstructionCount": 2, + "ExpectedInstructionCount": 29, + "x86Insts": [ + "fstp tword [rax]", + "fstp tword [rax+10]" + ], + "ExpectedArm64ASM": [ + "ldrb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x4]", + "mov x21, v2.d[1]", + "add x22, x4, #0x8 (8)", + "strh w21, [x22]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0xa (10)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w22, w22, w20", + "bic w21, w21, w22", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]" + ] + }, + "8-store 80bit": { + "x86InstructionCount": 8, + "ExpectedInstructionCount": 113, + "x86Insts": [ + "fstp tword [rax]", + "fstp tword [rax+10]", + "fstp tword 
[rax+20]", + "fstp tword [rax+30]", + "fstp tword [rax+40]", + "fstp tword [rax+50]", + "fstp tword [rax+60]", + "fstp tword [rax+70]" + ], + "ExpectedArm64ASM": [ + "ldrb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x4]", + "mov x21, v2.d[1]", + "add x22, x4, #0x8 (8)", + "strh w21, [x22]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0xa (10)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x14 (20)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x1e (30)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x28 (40)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x32 (50)", + "add x0, 
x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x3c (60)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x21, x4, #0x46 (70)", + "add x0, x28, x20, lsl #4", + "ldr q2, [x0, #1040]", + "str d2, [x21]", + "mov x23, v2.d[1]", + "add x21, x21, #0x8 (8)", + "strh w23, [x21]", + "ldrb w21, [x28, #1298]", + "lsl w22, w22, w20", + "bic w21, w21, w22", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]" + ] + }, + "fld tword [rax]": { + "ExpectedInstructionCount": 14, + "Comment": "Single 80-bit load.", + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "add x20, x4, #0x8 (8)", + "ld1 {v2.h}[4], [x20]", + "ldrb w20, [x28, #1019]", + "mov w21, #0x1", + "sub w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "str q2, [x0, #1040]", + "ldrb w22, [x28, #1298]", + "lsl w20, w21, w20", + "orr w20, w22, w20", + "strb w20, [x28, #1298]" + ] + }, + "2-load 80bit": { + "x86InstructionCount": 2, + "ExpectedInstructionCount": 24, + "x86Insts": [ + "fld tword [rax]", + "fld tword [rax+10]" + ], + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "add x20, x4, #0x8 (8)", + "ld1 {v2.h}[4], [x20]", + "add x20, x4, #0xa (10)", + "ldr d3, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v3.h}[4], [x20]", + "ldrb w20, [x28, #1019]", + "sub w20, w20, #0x2 (2)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + 
"str q3, [x0, #1040]", + "add w21, w20, #0x1 (1)", + "and w21, w21, #0x7", + "add x0, x28, x21, lsl #4", + "str q2, [x0, #1040]", + "mov w21, #0x8", + "sub w20, w21, w20", + "ldrb w21, [x28, #1298]", + "mov w22, #0x303", + "lsr w20, w22, w20", + "orr w20, w21, w20", + "strb w20, [x28, #1298]" + ] + }, + "8-load 80bit": { + "x86InstructionCount": 8, + "ExpectedInstructionCount": 67, + "x86Insts": [ + "fld tword [rax]", + "fld tword [rax+10]", + "fld tword [rax+20]", + "fld tword [rax+30]", + "fld tword [rax+40]", + "fld tword [rax+50]", + "fld tword [rax+60]", + "fld tword [rax+70]" + ], + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "add x20, x4, #0x8 (8)", + "ld1 {v2.h}[4], [x20]", + "add x20, x4, #0xa (10)", + "ldr d3, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v3.h}[4], [x20]", + "add x20, x4, #0x14 (20)", + "ldr d4, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v4.h}[4], [x20]", + "add x20, x4, #0x1e (30)", + "ldr d5, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v5.h}[4], [x20]", + "add x20, x4, #0x28 (40)", + "ldr d6, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v6.h}[4], [x20]", + "add x20, x4, #0x32 (50)", + "ldr d7, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v7.h}[4], [x20]", + "add x20, x4, #0x3c (60)", + "ldr d8, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v8.h}[4], [x20]", + "add x20, x4, #0x46 (70)", + "ldr d9, [x20]", + "add x20, x20, #0x8 (8)", + "ld1 {v9.h}[4], [x20]", + "ldrb w20, [x28, #1019]", + "sub w20, w20, #0x8 (8)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "str q9, [x0, #1040]", + "add w21, w20, #0x1 (1)", + "and w21, w21, #0x7", + "add x0, x28, x21, lsl #4", + "str q8, [x0, #1040]", + "add w21, w20, #0x2 (2)", + "and w21, w21, #0x7", + "add x0, x28, x21, lsl #4", + "str q7, [x0, #1040]", + "add w21, w20, #0x3 (3)", + "and w21, w21, #0x7", + "add x0, x28, x21, lsl #4", + "str q6, [x0, #1040]", + "add w21, w20, #0x4 (4)", + "and w21, w21, #0x7", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #1040]", + "add 
w21, w20, #0x5 (5)", + "and w21, w21, #0x7", + "add x0, x28, x21, lsl #4", + "str q4, [x0, #1040]", + "add w21, w20, #0x6 (6)", + "and w21, w21, #0x7", + "add x0, x28, x21, lsl #4", + "str q3, [x0, #1040]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "add x0, x28, x20, lsl #4", + "str q2, [x0, #1040]", + "mov w20, #0xff", + "strb w20, [x28, #1298]" + ] + } + } +}