diff --git a/unittests/InstructionCountCI/X87store-SVE.json b/unittests/InstructionCountCI/X87store-SVE.json index cf54d18ce7..d4feba020f 100644 --- a/unittests/InstructionCountCI/X87store-SVE.json +++ b/unittests/InstructionCountCI/X87store-SVE.json @@ -14,16 +14,15 @@ }, "Instructions": { "fstp tword [rax]": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 14, "Comment": "Single 80-bit store.", "ExpectedArm64ASM": [ "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x4]", - "mov x21, v2.d[1]", - "add x22, x4, #0x8 (8)", - "strh w21, [x22]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x4]", "ldrb w21, [x28, #1298]", "mov w22, #0x1", "lsl w22, w22, w20", @@ -36,7 +35,7 @@ }, "2-store 80bit": { "x86InstructionCount": 2, - "ExpectedInstructionCount": 29, + "ExpectedInstructionCount": 27, "x86Insts": [ "fstp tword [rax]", "fstp tword [rax+10]" @@ -45,10 +44,9 @@ "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x4]", - "mov x21, v2.d[1]", - "add x22, x4, #0x8 (8)", - "strh w21, [x22]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x4]", "ldrb w21, [x28, #1298]", "mov w22, #0x1", "lsl w23, w22, w20", @@ -60,10 +58,9 @@ "add x21, x4, #0xa (10)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w22, w22, w20", "bic w21, w21, w22", @@ -75,7 +72,7 @@ }, "8-store 80bit": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 113, + "ExpectedInstructionCount": 105, "x86Insts": [ "fstp tword [rax]", "fstp tword [rax+10]", @@ -90,10 +87,9 @@ "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x4]", - "mov x21, v2.d[1]", - "add x22, x4, #0x8 (8)", - "strh w21, [x22]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x4]", "ldrb w21, [x28, #1298]", "mov w22, #0x1", "lsl w23, w22, w20", @@ -105,10 +101,9 @@ "add x21, x4, #0xa (10)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", "bic w21, w21, w23", @@ -119,10 +114,9 @@ "add x21, x4, #0x14 (20)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", "bic w21, w21, w23", @@ -133,10 +127,9 @@ "add x21, x4, #0x1e (30)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", "bic w21, w21, w23", @@ -147,10 +140,9 @@ "add x21, x4, #0x28 (40)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", "bic w21, w21, w23", @@ -161,10 +153,9 @@ "add x21, x4, #0x32 (50)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", "bic w21, w21, w23", @@ -175,10 +166,9 @@ "add x21, x4, #0x3c (60)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", "bic w21, w21, w23", @@ -189,10 +179,9 @@ "add x21, x4, #0x46 (70)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "str d2, [x21]", - "mov x23, v2.d[1]", - "add x21, x21, #0x8 (8)", - "strh w23, [x21]", + "mov x0, #0xa", + "whilelt p5.b, xzr, x0", + "st1b {z2.b}, p5, [x21]", "ldrb w21, [x28, #1298]", "lsl w22, w22, w20", "bic w21, w21, w22",