From 1cf34058c764db6e1651bb5b99fb659bb76f8737 Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Tue, 3 Dec 2024 17:41:18 +0100 Subject: [PATCH] instcountci: Cache predicate register generation from pattern --- unittests/InstructionCountCI/X87ldst-SVE.json | 24 ++++--------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/unittests/InstructionCountCI/X87ldst-SVE.json b/unittests/InstructionCountCI/X87ldst-SVE.json index d82b68d9b1..ec0abde349 100644 --- a/unittests/InstructionCountCI/X87ldst-SVE.json +++ b/unittests/InstructionCountCI/X87ldst-SVE.json @@ -34,7 +34,7 @@ }, "2-store 80bit": { "x86InstructionCount": 2, - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 24, "x86Insts": [ "fstp tword [rax]", "fstp tword [rax+10]" @@ -56,7 +56,6 @@ "add x21, x4, #0xa (10)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w22, w22, w20", @@ -69,7 +68,7 @@ }, "8-store 80bit": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 97, + "ExpectedInstructionCount": 90, "x86Insts": [ "fstp tword [rax]", "fstp tword [rax+10]", @@ -97,7 +96,6 @@ "add x21, x4, #0xa (10)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -109,7 +107,6 @@ "add x21, x4, #0x14 (20)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -121,7 +118,6 @@ "add x21, x4, #0x1e (30)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -133,7 +129,6 @@ "add x21, x4, #0x28 (40)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -145,7 +140,6 @@ "add x21, x4, #0x32 (50)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -157,7 +151,6 @@ "add x21, x4, #0x3c (60)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -169,7 +162,6 @@ "add x21, x4, #0x46 (70)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w22, w22, w20", @@ -201,7 +193,7 @@ }, "2-load 80bit": { "x86InstructionCount": 2, - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 21, "x86Insts": [ "fld tword [rax]", "fld tword [rax+10]" @@ -210,7 +202,6 @@ "ptrue p2.h, vl5", "ld1h {z2.h}, p2/z, [x4]", "add x20, x4, #0xa (10)", - "ptrue p2.h, vl5", "ld1h {z3.h}, p2/z, [x20]", "ldrb w20, [x28, #1019]", "sub w20, w20, #0x2 (2)", @@ -233,7 +224,7 @@ }, "8-load 80bit": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 59, + "ExpectedInstructionCount": 52, "x86Insts": [ "fld tword [rax]", "fld tword [rax+10]", @@ -248,25 +239,18 @@ "ptrue p2.h, vl5", "ld1h {z2.h}, p2/z, [x4]", "add x20, x4, #0xa (10)", - "ptrue p2.h, vl5", "ld1h {z3.h}, p2/z, [x20]", "add x20, x4, #0x14 (20)", - "ptrue p2.h, vl5", "ld1h {z4.h}, p2/z, [x20]", "add x20, x4, #0x1e (30)", - "ptrue p2.h, vl5", "ld1h {z5.h}, p2/z, [x20]", "add x20, x4, #0x28 (40)", - "ptrue p2.h, vl5", "ld1h {z6.h}, p2/z, [x20]", "add x20, x4, #0x32 (50)", - "ptrue p2.h, vl5", "ld1h {z7.h}, p2/z, [x20]", "add x20, x4, #0x3c (60)", - "ptrue p2.h, vl5", "ld1h {z8.h}, p2/z, [x20]", "add x20, x4, #0x46 (70)", - "ptrue p2.h, vl5", "ld1h {z9.h}, p2/z, [x20]", "ldrb w20, [x28, #1019]", "sub w20, w20, #0x8 (8)",